import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns
pandas DataFrame (2d)
DataFrame from historical rates data
df = pd.read_csv('rates.csv', parse_dates=['Time']) # wrong, dates are not ordered correctly
df.index = df['Time']
df.head()
| | Time | USD | JPY | BGN | CZK | DKK | GBP | CHF |
|---|---|---|---|---|---|---|---|---|
| Time | | | | | | | | |
| 2024-01-17 | 2024-01-17 | 1.0877 | 160.65 | 1.9558 | 24.755 | 7.4586 | 0.85818 | 0.9406 |
| 2024-01-16 | 2024-01-16 | 1.0882 | 159.64 | 1.9558 | 24.710 | 7.4582 | 0.86078 | 0.9361 |
| 2024-01-15 | 2024-01-15 | 1.0945 | 159.67 | 1.9558 | 24.714 | 7.4590 | 0.86075 | 0.9351 |
| 2024-01-12 | 2024-01-12 | 1.0942 | 159.17 | 1.9558 | 24.689 | 7.4565 | 0.85950 | 0.9350 |
| 2024-01-11 | 2024-01-11 | 1.0987 | 159.71 | 1.9558 | 24.659 | 7.4568 | 0.86145 | 0.9338 |
df.dtypes
Time datetime64[ns]
USD float64
JPY float64
BGN float64
CZK float64
DKK float64
GBP float64
CHF float64
dtype: object
df.index # a DatetimeIndex here (not the default RangeIndex), because the index was already set from the 'Time' column
DatetimeIndex(['2024-01-17', '2024-01-16', '2024-01-15', '2024-01-12',
'2024-01-11', '2024-01-10', '2024-01-09', '2024-01-08',
'2024-01-05', '2024-01-04', '2024-01-03', '2024-01-02',
'2023-12-29', '2023-12-28', '2023-12-27', '2023-12-22',
'2023-12-21', '2023-12-20', '2023-12-19', '2023-12-18',
'2023-12-15', '2023-12-14', '2023-12-13', '2023-12-12',
'2023-12-11', '2023-12-08', '2023-12-07', '2023-12-06',
'2023-12-05', '2023-12-04', '2023-12-01', '2023-11-30',
'2023-11-29', '2023-11-28', '2023-11-27', '2023-11-24',
'2023-11-23', '2023-11-22', '2023-11-21', '2023-11-20',
'2023-11-17', '2023-11-16', '2023-11-15', '2023-11-14',
'2023-11-13', '2023-11-10', '2023-11-09', '2023-11-08',
'2023-11-07', '2023-11-06', '2023-11-03', '2023-11-02',
'2023-11-01', '2023-10-31', '2023-10-30', '2023-10-27',
'2023-10-26', '2023-10-25', '2023-10-24', '2023-10-23',
'2023-10-20'],
dtype='datetime64[ns]', name='Time', freq=None)
df.index = df['Time']
df.index
DatetimeIndex(['2024-01-17', '2024-01-16', '2024-01-15', '2024-01-12',
'2024-01-11', '2024-01-10', '2024-01-09', '2024-01-08',
'2024-01-05', '2024-01-04', '2024-01-03', '2024-01-02',
'2023-12-29', '2023-12-28', '2023-12-27', '2023-12-22',
'2023-12-21', '2023-12-20', '2023-12-19', '2023-12-18',
'2023-12-15', '2023-12-14', '2023-12-13', '2023-12-12',
'2023-12-11', '2023-12-08', '2023-12-07', '2023-12-06',
'2023-12-05', '2023-12-04', '2023-12-01', '2023-11-30',
'2023-11-29', '2023-11-28', '2023-11-27', '2023-11-24',
'2023-11-23', '2023-11-22', '2023-11-21', '2023-11-20',
'2023-11-17', '2023-11-16', '2023-11-15', '2023-11-14',
'2023-11-13', '2023-11-10', '2023-11-09', '2023-11-08',
'2023-11-07', '2023-11-06', '2023-11-03', '2023-11-02',
'2023-11-01', '2023-10-31', '2023-10-30', '2023-10-27',
'2023-10-26', '2023-10-25', '2023-10-24', '2023-10-23',
'2023-10-20'],
dtype='datetime64[ns]', name='Time', freq=None)
df
| | Time | USD | JPY | BGN | CZK | DKK | GBP | CHF |
|---|---|---|---|---|---|---|---|---|
| Time | | | | | | | | |
| 2024-01-17 | 2024-01-17 | 1.0877 | 160.65 | 1.9558 | 24.755 | 7.4586 | 0.85818 | 0.9406 |
| 2024-01-16 | 2024-01-16 | 1.0882 | 159.64 | 1.9558 | 24.710 | 7.4582 | 0.86078 | 0.9361 |
| 2024-01-15 | 2024-01-15 | 1.0945 | 159.67 | 1.9558 | 24.714 | 7.4590 | 0.86075 | 0.9351 |
| 2024-01-12 | 2024-01-12 | 1.0942 | 159.17 | 1.9558 | 24.689 | 7.4565 | 0.85950 | 0.9350 |
| 2024-01-11 | 2024-01-11 | 1.0987 | 159.71 | 1.9558 | 24.659 | 7.4568 | 0.86145 | 0.9338 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2023-10-26 | 2023-10-26 | 1.0540 | 158.48 | 1.9558 | 24.714 | 7.4632 | 0.87170 | 0.9466 |
| 2023-10-25 | 2023-10-25 | 1.0576 | 158.55 | 1.9558 | 24.693 | 7.4639 | 0.87240 | 0.9474 |
| 2023-10-24 | 2023-10-24 | 1.0632 | 159.26 | 1.9558 | 24.659 | 7.4648 | 0.87025 | 0.9501 |
| 2023-10-23 | 2023-10-23 | 1.0597 | 158.91 | 1.9558 | 24.645 | 7.4634 | 0.87153 | 0.9461 |
| 2023-10-20 | 2023-10-20 | 1.0591 | 158.80 | 1.9558 | 24.704 | 7.4620 | 0.87213 | 0.9442 |
61 rows × 8 columns
df.shape
(61, 8)
Selecting/dropping columns
df
df_dropped = df.drop(columns=['Time', 'BGN', 'DKK', 'CZK'])
df_dropped.head()
| | USD | JPY | GBP | CHF |
|---|---|---|---|---|
| Time | | | | |
| 2024-01-17 | 1.0877 | 160.65 | 0.85818 | 0.9406 |
| 2024-01-16 | 1.0882 | 159.64 | 0.86078 | 0.9361 |
| 2024-01-15 | 1.0945 | 159.67 | 0.86075 | 0.9351 |
| 2024-01-12 | 1.0942 | 159.17 | 0.85950 | 0.9350 |
| 2024-01-11 | 1.0987 | 159.71 | 0.86145 | 0.9338 |
df.head() # original df unaffected
| | Time | USD | JPY | BGN | CZK | DKK | GBP | CHF |
|---|---|---|---|---|---|---|---|---|
| Time | | | | | | | | |
| 2024-01-17 | 2024-01-17 | 1.0877 | 160.65 | 1.9558 | 24.755 | 7.4586 | 0.85818 | 0.9406 |
| 2024-01-16 | 2024-01-16 | 1.0882 | 159.64 | 1.9558 | 24.710 | 7.4582 | 0.86078 | 0.9361 |
| 2024-01-15 | 2024-01-15 | 1.0945 | 159.67 | 1.9558 | 24.714 | 7.4590 | 0.86075 | 0.9351 |
| 2024-01-12 | 2024-01-12 | 1.0942 | 159.17 | 1.9558 | 24.689 | 7.4565 | 0.85950 | 0.9350 |
| 2024-01-11 | 2024-01-11 | 1.0987 | 159.71 | 1.9558 | 24.659 | 7.4568 | 0.86145 | 0.9338 |
# Using a boolean mask to select columns
df.columns != 'Time'
array([False, True, True, True, True, True, True, True])
df_dropped2 = df.loc[:, df.columns != 'Time']
df_dropped2
| | USD | JPY | BGN | CZK | DKK | GBP | CHF |
|---|---|---|---|---|---|---|---|
| Time | | | | | | | |
| 2024-01-17 | 1.0877 | 160.65 | 1.9558 | 24.755 | 7.4586 | 0.85818 | 0.9406 |
| 2024-01-16 | 1.0882 | 159.64 | 1.9558 | 24.710 | 7.4582 | 0.86078 | 0.9361 |
| 2024-01-15 | 1.0945 | 159.67 | 1.9558 | 24.714 | 7.4590 | 0.86075 | 0.9351 |
| 2024-01-12 | 1.0942 | 159.17 | 1.9558 | 24.689 | 7.4565 | 0.85950 | 0.9350 |
| 2024-01-11 | 1.0987 | 159.71 | 1.9558 | 24.659 | 7.4568 | 0.86145 | 0.9338 |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 2023-10-26 | 1.0540 | 158.48 | 1.9558 | 24.714 | 7.4632 | 0.87170 | 0.9466 |
| 2023-10-25 | 1.0576 | 158.55 | 1.9558 | 24.693 | 7.4639 | 0.87240 | 0.9474 |
| 2023-10-24 | 1.0632 | 159.26 | 1.9558 | 24.659 | 7.4648 | 0.87025 | 0.9501 |
| 2023-10-23 | 1.0597 | 158.91 | 1.9558 | 24.645 | 7.4634 | 0.87153 | 0.9461 |
| 2023-10-20 | 1.0591 | 158.80 | 1.9558 | 24.704 | 7.4620 | 0.87213 | 0.9442 |
61 rows × 7 columns
# Dropping several columns
df_dropped3 = df.drop(columns=['CZK', 'USD', 'Time'])
df_dropped3.head()
| | JPY | BGN | DKK | GBP | CHF |
|---|---|---|---|---|---|
| Time | | | | | |
| 2024-01-17 | 160.65 | 1.9558 | 7.4586 | 0.85818 | 0.9406 |
| 2024-01-16 | 159.64 | 1.9558 | 7.4582 | 0.86078 | 0.9361 |
| 2024-01-15 | 159.67 | 1.9558 | 7.4590 | 0.86075 | 0.9351 |
| 2024-01-12 | 159.17 | 1.9558 | 7.4565 | 0.85950 | 0.9350 |
| 2024-01-11 | 159.71 | 1.9558 | 7.4568 | 0.86145 | 0.9338 |