import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns
pandas DataFrame (2d)
DataFrame from historical rates data
# Load the rates file, parsing the 'Time' column as datetimes.
df = pd.read_csv('rates.csv', parse_dates=['Time'])  # wrong, dates are not ordered correctly
# Use the 'Time' column as the row index; the column itself stays in the frame.
df.index = df['Time']
df.head()
Time | USD | JPY | BGN | CZK | DKK | GBP | CHF | |
---|---|---|---|---|---|---|---|---|
Time | ||||||||
2024-01-17 | 2024-01-17 | 1.0877 | 160.65 | 1.9558 | 24.755 | 7.4586 | 0.85818 | 0.9406 |
2024-01-16 | 2024-01-16 | 1.0882 | 159.64 | 1.9558 | 24.710 | 7.4582 | 0.86078 | 0.9361 |
2024-01-15 | 2024-01-15 | 1.0945 | 159.67 | 1.9558 | 24.714 | 7.4590 | 0.86075 | 0.9351 |
2024-01-12 | 2024-01-12 | 1.0942 | 159.17 | 1.9558 | 24.689 | 7.4565 | 0.85950 | 0.9350 |
2024-01-11 | 2024-01-11 | 1.0987 | 159.71 | 1.9558 | 24.659 | 7.4568 | 0.86145 | 0.9338 |
# Inspect the dtype of every column.
df.dtypes
Time datetime64[ns]
USD float64
JPY float64
BGN float64
CZK float64
DKK float64
GBP float64
CHF float64
dtype: object
# Row index of the frame. A frame read without an index column gets a
# RangeIndex that enumerates the rows; here 'Time' has been assigned as the
# index, so a DatetimeIndex is shown.
df.index
DatetimeIndex(['2024-01-17', '2024-01-16', '2024-01-15', '2024-01-12',
'2024-01-11', '2024-01-10', '2024-01-09', '2024-01-08',
'2024-01-05', '2024-01-04', '2024-01-03', '2024-01-02',
'2023-12-29', '2023-12-28', '2023-12-27', '2023-12-22',
'2023-12-21', '2023-12-20', '2023-12-19', '2023-12-18',
'2023-12-15', '2023-12-14', '2023-12-13', '2023-12-12',
'2023-12-11', '2023-12-08', '2023-12-07', '2023-12-06',
'2023-12-05', '2023-12-04', '2023-12-01', '2023-11-30',
'2023-11-29', '2023-11-28', '2023-11-27', '2023-11-24',
'2023-11-23', '2023-11-22', '2023-11-21', '2023-11-20',
'2023-11-17', '2023-11-16', '2023-11-15', '2023-11-14',
'2023-11-13', '2023-11-10', '2023-11-09', '2023-11-08',
'2023-11-07', '2023-11-06', '2023-11-03', '2023-11-02',
'2023-11-01', '2023-10-31', '2023-10-30', '2023-10-27',
'2023-10-26', '2023-10-25', '2023-10-24', '2023-10-23',
'2023-10-20'],
dtype='datetime64[ns]', name='Time', freq=None)
# Assign the 'Time' column as the row index, then inspect the resulting index.
df.index = df['Time']
df.index
DatetimeIndex(['2024-01-17', '2024-01-16', '2024-01-15', '2024-01-12',
'2024-01-11', '2024-01-10', '2024-01-09', '2024-01-08',
'2024-01-05', '2024-01-04', '2024-01-03', '2024-01-02',
'2023-12-29', '2023-12-28', '2023-12-27', '2023-12-22',
'2023-12-21', '2023-12-20', '2023-12-19', '2023-12-18',
'2023-12-15', '2023-12-14', '2023-12-13', '2023-12-12',
'2023-12-11', '2023-12-08', '2023-12-07', '2023-12-06',
'2023-12-05', '2023-12-04', '2023-12-01', '2023-11-30',
'2023-11-29', '2023-11-28', '2023-11-27', '2023-11-24',
'2023-11-23', '2023-11-22', '2023-11-21', '2023-11-20',
'2023-11-17', '2023-11-16', '2023-11-15', '2023-11-14',
'2023-11-13', '2023-11-10', '2023-11-09', '2023-11-08',
'2023-11-07', '2023-11-06', '2023-11-03', '2023-11-02',
'2023-11-01', '2023-10-31', '2023-10-30', '2023-10-27',
'2023-10-26', '2023-10-25', '2023-10-24', '2023-10-23',
'2023-10-20'],
dtype='datetime64[ns]', name='Time', freq=None)
# Display the whole frame (the notebook renders a truncated view).
df
Time | USD | JPY | BGN | CZK | DKK | GBP | CHF | |
---|---|---|---|---|---|---|---|---|
Time | ||||||||
2024-01-17 | 2024-01-17 | 1.0877 | 160.65 | 1.9558 | 24.755 | 7.4586 | 0.85818 | 0.9406 |
2024-01-16 | 2024-01-16 | 1.0882 | 159.64 | 1.9558 | 24.710 | 7.4582 | 0.86078 | 0.9361 |
2024-01-15 | 2024-01-15 | 1.0945 | 159.67 | 1.9558 | 24.714 | 7.4590 | 0.86075 | 0.9351 |
2024-01-12 | 2024-01-12 | 1.0942 | 159.17 | 1.9558 | 24.689 | 7.4565 | 0.85950 | 0.9350 |
2024-01-11 | 2024-01-11 | 1.0987 | 159.71 | 1.9558 | 24.659 | 7.4568 | 0.86145 | 0.9338 |
... | ... | ... | ... | ... | ... | ... | ... | ... |
2023-10-26 | 2023-10-26 | 1.0540 | 158.48 | 1.9558 | 24.714 | 7.4632 | 0.87170 | 0.9466 |
2023-10-25 | 2023-10-25 | 1.0576 | 158.55 | 1.9558 | 24.693 | 7.4639 | 0.87240 | 0.9474 |
2023-10-24 | 2023-10-24 | 1.0632 | 159.26 | 1.9558 | 24.659 | 7.4648 | 0.87025 | 0.9501 |
2023-10-23 | 2023-10-23 | 1.0597 | 158.91 | 1.9558 | 24.645 | 7.4634 | 0.87153 | 0.9461 |
2023-10-20 | 2023-10-20 | 1.0591 | 158.80 | 1.9558 | 24.704 | 7.4620 | 0.87213 | 0.9442 |
61 rows × 8 columns
# (number of rows, number of columns)
df.shape
(61, 8)
Selecting/dropping columns
# drop() returns a new DataFrame without the listed columns; bind it to a new
# name so that the original df keeps all of its columns.
df_dropped = df.drop(columns=['Time', 'BGN', 'DKK', 'CZK'])
df_dropped.head()
USD | JPY | GBP | CHF | |
---|---|---|---|---|
Time | ||||
2024-01-17 | 1.0877 | 160.65 | 0.85818 | 0.9406 |
2024-01-16 | 1.0882 | 159.64 | 0.86078 | 0.9361 |
2024-01-15 | 1.0945 | 159.67 | 0.86075 | 0.9351 |
2024-01-12 | 1.0942 | 159.17 | 0.85950 | 0.9350 |
2024-01-11 | 1.0987 | 159.71 | 0.86145 | 0.9338 |
# original df unaffected: DataFrame.drop returns a new object unless inplace=True
df.head()
Time | USD | JPY | BGN | CZK | DKK | GBP | CHF | |
---|---|---|---|---|---|---|---|---|
Time | ||||||||
2024-01-17 | 2024-01-17 | 1.0877 | 160.65 | 1.9558 | 24.755 | 7.4586 | 0.85818 | 0.9406 |
2024-01-16 | 2024-01-16 | 1.0882 | 159.64 | 1.9558 | 24.710 | 7.4582 | 0.86078 | 0.9361 |
2024-01-15 | 2024-01-15 | 1.0945 | 159.67 | 1.9558 | 24.714 | 7.4590 | 0.86075 | 0.9351 |
2024-01-12 | 2024-01-12 | 1.0942 | 159.17 | 1.9558 | 24.689 | 7.4565 | 0.85950 | 0.9350 |
2024-01-11 | 2024-01-11 | 1.0987 | 159.71 | 1.9558 | 24.659 | 7.4568 | 0.86145 | 0.9338 |
# Using a boolean mask to select columns:
# the element-wise comparison is True for every column label except 'Time'.
df.columns != 'Time'
array([False, True, True, True, True, True, True, True])
# Keep all rows and only the columns where the mask is True
# (every column except 'Time').
df_dropped2 = df.loc[:, df.columns != 'Time']
df_dropped2
USD | JPY | BGN | CZK | DKK | GBP | CHF | |
---|---|---|---|---|---|---|---|
Time | |||||||
2024-01-17 | 1.0877 | 160.65 | 1.9558 | 24.755 | 7.4586 | 0.85818 | 0.9406 |
2024-01-16 | 1.0882 | 159.64 | 1.9558 | 24.710 | 7.4582 | 0.86078 | 0.9361 |
2024-01-15 | 1.0945 | 159.67 | 1.9558 | 24.714 | 7.4590 | 0.86075 | 0.9351 |
2024-01-12 | 1.0942 | 159.17 | 1.9558 | 24.689 | 7.4565 | 0.85950 | 0.9350 |
2024-01-11 | 1.0987 | 159.71 | 1.9558 | 24.659 | 7.4568 | 0.86145 | 0.9338 |
... | ... | ... | ... | ... | ... | ... | ... |
2023-10-26 | 1.0540 | 158.48 | 1.9558 | 24.714 | 7.4632 | 0.87170 | 0.9466 |
2023-10-25 | 1.0576 | 158.55 | 1.9558 | 24.693 | 7.4639 | 0.87240 | 0.9474 |
2023-10-24 | 1.0632 | 159.26 | 1.9558 | 24.659 | 7.4648 | 0.87025 | 0.9501 |
2023-10-23 | 1.0597 | 158.91 | 1.9558 | 24.645 | 7.4634 | 0.87153 | 0.9461 |
2023-10-20 | 1.0591 | 158.80 | 1.9558 | 24.704 | 7.4620 | 0.87213 | 0.9442 |
61 rows × 7 columns
# Dropping several columns at once; the result is a new DataFrame.
df_dropped3 = df.drop(columns=['CZK', 'USD', 'Time'])
df_dropped3.head()
JPY | BGN | DKK | GBP | CHF | |
---|---|---|---|---|---|
Time | |||||
2024-01-17 | 160.65 | 1.9558 | 7.4586 | 0.85818 | 0.9406 |
2024-01-16 | 159.64 | 1.9558 | 7.4582 | 0.86078 | 0.9361 |
2024-01-15 | 159.67 | 1.9558 | 7.4590 | 0.86075 | 0.9351 |
2024-01-12 | 159.17 | 1.9558 | 7.4565 | 0.85950 | 0.9350 |
2024-01-11 | 159.71 | 1.9558 | 7.4568 | 0.86145 | 0.9338 |