pandas DataFrame (2d)

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns

DataFrame from historical rates data

# Load the rates table, parsing the 'Time' column as datetimes.
# NOTE(review): originally flagged as "wrong, dates are not ordered correctly" --
# the head() output shows the rows newest-first (descending dates); presumably a
# chronological sort (e.g. sort_index()) is wanted afterwards -- confirm.
df = pd.read_csv('rates.csv', parse_dates=['Time'])
df.index = df['Time']  # use the dates as row labels; the 'Time' column itself is kept
df.head()
Time USD JPY BGN CZK DKK GBP CHF
Time
2024-01-17 2024-01-17 1.0877 160.65 1.9558 24.755 7.4586 0.85818 0.9406
2024-01-16 2024-01-16 1.0882 159.64 1.9558 24.710 7.4582 0.86078 0.9361
2024-01-15 2024-01-15 1.0945 159.67 1.9558 24.714 7.4590 0.86075 0.9351
2024-01-12 2024-01-12 1.0942 159.17 1.9558 24.689 7.4565 0.85950 0.9350
2024-01-11 2024-01-11 1.0987 159.71 1.9558 24.659 7.4568 0.86145 0.9338
df.dtypes  # dtype of each column; 'Time' was parsed to datetime64[ns], the rates are float64
Time    datetime64[ns]
USD            float64
JPY            float64
BGN            float64
CZK            float64
DKK            float64
GBP            float64
CHF            float64
dtype: object
df.index  # DatetimeIndex here, not a RangeIndex: the index was already set to df['Time'] right after loading
DatetimeIndex(['2024-01-17', '2024-01-16', '2024-01-15', '2024-01-12',
               '2024-01-11', '2024-01-10', '2024-01-09', '2024-01-08',
               '2024-01-05', '2024-01-04', '2024-01-03', '2024-01-02',
               '2023-12-29', '2023-12-28', '2023-12-27', '2023-12-22',
               '2023-12-21', '2023-12-20', '2023-12-19', '2023-12-18',
               '2023-12-15', '2023-12-14', '2023-12-13', '2023-12-12',
               '2023-12-11', '2023-12-08', '2023-12-07', '2023-12-06',
               '2023-12-05', '2023-12-04', '2023-12-01', '2023-11-30',
               '2023-11-29', '2023-11-28', '2023-11-27', '2023-11-24',
               '2023-11-23', '2023-11-22', '2023-11-21', '2023-11-20',
               '2023-11-17', '2023-11-16', '2023-11-15', '2023-11-14',
               '2023-11-13', '2023-11-10', '2023-11-09', '2023-11-08',
               '2023-11-07', '2023-11-06', '2023-11-03', '2023-11-02',
               '2023-11-01', '2023-10-31', '2023-10-30', '2023-10-27',
               '2023-10-26', '2023-10-25', '2023-10-24', '2023-10-23',
               '2023-10-20'],
              dtype='datetime64[ns]', name='Time', freq=None)
# Use the 'Time' column as the index. The same assignment already ran right
# after loading, so the index displayed here is identical to the previous one.
df.index = df['Time']
df.index
DatetimeIndex(['2024-01-17', '2024-01-16', '2024-01-15', '2024-01-12',
               '2024-01-11', '2024-01-10', '2024-01-09', '2024-01-08',
               '2024-01-05', '2024-01-04', '2024-01-03', '2024-01-02',
               '2023-12-29', '2023-12-28', '2023-12-27', '2023-12-22',
               '2023-12-21', '2023-12-20', '2023-12-19', '2023-12-18',
               '2023-12-15', '2023-12-14', '2023-12-13', '2023-12-12',
               '2023-12-11', '2023-12-08', '2023-12-07', '2023-12-06',
               '2023-12-05', '2023-12-04', '2023-12-01', '2023-11-30',
               '2023-11-29', '2023-11-28', '2023-11-27', '2023-11-24',
               '2023-11-23', '2023-11-22', '2023-11-21', '2023-11-20',
               '2023-11-17', '2023-11-16', '2023-11-15', '2023-11-14',
               '2023-11-13', '2023-11-10', '2023-11-09', '2023-11-08',
               '2023-11-07', '2023-11-06', '2023-11-03', '2023-11-02',
               '2023-11-01', '2023-10-31', '2023-10-30', '2023-10-27',
               '2023-10-26', '2023-10-25', '2023-10-24', '2023-10-23',
               '2023-10-20'],
              dtype='datetime64[ns]', name='Time', freq=None)
# Display the whole frame; the notebook elides the middle rows with "...".
df
Time USD JPY BGN CZK DKK GBP CHF
Time
2024-01-17 2024-01-17 1.0877 160.65 1.9558 24.755 7.4586 0.85818 0.9406
2024-01-16 2024-01-16 1.0882 159.64 1.9558 24.710 7.4582 0.86078 0.9361
2024-01-15 2024-01-15 1.0945 159.67 1.9558 24.714 7.4590 0.86075 0.9351
2024-01-12 2024-01-12 1.0942 159.17 1.9558 24.689 7.4565 0.85950 0.9350
2024-01-11 2024-01-11 1.0987 159.71 1.9558 24.659 7.4568 0.86145 0.9338
... ... ... ... ... ... ... ... ...
2023-10-26 2023-10-26 1.0540 158.48 1.9558 24.714 7.4632 0.87170 0.9466
2023-10-25 2023-10-25 1.0576 158.55 1.9558 24.693 7.4639 0.87240 0.9474
2023-10-24 2023-10-24 1.0632 159.26 1.9558 24.659 7.4648 0.87025 0.9501
2023-10-23 2023-10-23 1.0597 158.91 1.9558 24.645 7.4634 0.87153 0.9461
2023-10-20 2023-10-20 1.0591 158.80 1.9558 24.704 7.4620 0.87213 0.9442

61 rows × 8 columns

df.shape  # (number of rows, number of columns)
(61, 8)

Selecting/dropping columns

# Drop the 'Time' column (the dates already live in the index) together with
# BGN, DKK and CZK. drop() returns a new frame; df itself is left untouched.
df_dropped = df.drop(columns=['Time', 'BGN', 'DKK', 'CZK'])
df_dropped.head()
USD JPY GBP CHF
Time
2024-01-17 1.0877 160.65 0.85818 0.9406
2024-01-16 1.0882 159.64 0.86078 0.9361
2024-01-15 1.0945 159.67 0.86075 0.9351
2024-01-12 1.0942 159.17 0.85950 0.9350
2024-01-11 1.0987 159.71 0.86145 0.9338
df.head()  # drop() returned a new frame: the original df still has all 8 columns
Time USD JPY BGN CZK DKK GBP CHF
Time
2024-01-17 2024-01-17 1.0877 160.65 1.9558 24.755 7.4586 0.85818 0.9406
2024-01-16 2024-01-16 1.0882 159.64 1.9558 24.710 7.4582 0.86078 0.9361
2024-01-15 2024-01-15 1.0945 159.67 1.9558 24.714 7.4590 0.86075 0.9351
2024-01-12 2024-01-12 1.0942 159.17 1.9558 24.689 7.4565 0.85950 0.9350
2024-01-11 2024-01-11 1.0987 159.71 1.9558 24.659 7.4568 0.86145 0.9338
# Using a boolean mask to select columns:
# comparing the column labels with 'Time' yields one boolean per column,
# False for 'Time' and True for every other column.
df.columns != 'Time'
array([False,  True,  True,  True,  True,  True,  True,  True])
# Keep all rows (:) and only the columns where the mask is True,
# i.e. every column except 'Time'.
df_dropped2 = df.loc[:, df.columns != 'Time']
df_dropped2
USD JPY BGN CZK DKK GBP CHF
Time
2024-01-17 1.0877 160.65 1.9558 24.755 7.4586 0.85818 0.9406
2024-01-16 1.0882 159.64 1.9558 24.710 7.4582 0.86078 0.9361
2024-01-15 1.0945 159.67 1.9558 24.714 7.4590 0.86075 0.9351
2024-01-12 1.0942 159.17 1.9558 24.689 7.4565 0.85950 0.9350
2024-01-11 1.0987 159.71 1.9558 24.659 7.4568 0.86145 0.9338
... ... ... ... ... ... ... ...
2023-10-26 1.0540 158.48 1.9558 24.714 7.4632 0.87170 0.9466
2023-10-25 1.0576 158.55 1.9558 24.693 7.4639 0.87240 0.9474
2023-10-24 1.0632 159.26 1.9558 24.659 7.4648 0.87025 0.9501
2023-10-23 1.0597 158.91 1.9558 24.645 7.4634 0.87153 0.9461
2023-10-20 1.0591 158.80 1.9558 24.704 7.4620 0.87213 0.9442

61 rows × 7 columns

# Dropping several columns at once: pass their labels as a list to `columns=`.
# The result is a new frame holding the remaining columns (JPY, BGN, DKK, GBP, CHF).
df_dropped3 = df.drop(columns=['CZK', 'USD', 'Time'])
df_dropped3.head()
JPY BGN DKK GBP CHF
Time
2024-01-17 160.65 1.9558 7.4586 0.85818 0.9406
2024-01-16 159.64 1.9558 7.4582 0.86078 0.9361
2024-01-15 159.67 1.9558 7.4590 0.86075 0.9351
2024-01-12 159.17 1.9558 7.4565 0.85950 0.9350
2024-01-11 159.71 1.9558 7.4568 0.86145 0.9338