pivoting, wide-to-long, melt

import seaborn as sns
import pandas as pd
import numpy as np

df_wide = pd.read_csv('rates.csv', parse_dates=['Time'])
df_wide.columns = ['Date'] + ['rate_%s' % col for col in df_wide.columns[1:]]
df_wide.head()
Date rate_USD rate_JPY rate_BGN rate_CZK rate_DKK rate_GBP rate_CHF
0 2024-01-17 1.0877 160.65 1.9558 24.755 7.4586 0.85818 0.9406
1 2024-01-16 1.0882 159.64 1.9558 24.710 7.4582 0.86078 0.9361
2 2024-01-15 1.0945 159.67 1.9558 24.714 7.4590 0.86075 0.9351
3 2024-01-12 1.0942 159.17 1.9558 24.689 7.4565 0.85950 0.9350
4 2024-01-11 1.0987 159.71 1.9558 24.659 7.4568 0.86145 0.9338

Pivot: transform a ‘’long’’ dataframe to ‘’wide’’ format

df_long = pd.wide_to_long(df_wide, stubnames=["rate_"], i='Date', j='currency', suffix='[A-Z]*')
df_long
rate_
Date currency
2024-01-17 USD 1.0877
2024-01-16 USD 1.0882
2024-01-15 USD 1.0945
2024-01-12 USD 1.0942
2024-01-11 USD 1.0987
... ... ...
2023-10-26 CHF 0.9466
2023-10-25 CHF 0.9474
2023-10-24 CHF 0.9501
2023-10-23 CHF 0.9461
2023-10-20 CHF 0.9442

427 rows × 1 columns

df_long.reset_index()
Date currency rate_
0 2024-01-17 USD 1.0877
1 2024-01-16 USD 1.0882
2 2024-01-15 USD 1.0945
3 2024-01-12 USD 1.0942
4 2024-01-11 USD 1.0987
... ... ... ...
422 2023-10-26 CHF 0.9466
423 2023-10-25 CHF 0.9474
424 2023-10-24 CHF 0.9501
425 2023-10-23 CHF 0.9461
426 2023-10-20 CHF 0.9442

427 rows × 3 columns

df_long.reset_index().pivot(index='Date', columns="currency", values='rate_')
currency BGN CHF CZK DKK GBP JPY USD
Date
2023-10-20 1.9558 0.9442 24.704 7.4620 0.87213 158.80 1.0591
2023-10-23 1.9558 0.9461 24.645 7.4634 0.87153 158.91 1.0597
2023-10-24 1.9558 0.9501 24.659 7.4648 0.87025 159.26 1.0632
2023-10-25 1.9558 0.9474 24.693 7.4639 0.87240 158.55 1.0576
2023-10-26 1.9558 0.9466 24.714 7.4632 0.87170 158.48 1.0540
... ... ... ... ... ... ... ...
2024-01-11 1.9558 0.9338 24.659 7.4568 0.86145 159.71 1.0987
2024-01-12 1.9558 0.9350 24.689 7.4565 0.85950 159.17 1.0942
2024-01-15 1.9558 0.9351 24.714 7.4590 0.86075 159.67 1.0945
2024-01-16 1.9558 0.9361 24.710 7.4582 0.86078 159.64 1.0882
2024-01-17 1.9558 0.9406 24.755 7.4586 0.85818 160.65 1.0877

61 rows × 7 columns

df_long = df_long.iloc[1:, :]
df_long
rate_
Date currency
2024-01-16 USD 1.0882
2024-01-15 USD 1.0945
2024-01-12 USD 1.0942
2024-01-11 USD 1.0987
2024-01-10 USD 1.0946
... ... ...
2023-10-26 CHF 0.9466
2023-10-25 CHF 0.9474
2023-10-24 CHF 0.9501
2023-10-23 CHF 0.9461
2023-10-20 CHF 0.9442

426 rows × 1 columns

# the pivot cannot fill the rate value on Jan 17 for USD since 
# the corresponding row in df_long is missing
df_long.reset_index().pivot(index='Date', columns="currency", values='rate_')
currency BGN CHF CZK DKK GBP JPY USD
Date
2023-10-20 1.9558 0.9442 24.704 7.4620 0.87213 158.80 1.0591
2023-10-23 1.9558 0.9461 24.645 7.4634 0.87153 158.91 1.0597
2023-10-24 1.9558 0.9501 24.659 7.4648 0.87025 159.26 1.0632
2023-10-25 1.9558 0.9474 24.693 7.4639 0.87240 158.55 1.0576
2023-10-26 1.9558 0.9466 24.714 7.4632 0.87170 158.48 1.0540
... ... ... ... ... ... ... ...
2024-01-11 1.9558 0.9338 24.659 7.4568 0.86145 159.71 1.0987
2024-01-12 1.9558 0.9350 24.689 7.4565 0.85950 159.17 1.0942
2024-01-15 1.9558 0.9351 24.714 7.4590 0.86075 159.67 1.0945
2024-01-16 1.9558 0.9361 24.710 7.4582 0.86078 159.64 1.0882
2024-01-17 1.9558 0.9406 24.755 7.4586 0.85818 160.65 NaN

61 rows × 7 columns

melt a dataframe to transform it to a long format by specifying identifier variables

df_penguins = sns.load_dataset('penguins')
df_penguins
species island bill_length_mm bill_depth_mm flipper_length_mm body_mass_g sex
0 Adelie Torgersen 39.1 18.7 181.0 3750.0 Male
1 Adelie Torgersen 39.5 17.4 186.0 3800.0 Female
2 Adelie Torgersen 40.3 18.0 195.0 3250.0 Female
3 Adelie Torgersen NaN NaN NaN NaN NaN
4 Adelie Torgersen 36.7 19.3 193.0 3450.0 Female
... ... ... ... ... ... ... ...
339 Gentoo Biscoe NaN NaN NaN NaN NaN
340 Gentoo Biscoe 46.8 14.3 215.0 4850.0 Female
341 Gentoo Biscoe 50.4 15.7 222.0 5750.0 Male
342 Gentoo Biscoe 45.2 14.8 212.0 5200.0 Female
343 Gentoo Biscoe 49.9 16.1 213.0 5400.0 Male

344 rows × 7 columns

df_penguins.melt(id_vars=['species', 'island', 'sex'])
species island sex variable value
0 Adelie Torgersen Male bill_length_mm 39.1
1 Adelie Torgersen Female bill_length_mm 39.5
2 Adelie Torgersen Female bill_length_mm 40.3
3 Adelie Torgersen NaN bill_length_mm NaN
4 Adelie Torgersen Female bill_length_mm 36.7
... ... ... ... ... ...
1371 Gentoo Biscoe NaN body_mass_g NaN
1372 Gentoo Biscoe Female body_mass_g 4850.0
1373 Gentoo Biscoe Male body_mass_g 5750.0
1374 Gentoo Biscoe Female body_mass_g 5200.0
1375 Gentoo Biscoe Male body_mass_g 5400.0

1376 rows × 5 columns