import seaborn as sns
import pandas as pd
= sns.load_dataset('penguins') df_penguins
MultiIndex / Advanced indexing
https://pandas.pydata.org/docs/user_guide/advanced.html
groupby and aggregation
# group by
= df_penguins.groupby(['species', 'island', 'sex']) grouped
'mean', 'median']) grouped.agg([
bill_length_mm | bill_depth_mm | flipper_length_mm | body_mass_g | |||||||
---|---|---|---|---|---|---|---|---|---|---|
mean | median | mean | median | mean | median | mean | median | |||
species | island | sex | ||||||||
Adelie | Biscoe | Female | 37.359091 | 37.75 | 17.704545 | 17.70 | 187.181818 | 187.0 | 3369.318182 | 3375.0 |
Male | 40.590909 | 40.80 | 19.036364 | 18.90 | 190.409091 | 191.0 | 4050.000000 | 4000.0 | ||
Dream | Female | 36.911111 | 36.80 | 17.618519 | 17.80 | 187.851852 | 188.0 | 3344.444444 | 3400.0 | |
Male | 40.071429 | 40.25 | 18.839286 | 18.65 | 191.928571 | 190.5 | 4045.535714 | 3987.5 | ||
Torgersen | Female | 37.554167 | 37.60 | 17.550000 | 17.45 | 188.291667 | 189.0 | 3395.833333 | 3400.0 | |
Male | 40.586957 | 41.10 | 19.391304 | 19.20 | 194.913043 | 195.0 | 4034.782609 | 4000.0 | ||
Chinstrap | Dream | Female | 46.573529 | 46.30 | 17.588235 | 17.65 | 191.735294 | 192.0 | 3527.205882 | 3550.0 |
Male | 51.094118 | 50.95 | 19.252941 | 19.30 | 199.911765 | 200.5 | 3938.970588 | 3950.0 | ||
Gentoo | Biscoe | Female | 45.563793 | 45.50 | 14.237931 | 14.25 | 212.706897 | 212.0 | 4679.741379 | 4700.0 |
Male | 49.473770 | 49.50 | 15.718033 | 15.70 | 221.540984 | 221.0 | 5484.836066 | 5500.0 |
stack and unstack
'mean', 'median']).unstack(level='species') grouped.agg([
bill_length_mm | bill_depth_mm | ... | flipper_length_mm | body_mass_g | ||||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
mean | median | mean | median | ... | mean | median | mean | median | ||||||||||||||
species | Adelie | Chinstrap | Gentoo | Adelie | Chinstrap | Gentoo | Adelie | Chinstrap | Gentoo | Adelie | ... | Gentoo | Adelie | Chinstrap | Gentoo | Adelie | Chinstrap | Gentoo | Adelie | Chinstrap | Gentoo | |
island | sex | |||||||||||||||||||||
Biscoe | Female | 37.359091 | NaN | 45.563793 | 37.75 | NaN | 45.5 | 17.704545 | NaN | 14.237931 | 17.70 | ... | 212.706897 | 187.0 | NaN | 212.0 | 3369.318182 | NaN | 4679.741379 | 3375.0 | NaN | 4700.0 |
Male | 40.590909 | NaN | 49.473770 | 40.80 | NaN | 49.5 | 19.036364 | NaN | 15.718033 | 18.90 | ... | 221.540984 | 191.0 | NaN | 221.0 | 4050.000000 | NaN | 5484.836066 | 4000.0 | NaN | 5500.0 | |
Dream | Female | 36.911111 | 46.573529 | NaN | 36.80 | 46.30 | NaN | 17.618519 | 17.588235 | NaN | 17.80 | ... | NaN | 188.0 | 192.0 | NaN | 3344.444444 | 3527.205882 | NaN | 3400.0 | 3550.0 | NaN |
Male | 40.071429 | 51.094118 | NaN | 40.25 | 50.95 | NaN | 18.839286 | 19.252941 | NaN | 18.65 | ... | NaN | 190.5 | 200.5 | NaN | 4045.535714 | 3938.970588 | NaN | 3987.5 | 3950.0 | NaN | |
Torgersen | Female | 37.554167 | NaN | NaN | 37.60 | NaN | NaN | 17.550000 | NaN | NaN | 17.45 | ... | NaN | 189.0 | NaN | NaN | 3395.833333 | NaN | NaN | 3400.0 | NaN | NaN |
Male | 40.586957 | NaN | NaN | 41.10 | NaN | NaN | 19.391304 | NaN | NaN | 19.20 | ... | NaN | 195.0 | NaN | NaN | 4034.782609 | NaN | NaN | 4000.0 | NaN | NaN |
6 rows × 24 columns
'mean', 'median']).unstack().stack(level=2) grouped.agg([
bill_length_mm | bill_depth_mm | flipper_length_mm | body_mass_g | |||||||
---|---|---|---|---|---|---|---|---|---|---|
mean | median | mean | median | mean | median | mean | median | |||
species | island | sex | ||||||||
Adelie | Biscoe | Female | 37.359091 | 37.75 | 17.704545 | 17.70 | 187.181818 | 187.0 | 3369.318182 | 3375.0 |
Male | 40.590909 | 40.80 | 19.036364 | 18.90 | 190.409091 | 191.0 | 4050.000000 | 4000.0 | ||
Dream | Female | 36.911111 | 36.80 | 17.618519 | 17.80 | 187.851852 | 188.0 | 3344.444444 | 3400.0 | |
Male | 40.071429 | 40.25 | 18.839286 | 18.65 | 191.928571 | 190.5 | 4045.535714 | 3987.5 | ||
Torgersen | Female | 37.554167 | 37.60 | 17.550000 | 17.45 | 188.291667 | 189.0 | 3395.833333 | 3400.0 | |
Male | 40.586957 | 41.10 | 19.391304 | 19.20 | 194.913043 | 195.0 | 4034.782609 | 4000.0 | ||
Chinstrap | Dream | Female | 46.573529 | 46.30 | 17.588235 | 17.65 | 191.735294 | 192.0 | 3527.205882 | 3550.0 |
Male | 51.094118 | 50.95 | 19.252941 | 19.30 | 199.911765 | 200.5 | 3938.970588 | 3950.0 | ||
Gentoo | Biscoe | Female | 45.563793 | 45.50 | 14.237931 | 14.25 | 212.706897 | 212.0 | 4679.741379 | 4700.0 |
Male | 49.473770 | 49.50 | 15.718033 | 15.70 | 221.540984 | 221.0 | 5484.836066 | 5500.0 |
'mean', 'median']).index grouped.agg([
MultiIndex([( 'Adelie', 'Biscoe', 'Female'),
( 'Adelie', 'Biscoe', 'Male'),
( 'Adelie', 'Dream', 'Female'),
( 'Adelie', 'Dream', 'Male'),
( 'Adelie', 'Torgersen', 'Female'),
( 'Adelie', 'Torgersen', 'Male'),
('Chinstrap', 'Dream', 'Female'),
('Chinstrap', 'Dream', 'Male'),
( 'Gentoo', 'Biscoe', 'Female'),
( 'Gentoo', 'Biscoe', 'Male')],
names=['species', 'island', 'sex'])
'mean', 'median']).index.values grouped.agg([
array([('Adelie', 'Biscoe', 'Female'), ('Adelie', 'Biscoe', 'Male'),
('Adelie', 'Dream', 'Female'), ('Adelie', 'Dream', 'Male'),
('Adelie', 'Torgersen', 'Female'), ('Adelie', 'Torgersen', 'Male'),
('Chinstrap', 'Dream', 'Female'), ('Chinstrap', 'Dream', 'Male'),
('Gentoo', 'Biscoe', 'Female'), ('Gentoo', 'Biscoe', 'Male')],
dtype=object)
'mean', 'median']).index.get_level_values('sex') grouped.agg([
Index(['Female', 'Male', 'Female', 'Male', 'Female', 'Male', 'Female', 'Male',
'Female', 'Male'],
dtype='object', name='sex')
'mean', 'median']).index.get_level_values('island') grouped.agg([
Index(['Biscoe', 'Biscoe', 'Dream', 'Dream', 'Torgersen', 'Torgersen', 'Dream',
'Dream', 'Biscoe', 'Biscoe'],
dtype='object', name='island')
'mean', 'median']).index.levels grouped.agg([
FrozenList([['Adelie', 'Chinstrap', 'Gentoo'], ['Biscoe', 'Dream', 'Torgersen'], ['Female', 'Male']])
Swapping levels, reordering levels
= df_penguins.groupby(['species', 'island', 'sex'])
grouped 'mean') grouped.agg(
bill_length_mm | bill_depth_mm | flipper_length_mm | body_mass_g | |||
---|---|---|---|---|---|---|
species | island | sex | ||||
Adelie | Biscoe | Female | 37.359091 | 17.704545 | 187.181818 | 3369.318182 |
Male | 40.590909 | 19.036364 | 190.409091 | 4050.000000 | ||
Dream | Female | 36.911111 | 17.618519 | 187.851852 | 3344.444444 | |
Male | 40.071429 | 18.839286 | 191.928571 | 4045.535714 | ||
Torgersen | Female | 37.554167 | 17.550000 | 188.291667 | 3395.833333 | |
Male | 40.586957 | 19.391304 | 194.913043 | 4034.782609 | ||
Chinstrap | Dream | Female | 46.573529 | 17.588235 | 191.735294 | 3527.205882 |
Male | 51.094118 | 19.252941 | 199.911765 | 3938.970588 | ||
Gentoo | Biscoe | Female | 45.563793 | 14.237931 | 212.706897 | 4679.741379 |
Male | 49.473770 | 15.718033 | 221.540984 | 5484.836066 |
'mean').swaplevel("species", "island").swaplevel('island', 'sex').sort_index() grouped.agg(
bill_length_mm | bill_depth_mm | flipper_length_mm | body_mass_g | |||
---|---|---|---|---|---|---|
sex | species | island | ||||
Female | Adelie | Biscoe | 37.359091 | 17.704545 | 187.181818 | 3369.318182 |
Dream | 36.911111 | 17.618519 | 187.851852 | 3344.444444 | ||
Torgersen | 37.554167 | 17.550000 | 188.291667 | 3395.833333 | ||
Chinstrap | Dream | 46.573529 | 17.588235 | 191.735294 | 3527.205882 | |
Gentoo | Biscoe | 45.563793 | 14.237931 | 212.706897 | 4679.741379 | |
Male | Adelie | Biscoe | 40.590909 | 19.036364 | 190.409091 | 4050.000000 |
Dream | 40.071429 | 18.839286 | 191.928571 | 4045.535714 | ||
Torgersen | 40.586957 | 19.391304 | 194.913043 | 4034.782609 | ||
Chinstrap | Dream | 51.094118 | 19.252941 | 199.911765 | 3938.970588 | |
Gentoo | Biscoe | 49.473770 | 15.718033 | 221.540984 | 5484.836066 |
'mean').reorder_levels([2, 1, 0], axis=0).sort_index() grouped.agg(
bill_length_mm | bill_depth_mm | flipper_length_mm | body_mass_g | |||
---|---|---|---|---|---|---|
sex | island | species | ||||
Female | Biscoe | Adelie | 37.359091 | 17.704545 | 187.181818 | 3369.318182 |
Gentoo | 45.563793 | 14.237931 | 212.706897 | 4679.741379 | ||
Dream | Adelie | 36.911111 | 17.618519 | 187.851852 | 3344.444444 | |
Chinstrap | 46.573529 | 17.588235 | 191.735294 | 3527.205882 | ||
Torgersen | Adelie | 37.554167 | 17.550000 | 188.291667 | 3395.833333 | |
Male | Biscoe | Adelie | 40.590909 | 19.036364 | 190.409091 | 4050.000000 |
Gentoo | 49.473770 | 15.718033 | 221.540984 | 5484.836066 | ||
Dream | Adelie | 40.071429 | 18.839286 | 191.928571 | 4045.535714 | |
Chinstrap | 51.094118 | 19.252941 | 199.911765 | 3938.970588 | ||
Torgersen | Adelie | 40.586957 | 19.391304 | 194.913043 | 4034.782609 |
“partial” label identifying a subgroup in the data
= grouped.agg(['mean', 'median'])
grouped_df grouped_df
bill_length_mm | bill_depth_mm | flipper_length_mm | body_mass_g | |||||||
---|---|---|---|---|---|---|---|---|---|---|
mean | median | mean | median | mean | median | mean | median | |||
species | island | sex | ||||||||
Adelie | Biscoe | Female | 37.359091 | 37.75 | 17.704545 | 17.70 | 187.181818 | 187.0 | 3369.318182 | 3375.0 |
Male | 40.590909 | 40.80 | 19.036364 | 18.90 | 190.409091 | 191.0 | 4050.000000 | 4000.0 | ||
Dream | Female | 36.911111 | 36.80 | 17.618519 | 17.80 | 187.851852 | 188.0 | 3344.444444 | 3400.0 | |
Male | 40.071429 | 40.25 | 18.839286 | 18.65 | 191.928571 | 190.5 | 4045.535714 | 3987.5 | ||
Torgersen | Female | 37.554167 | 37.60 | 17.550000 | 17.45 | 188.291667 | 189.0 | 3395.833333 | 3400.0 | |
Male | 40.586957 | 41.10 | 19.391304 | 19.20 | 194.913043 | 195.0 | 4034.782609 | 4000.0 | ||
Chinstrap | Dream | Female | 46.573529 | 46.30 | 17.588235 | 17.65 | 191.735294 | 192.0 | 3527.205882 | 3550.0 |
Male | 51.094118 | 50.95 | 19.252941 | 19.30 | 199.911765 | 200.5 | 3938.970588 | 3950.0 | ||
Gentoo | Biscoe | Female | 45.563793 | 45.50 | 14.237931 | 14.25 | 212.706897 | 212.0 | 4679.741379 | 4700.0 |
Male | 49.473770 | 49.50 | 15.718033 | 15.70 | 221.540984 | 221.0 | 5484.836066 | 5500.0 |
'Adelie', :] grouped_df.loc[
bill_length_mm | bill_depth_mm | flipper_length_mm | body_mass_g | ||||||
---|---|---|---|---|---|---|---|---|---|
mean | median | mean | median | mean | median | mean | median | ||
island | sex | ||||||||
Biscoe | Female | 37.359091 | 37.75 | 17.704545 | 17.70 | 187.181818 | 187.0 | 3369.318182 | 3375.0 |
Male | 40.590909 | 40.80 | 19.036364 | 18.90 | 190.409091 | 191.0 | 4050.000000 | 4000.0 | |
Dream | Female | 36.911111 | 36.80 | 17.618519 | 17.80 | 187.851852 | 188.0 | 3344.444444 | 3400.0 |
Male | 40.071429 | 40.25 | 18.839286 | 18.65 | 191.928571 | 190.5 | 4045.535714 | 3987.5 | |
Torgersen | Female | 37.554167 | 37.60 | 17.550000 | 17.45 | 188.291667 | 189.0 | 3395.833333 | 3400.0 |
Male | 40.586957 | 41.10 | 19.391304 | 19.20 | 194.913043 | 195.0 | 4034.782609 | 4000.0 |
'Adelie', "body_mass_g"] # works for the level 0 index on the rows or the columns grouped_df.loc[
mean | median | ||
---|---|---|---|
island | sex | ||
Biscoe | Female | 3369.318182 | 3375.0 |
Male | 4050.000000 | 4000.0 | |
Dream | Female | 3344.444444 | 3400.0 |
Male | 4045.535714 | 3987.5 | |
Torgersen | Female | 3395.833333 | 3400.0 |
Male | 4034.782609 | 4000.0 |
'Adelie', "mean"] # works for the level 0 index on the rows or the columns grouped_df.loc[
KeyError: 'mean'
'Adelie', 'Gentoo'], ['flipper_length_mm', 'body_mass_g']] grouped_df.loc[[
flipper_length_mm | body_mass_g | |||||
---|---|---|---|---|---|---|
mean | median | mean | median | |||
species | island | sex | ||||
Adelie | Biscoe | Female | 187.181818 | 187.0 | 3369.318182 | 3375.0 |
Male | 190.409091 | 191.0 | 4050.000000 | 4000.0 | ||
Dream | Female | 187.851852 | 188.0 | 3344.444444 | 3400.0 | |
Male | 191.928571 | 190.5 | 4045.535714 | 3987.5 | ||
Torgersen | Female | 188.291667 | 189.0 | 3395.833333 | 3400.0 | |
Male | 194.913043 | 195.0 | 4034.782609 | 4000.0 | ||
Gentoo | Biscoe | Female | 212.706897 | 212.0 | 4679.741379 | 4700.0 |
Male | 221.540984 | 221.0 | 5484.836066 | 5500.0 |
Cross-sections: Using .xs
to selet subset of rows at a given level
'Adelie', level=0) grouped_df.xs(
bill_length_mm | bill_depth_mm | flipper_length_mm | body_mass_g | ||||||
---|---|---|---|---|---|---|---|---|---|
mean | median | mean | median | mean | median | mean | median | ||
island | sex | ||||||||
Biscoe | Female | 37.359091 | 37.75 | 17.704545 | 17.70 | 187.181818 | 187.0 | 3369.318182 | 3375.0 |
Male | 40.590909 | 40.80 | 19.036364 | 18.90 | 190.409091 | 191.0 | 4050.000000 | 4000.0 | |
Dream | Female | 36.911111 | 36.80 | 17.618519 | 17.80 | 187.851852 | 188.0 | 3344.444444 | 3400.0 |
Male | 40.071429 | 40.25 | 18.839286 | 18.65 | 191.928571 | 190.5 | 4045.535714 | 3987.5 | |
Torgersen | Female | 37.554167 | 37.60 | 17.550000 | 17.45 | 188.291667 | 189.0 | 3395.833333 | 3400.0 |
Male | 40.586957 | 41.10 | 19.391304 | 19.20 | 194.913043 | 195.0 | 4034.782609 | 4000.0 |
'Adelie', level=0, drop_level=False) grouped_df.xs(
bill_length_mm | bill_depth_mm | flipper_length_mm | body_mass_g | |||||||
---|---|---|---|---|---|---|---|---|---|---|
mean | median | mean | median | mean | median | mean | median | |||
species | island | sex | ||||||||
Adelie | Biscoe | Female | 37.359091 | 37.75 | 17.704545 | 17.70 | 187.181818 | 187.0 | 3369.318182 | 3375.0 |
Male | 40.590909 | 40.80 | 19.036364 | 18.90 | 190.409091 | 191.0 | 4050.000000 | 4000.0 | ||
Dream | Female | 36.911111 | 36.80 | 17.618519 | 17.80 | 187.851852 | 188.0 | 3344.444444 | 3400.0 | |
Male | 40.071429 | 40.25 | 18.839286 | 18.65 | 191.928571 | 190.5 | 4045.535714 | 3987.5 | ||
Torgersen | Female | 37.554167 | 37.60 | 17.550000 | 17.45 | 188.291667 | 189.0 | 3395.833333 | 3400.0 | |
Male | 40.586957 | 41.10 | 19.391304 | 19.20 | 194.913043 | 195.0 | 4034.782609 | 4000.0 |
'Male', level=2, drop_level=False) grouped_df.xs(
bill_length_mm | bill_depth_mm | flipper_length_mm | body_mass_g | |||||||
---|---|---|---|---|---|---|---|---|---|---|
mean | median | mean | median | mean | median | mean | median | |||
species | island | sex | ||||||||
Adelie | Biscoe | Male | 40.590909 | 40.80 | 19.036364 | 18.90 | 190.409091 | 191.0 | 4050.000000 | 4000.0 |
Dream | Male | 40.071429 | 40.25 | 18.839286 | 18.65 | 191.928571 | 190.5 | 4045.535714 | 3987.5 | |
Torgersen | Male | 40.586957 | 41.10 | 19.391304 | 19.20 | 194.913043 | 195.0 | 4034.782609 | 4000.0 | |
Chinstrap | Dream | Male | 51.094118 | 50.95 | 19.252941 | 19.30 | 199.911765 | 200.5 | 3938.970588 | 3950.0 |
Gentoo | Biscoe | Male | 49.473770 | 49.50 | 15.718033 | 15.70 | 221.540984 | 221.0 | 5484.836066 | 5500.0 |
'Adelie', 'Dream'), level=(0, 1), drop_level=False) grouped_df.xs((
bill_length_mm | bill_depth_mm | flipper_length_mm | body_mass_g | |||||||
---|---|---|---|---|---|---|---|---|---|---|
mean | median | mean | median | mean | median | mean | median | |||
species | island | sex | ||||||||
Adelie | Dream | Female | 36.911111 | 36.80 | 17.618519 | 17.80 | 187.851852 | 188.0 | 3344.444444 | 3400.0 |
Male | 40.071429 | 40.25 | 18.839286 | 18.65 | 191.928571 | 190.5 | 4045.535714 | 3987.5 |
'Adelie', 'Male'), level=(0, 2), drop_level=False) grouped_df.xs((
bill_length_mm | bill_depth_mm | flipper_length_mm | body_mass_g | |||||||
---|---|---|---|---|---|---|---|---|---|---|
mean | median | mean | median | mean | median | mean | median | |||
species | island | sex | ||||||||
Adelie | Biscoe | Male | 40.590909 | 40.80 | 19.036364 | 18.90 | 190.409091 | 191.0 | 4050.000000 | 4000.0 |
Dream | Male | 40.071429 | 40.25 | 18.839286 | 18.65 | 191.928571 | 190.5 | 4045.535714 | 3987.5 | |
Torgersen | Male | 40.586957 | 41.10 | 19.391304 | 19.20 | 194.913043 | 195.0 | 4034.782609 | 4000.0 |
using pd.indexSlice
to use : and slices
grouped_df
bill_length_mm | bill_depth_mm | flipper_length_mm | body_mass_g | |||||||
---|---|---|---|---|---|---|---|---|---|---|
mean | median | mean | median | mean | median | mean | median | |||
species | island | sex | ||||||||
Adelie | Biscoe | Female | 37.359091 | 37.75 | 17.704545 | 17.70 | 187.181818 | 187.0 | 3369.318182 | 3375.0 |
Male | 40.590909 | 40.80 | 19.036364 | 18.90 | 190.409091 | 191.0 | 4050.000000 | 4000.0 | ||
Dream | Female | 36.911111 | 36.80 | 17.618519 | 17.80 | 187.851852 | 188.0 | 3344.444444 | 3400.0 | |
Male | 40.071429 | 40.25 | 18.839286 | 18.65 | 191.928571 | 190.5 | 4045.535714 | 3987.5 | ||
Torgersen | Female | 37.554167 | 37.60 | 17.550000 | 17.45 | 188.291667 | 189.0 | 3395.833333 | 3400.0 | |
Male | 40.586957 | 41.10 | 19.391304 | 19.20 | 194.913043 | 195.0 | 4034.782609 | 4000.0 | ||
Chinstrap | Dream | Female | 46.573529 | 46.30 | 17.588235 | 17.65 | 191.735294 | 192.0 | 3527.205882 | 3550.0 |
Male | 51.094118 | 50.95 | 19.252941 | 19.30 | 199.911765 | 200.5 | 3938.970588 | 3950.0 | ||
Gentoo | Biscoe | Female | 45.563793 | 45.50 | 14.237931 | 14.25 | 212.706897 | 212.0 | 4679.741379 | 4700.0 |
Male | 49.473770 | 49.50 | 15.718033 | 15.70 | 221.540984 | 221.0 | 5484.836066 | 5500.0 |
= pd.IndexSlice
idx
grouped_df.loc['Biscoe', :], # rows
idx[:, # columns
idx[:, :] ]
bill_length_mm | bill_depth_mm | flipper_length_mm | body_mass_g | |||||||
---|---|---|---|---|---|---|---|---|---|---|
mean | median | mean | median | mean | median | mean | median | |||
species | island | sex | ||||||||
Adelie | Biscoe | Female | 37.359091 | 37.75 | 17.704545 | 17.70 | 187.181818 | 187.0 | 3369.318182 | 3375.0 |
Male | 40.590909 | 40.80 | 19.036364 | 18.90 | 190.409091 | 191.0 | 4050.000000 | 4000.0 | ||
Gentoo | Biscoe | Female | 45.563793 | 45.50 | 14.237931 | 14.25 | 212.706897 | 212.0 | 4679.741379 | 4700.0 |
Male | 49.473770 | 49.50 | 15.718033 | 15.70 | 221.540984 | 221.0 | 5484.836066 | 5500.0 |
grouped_df.loc["Adelie", :, "Male"], # rows
idx["flipper_length_mm", "body_mass_g"], "median"] # columns
idx[[ ]
flipper_length_mm | body_mass_g | |||
---|---|---|---|---|
median | median | |||
species | island | sex | ||
Adelie | Biscoe | Male | 191.0 | 4000.0 |
Dream | Male | 190.5 | 3987.5 | |
Torgersen | Male | 195.0 | 4000.0 |
Sorting index for efficient slicing
= grouped_df.loc[idx[:, ['Dream', 'Biscoe'], ['Male']], idx[:, 'mean']]
subset_df subset_df
bill_length_mm | bill_depth_mm | flipper_length_mm | body_mass_g | |||
---|---|---|---|---|---|---|
mean | mean | mean | mean | |||
species | island | sex | ||||
Adelie | Dream | Male | 40.071429 | 18.839286 | 191.928571 | 4045.535714 |
Chinstrap | Dream | Male | 51.094118 | 19.252941 | 199.911765 | 3938.970588 |
Adelie | Biscoe | Male | 40.590909 | 19.036364 | 190.409091 | 4050.000000 |
Gentoo | Biscoe | Male | 49.473770 | 15.718033 | 221.540984 | 5484.836066 |
subset_df.index.is_monotonic_increasing
False
# if not monotonic increasing, please sort it for me subset_df.sort_index()
bill_length_mm | bill_depth_mm | flipper_length_mm | body_mass_g | |||
---|---|---|---|---|---|---|
mean | mean | mean | mean | |||
species | island | sex | ||||
Adelie | Biscoe | Male | 40.590909 | 19.036364 | 190.409091 | 4050.000000 |
Dream | Male | 40.071429 | 18.839286 | 191.928571 | 4045.535714 | |
Chinstrap | Dream | Male | 51.094118 | 19.252941 | 199.911765 | 3938.970588 |
Gentoo | Biscoe | Male | 49.473770 | 15.718033 | 221.540984 | 5484.836066 |
=0)["Adelie":"Chinstrap", :, :] subset_df.loc(axis
UnsortedIndexError: 'MultiIndex slicing requires the index to be lexsorted: slicing on levels [0], lexsort depth 0'
=0)["Adelie":"Chinstrap", :, :] subset_df.sort_index().loc(axis
bill_length_mm | bill_depth_mm | flipper_length_mm | body_mass_g | |||
---|---|---|---|---|---|---|
mean | mean | mean | mean | |||
species | island | sex | ||||
Adelie | Biscoe | Male | 40.590909 | 19.036364 | 190.409091 | 4050.000000 |
Dream | Male | 40.071429 | 18.839286 | 191.928571 | 4045.535714 | |
Chinstrap | Dream | Male | 51.094118 | 19.252941 | 199.911765 | 3938.970588 |
'a', 'b', 'c', 'd'][0:2] [
['a', 'b']