nycflights13 package

import seaborn as sns
import pandas as pd
import plotly.express as px

The nycflights13 package

The nycflights13 Python package gives quick access to several dataframes covering flights, airports, airlines, planes and weather for flights out of New York in 2013. It is useful for illustrating how to aggregate, enrich and merge dataframes to create meaningful visualizations.

from nycflights13 import flights, airports, airlines, planes, weather
# Preview the first five rows of the flights table.
flights.head(n=5)
year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time arr_delay carrier flight tailnum origin dest air_time distance hour minute time_hour
0 2013 1 1 517.0 515 2.0 830.0 819 11.0 UA 1545 N14228 EWR IAH 227.0 1400 5 15 2013-01-01T10:00:00Z
1 2013 1 1 533.0 529 4.0 850.0 830 20.0 UA 1714 N24211 LGA IAH 227.0 1416 5 29 2013-01-01T10:00:00Z
2 2013 1 1 542.0 540 2.0 923.0 850 33.0 AA 1141 N619AA JFK MIA 160.0 1089 5 40 2013-01-01T10:00:00Z
3 2013 1 1 544.0 545 -1.0 1004.0 1022 -18.0 B6 725 N804JB JFK BQN 183.0 1576 5 45 2013-01-01T10:00:00Z
4 2013 1 1 554.0 600 -6.0 812.0 837 -25.0 DL 461 N668DN LGA ATL 116.0 762 6 0 2013-01-01T11:00:00Z
# Preview the first five rows of the airports table (keyed by FAA code).
airports.head(n=5)
faa name lat lon alt tz dst tzone
0 04G Lansdowne Airport 41.130472 -80.619583 1044 -5 A America/New_York
1 06A Moton Field Municipal Airport 32.460572 -85.680028 264 -6 A America/Chicago
2 06C Schaumburg Regional 41.989341 -88.101243 801 -6 A America/Chicago
3 06N Randall Airport 41.431912 -74.391561 523 -5 A America/New_York
4 09J Jekyll Island Airport 31.074472 -81.427778 11 -5 A America/New_York
# Preview the first five rows of the planes table (keyed by tail number).
planes.head(n=5)
tailnum year type manufacturer model engines seats speed engine
0 N10156 2004.0 Fixed wing multi engine EMBRAER EMB-145XR 2 55 NaN Turbo-fan
1 N102UW 1998.0 Fixed wing multi engine AIRBUS INDUSTRIE A320-214 2 182 NaN Turbo-fan
2 N103US 1999.0 Fixed wing multi engine AIRBUS INDUSTRIE A320-214 2 182 NaN Turbo-fan
3 N104UW 1999.0 Fixed wing multi engine AIRBUS INDUSTRIE A320-214 2 182 NaN Turbo-fan
4 N10575 2002.0 Fixed wing multi engine EMBRAER EMB-145LR 2 55 NaN Turbo-fan
# Preview the first five rows of the airlines table (carrier code -> name).
airlines.head(n=5)
carrier name
0 9E Endeavor Air Inc.
1 AA American Airlines Inc.
2 AS Alaska Airlines Inc.
3 B6 JetBlue Airways
4 DL Delta Air Lines Inc.
# Preview the first five rows of the weather table.
weather.head(n=5)
origin year month day hour temp dewp humid wind_dir wind_speed wind_gust precip pressure visib time_hour
0 EWR 2013 1 1 1 39.02 26.06 59.37 270.0 10.35702 NaN 0.0 1012.0 10.0 2013-01-01T06:00:00Z
1 EWR 2013 1 1 2 39.02 26.96 61.63 250.0 8.05546 NaN 0.0 1012.3 10.0 2013-01-01T07:00:00Z
2 EWR 2013 1 1 3 39.02 28.04 64.43 240.0 11.50780 NaN 0.0 1012.5 10.0 2013-01-01T08:00:00Z
3 EWR 2013 1 1 4 39.92 28.04 62.21 250.0 12.65858 NaN 0.0 1012.2 10.0 2013-01-01T09:00:00Z
4 EWR 2013 1 1 5 39.02 28.04 64.43 260.0 12.65858 NaN 0.0 1011.9 10.0 2013-01-01T10:00:00Z

Illustration: counting the number of flights per destination airport

# Count the flights arriving at each destination airport.
# The result is a Series indexed by the `dest` airport code; its name becomes
# the column label when the Series is merged into a dataframe.
series_number_of_flights = (
    flights
    .groupby('dest')
    .size()
    .rename('Count flights')
)
series_number_of_flights
dest
ABQ      254
ACK      265
ALB      439
ANC        8
ATL    17215
       ...  
TPA     7466
TUL      315
TVC      101
TYS      631
XNA     1036
Name: Count flights, Length: 105, dtype: int64
# Enrich the airport metadata with the per-destination flight counts by
# matching each airport's FAA code against the `dest` index of the counts.
# This is an inner join (the default), so only codes present on both sides
# survive -- which is why fewer rows come out than there are destinations.
airports_with_flight_counts = pd.merge(
    airports,
    series_number_of_flights,
    how='inner',
    left_on='faa',
    right_index=True,
)
airports_with_flight_counts
faa name lat lon alt tz dst tzone Count flights
87 ABQ Albuquerque International Sunport 35.040222 -106.609194 5355 -7 A America/Denver 254
91 ACK Nantucket Mem 41.253053 -70.060181 48 -5 A America/New_York 265
118 ALB Albany Intl 42.748267 -73.801692 285 -5 A America/New_York 439
128 ANC Ted Stevens Anchorage Intl 61.174361 -149.996361 152 -9 A America/Anchorage 8
153 ATL Hartsfield Jackson Atlanta Intl 33.636719 -84.428067 1026 -5 A America/New_York 17215
... ... ... ... ... ... ... ... ... ...
1327 TPA Tampa Intl 27.975472 -82.533250 26 -5 A America/New_York 7466
1334 TUL Tulsa Intl 36.198389 -95.888111 677 -6 A America/Chicago 315
1337 TVC Cherry Capital Airport 44.741445 -85.582235 624 -5 A America/New_York 101
1347 TYS Mc Ghee Tyson 35.810972 -83.994028 981 -5 A America/New_York 631
1430 XNA NW Arkansas Regional 36.281869 -94.306811 1287 -6 A America/Chicago 1036

101 rows × 9 columns

# Spot-check two large airports: LAX and SFO.
large_airports_filter = 'faa == "LAX" or faa == "SFO"'
airports_with_flight_counts.query(large_airports_filter)
faa name lat lon alt tz dst tzone Count flights
770 LAX Los Angeles Intl 33.942536 -118.408075 126 -8 A America/Los_Angeles 16174
1216 SFO San Francisco Intl 37.618972 -122.374889 13 -8 A America/Los_Angeles 13331

Mean delay per airport

# Mean departure delay per destination airport.
# NOTE: despite the variable name, aggregating a two-column selection yields a
# one-column *DataFrame* indexed by `dest`, not a Series. Assigning `.name` on
# a DataFrame only sets a stray Python attribute and renames nothing, so that
# assignment was removed; the column keeps its original label `dep_delay`,
# which is the label used when merging and plotting.
series_airport_with_mean_delay = (
    flights[['dest', 'dep_delay']]
    .groupby('dest')
    .mean()  # missing delays (NaN) are skipped by default
)
series_airport_with_mean_delay.head()
dep_delay
dest
ABQ 13.740157
ACK 6.456604
ALB 23.620525
ANC 12.875000
ATL 12.509824

Mean delay and number of flights, on a map

# Build the plotting table in two steps, both keyed on the airport FAA code
# matched against the `dest` index of the aggregated data:
#   1. airport metadata + number of flights,
#   2. that result + mean departure delay (adds the `dep_delay` column).
merge_on_faa = dict(left_on='faa', right_index=True)
airports_with_counts = airports.merge(series_number_of_flights, **merge_on_faa)
airports_with_flight_counts_with_delays = airports_with_counts.merge(
    series_airport_with_mean_delay,
    **merge_on_faa,
)
# Spot-check the same two large airports as before.
airports_with_flight_counts_with_delays.query('faa == "LAX" or faa == "SFO"')
faa name lat lon alt tz dst tzone Count flights dep_delay
770 LAX Los Angeles Intl 33.942536 -118.408075 126 -8 A America/Los_Angeles 16174 9.401344
1216 SFO San Francisco Intl 37.618972 -122.374889 13 -8 A America/Los_Angeles 13331 12.866289
# Bubble map of destination airports: marker size encodes the number of
# flights, marker color encodes the mean departure delay, and the view is
# fitted to the plotted locations. Hovering a marker shows the airport name.
fig = px.scatter_geo(
    airports_with_flight_counts_with_delays,
    lat='lat',
    lon='lon',
    size='Count flights',
    color='dep_delay',
    hover_name="name",
    fitbounds="locations",
)
fig.show()