Aviation Data

Aviation Data#

import os
os.chdir("../../")

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import itertools

from bokeh.palettes import Spectral10
from bokeh.plotting import show, figure, output_notebook
from bokeh.models import CustomJS, Select, ColumnDataSource, HoverTool, BoxZoomTool, ResetTool
from bokeh.layouts import row, column, Spacer

output_notebook()
Loading BokehJS ...
ad = pd.read_excel(os.getcwd() +"/data/tourism/aviation_seats_flights_pic.xlsx")
ad.columns = [col.lower() for col in ad.columns]
ad["date"] = pd.to_datetime(ad["date"])
ad.head(5)
country iso region date aircraft_type seats_arrivals_domestic seats_arrivals_interregional seats_arrivals_intraregional seats_arrivals_intl seats_arrivals_total available_seat_kilometers number_of_flights_domestic number_of_flights_interregional number_of_flights_intraregional number_of_flights_intl number_of_flights_total
0 Fiji FJ East Asia & Pacific 2019-01-01 passenger 839 273 3480 3753 4592 1.430416e+07 8 1 10 11 19
1 Fiji FJ East Asia & Pacific 2019-01-02 passenger 974 313 3471 3784 4758 1.495610e+07 8 1 10 11 19
2 Fiji FJ East Asia & Pacific 2019-01-03 passenger 1190 443 3675 4118 5308 1.592143e+07 10 2 12 14 24
3 Fiji FJ East Asia & Pacific 2019-01-04 passenger 831 586 3159 3745 4576 1.457334e+07 7 2 12 14 21
4 Fiji FJ East Asia & Pacific 2019-01-05 passenger 744 273 4752 5025 5769 1.773449e+07 7 1 12 13 20
ad.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10611 entries, 0 to 10610
Data columns (total 16 columns):
 #   Column                           Non-Null Count  Dtype         
---  ------                           --------------  -----         
 0   country                          10611 non-null  object        
 1   iso                              10611 non-null  object        
 2   region                           10611 non-null  object        
 3   date                             10611 non-null  datetime64[ns]
 4   aircraft_type                    10566 non-null  object        
 5   seats_arrivals_domestic          10611 non-null  int64         
 6   seats_arrivals_interregional     10611 non-null  int64         
 7   seats_arrivals_intraregional     10611 non-null  int64         
 8   seats_arrivals_intl              10611 non-null  int64         
 9   seats_arrivals_total             10611 non-null  int64         
 10  available_seat_kilometers        10611 non-null  float64       
 11  number_of_flights_domestic       10611 non-null  int64         
 12  number_of_flights_interregional  10611 non-null  int64         
 13  number_of_flights_intraregional  10611 non-null  int64         
 14  number_of_flights_intl           10611 non-null  int64         
 15  number_of_flights_total          10611 non-null  int64         
dtypes: datetime64[ns](1), float64(1), int64(10), object(4)
memory usage: 1.3+ MB
select_cols = ["country", "date", "seats_arrivals_intl", "seats_arrivals_total",
               "number_of_flights_intl", "number_of_flights_total"]

# Subset to the passenger
ad = (ad[ad["aircraft_type"] == "passenger"][select_cols]
      .reset_index().drop("index", axis=1))
display(ad.head(5))
country date seats_arrivals_intl seats_arrivals_total number_of_flights_intl number_of_flights_total
0 Fiji 2019-01-01 3753 4592 11 19
1 Fiji 2019-01-02 3784 4758 11 19
2 Fiji 2019-01-03 4118 5308 14 24
3 Fiji 2019-01-04 3745 4576 14 21
4 Fiji 2019-01-05 5025 5769 13 20
daterange = pd.date_range(start=ad.date.min(),
                          end=ad.date.max(),
                          freq="D").tolist()
countries = ad.country.unique().tolist()
comb = itertools.product(daterange, countries)

date_df = pd.DataFrame()
for i in comb:
    row = pd.DataFrame(i).T
    date_df = pd.concat([date_df, row], axis=0)
date_df.columns = ["date", "country"]
ad = date_df.merge(ad, how="left").fillna(0)
ad.sample(5)
date country seats_arrivals_intl seats_arrivals_total number_of_flights_intl number_of_flights_total
5942 2020-05-10 Marshall Islands (the) 0.0 0.0 0.0 0.0
6235 2020-06-03 Samoa 0.0 0.0 0.0 0.0
10773 2021-06-16 Tonga 302.0 302.0 1.0 1.0
7530 2020-09-19 Papua New Guinea 376.0 3467.0 2.0 29.0
2751 2019-08-18 Micronesia (Federated States of) 0.0 239.0 0.0 2.0
ad_7dsum = pd.DataFrame()
for country in countries:
    country_df = (ad[ad.country == country].sort_values(by="date")
                  .reset_index().drop("index", axis=1))
    country_df.iloc[:, -4:] = country_df.iloc[:, -4:].rolling(window=7).sum()
    ad_7dsum = pd.concat([ad_7dsum, country_df], axis=0)
nof_7dsum = (ad_7dsum[["date", "country", "seats_arrivals_intl"]]
             .pivot(index="date", columns="country", values="seats_arrivals_intl")
             .reset_index())
nof_7dsum.columns.name = None
nof_7dsum.sample(5)
date Fiji Kiribati Marshall Islands (the) Micronesia (Federated States of) Nauru Palau Papua New Guinea Samoa Solomon Islands Tonga Tuvalu Vanuatu
822 2021-04-02 3477.0 170.0 0.0 166.0 0.0 482.0 2745.0 302.0 306.0 438.0 0.0 818.0
1038 2021-11-04 6066.0 0.0 332.0 498.0 144.0 482.0 2926.0 0.0 772.0 275.0 0.0 920.0
1214 2022-04-29 17561.0 144.0 166.0 332.0 0.0 498.0 3661.0 599.0 288.0 476.0 0.0 0.0
222 2019-08-11 25127.0 714.0 996.0 1361.0 0.0 3416.0 8372.0 5169.0 2234.0 3293.0 0.0 3612.0
1171 2022-03-17 10458.0 484.0 346.0 332.0 144.0 332.0 3087.0 472.0 288.0 302.0 0.0 580.0
Hide code cell source
cds = ColumnDataSource(nof_7dsum)
render_cds = ColumnDataSource(
    {"x": nof_7dsum["date"],
     "y": nof_7dsum["Fiji"],
     }
)

countries = nof_7dsum.columns[~nof_7dsum.columns.isin(["date"])].tolist()

# Hovertool config
hover = HoverTool(tooltips=[('Date', '@x{%Y-%m-%d}'),
                            ('Number', '@y')],
                  formatters={'@x': 'datetime'})

p = figure(width=800, height=500, x_axis_type="datetime",
           tools=[hover, BoxZoomTool(), ResetTool()])
p.line(x="x", y="y", source=render_cds)


# Selector
country_select = Select(title="Select Country", width=300,
                        options=countries, value="Country")

jscode = """
// New data
render_cds.data['y'] = cds.data[country_select.value];
yaxis[0].axis_label = country_select.value;
render_cds.change.emit();
"""

args = dict(
    render_cds=render_cds,
    cds=cds,
    country_select=country_select,
    yaxis=p.yaxis,
)
country_select.js_on_change("value", CustomJS(code=jscode, args=args))

layout = column(country_select, Spacer(height=30), p)
show(layout)