# Google Trends
import os
# Move the working directory two levels up before importing project code.
# NOTE(review): presumably this lands on the repository root so that `src`
# and the relative `data/` paths used below resolve - confirm.
os.chdir("../../")
import inspect
import numpy as np
import pandas as pd
from src.google_trends import GT
from bokeh.layouts import column
from bokeh.models import ColumnDataSource, CustomJS, Select, HoverTool
from bokeh.plotting import figure, show, output_notebook
# Configure Bokeh to render plots inline in the notebook.
output_notebook()
The following code was modified from the Development Data Partnership. See example code.
# Print the source code of the imported GT class for reference.
print(inspect.getsource(GT))
class GT:
    """Small wrapper around a discovery-built Google Trends API service.

    Each ``get_*`` method issues one request and returns the decoded
    response on success, or an empty list when the request fails with a
    handled error (the failure is logged as a warning).
    """

    def __init__(self, _GOOGLE_API_KEY):
        """Create the API service handle.

        Args:
            _GOOGLE_API_KEY: Developer key forwarded to ``build``.
        """
        self.service = build(
            serviceName=SERVICE_NAME,
            version=SERVICE_VERSION,
            discoveryServiceUrl=_DISCOVERY_SERVICE_URL,
            developerKey=_GOOGLE_API_KEY,
            cache_discovery=False)
        # Set to the following midnight once the daily quota is exhausted.
        self.block_until = None

    @staticmethod
    def _error_code_and_reason(http_error):
        """Extract ``(code, reason)`` from an HTTP error payload.

        Returns ``(None, None)`` when the body is not JSON or does not have
        the expected ``error`` structure, so that a malformed payload never
        masks the original HTTP error.
        """
        try:
            data = json.loads(http_error.content.decode('utf-8'))
            error = data['error']
            return error['code'], error['errors'][0]['reason']
        except (AttributeError, ValueError, KeyError, IndexError, TypeError):
            return None, None

    def get_health_trends(self, terms, timelineResolution="month"):
        """Request health timelines for ``terms``.

        Args:
            terms: Search terms forwarded to ``getTimelinesForHealth``.
            timelineResolution: Resolution forwarded to the API call.

        Returns:
            The executed response, or ``[]`` if the request failed with an
            ``HttpError`` other than an exhausted daily quota.

        Raises:
            RuntimeError: If the API answers 403 / ``dailyLimitExceeded``;
                ``block_until`` is set to the next midnight beforehand.
        """
        graph = self.service.getTimelinesForHealth(
            terms=terms,
            timelineResolution=timelineResolution
        )
        try:
            return graph.execute()
        except HttpError as http_error:
            code, reason = self._error_code_and_reason(http_error)
            if code == 403 and reason == 'dailyLimitExceeded':
                self.block_until = datetime.combine(
                    date.today() + timedelta(days=1), dtime.min)
                # Chain the cause so the underlying HTTP error stays visible.
                raise RuntimeError('%s: blocked until %s' %
                                   (reason, self.block_until)) from http_error
            logging.warning(http_error)
            return []

    def get_graph(self, terms,
                  restrictions_geo,
                  restrictions_startDate="2004-01"):
        """Request the trends graph for ``terms`` in a region.

        Args:
            terms: Search terms forwarded to ``getGraph``.
            restrictions_geo: Geographic restriction forwarded to the API.
            restrictions_startDate: Start date restriction (default
                ``"2004-01"``).

        Returns:
            The executed response, or ``[]`` if an ``HttpError`` occurred.
        """
        graph = self.service.getGraph(
            terms=terms,
            restrictions_geo=restrictions_geo,
            restrictions_startDate=restrictions_startDate
        )
        try:
            return graph.execute()
        except HttpError as http_error:
            logging.warning(http_error)
            return []

    def get_top_topics(self, term,
                       restrictions_geo,
                       restrictions_startDate="2004-01"):
        """Request the top topics for a single ``term`` in a region.

        Args:
            term: Search term forwarded to ``getTopTopics``.
            restrictions_geo: Geographic restriction forwarded to the API.
            restrictions_startDate: Start date restriction (default
                ``"2004-01"``).

        Returns:
            The executed response, or ``[]`` if any exception occurred.
        """
        graph = self.service.getTopTopics(
            term=term,
            restrictions_geo=restrictions_geo,
            restrictions_startDate=restrictions_startDate
        )
        try:
            return graph.execute()
        # NOTE(review): broader than the HttpError caught by the sibling
        # methods; kept as-is so callers see the same best-effort behavior.
        except Exception as e:
            logging.warning(e)
            return []

    @staticmethod
    def to_df(result: dict) -> pd.DataFrame:
        """Flatten an API response into a long-format DataFrame.

        Args:
            result: Response whose ``"lines"`` entry is a list of records,
                each with a ``"term"`` and a list of ``"points"``.

        Returns:
            One row per point with the owning ``term`` attached; a ``date``
            column, when present, is converted to datetimes. An empty
            DataFrame is returned for an empty result, such as the ``[]``
            the ``get_*`` methods return on failure.
        """
        if not result:
            return pd.DataFrame()
        df = pd.json_normalize(
            result["lines"], meta=["term"], record_path=["points"])
        if "date" in df.columns:
            df["date"] = pd.to_datetime(df["date"])
        return df
# Folder with the per-country Google Trends CSV exports (plus a readme).
folder = os.getcwd() + "/data/tourism/trends/"
filepaths = [os.path.join(folder, path) for path in os.listdir(folder)]
GoogleAPI = os.getenv("GoogleAPIkey")

# Build one wide table: a shared `date` column plus the last three columns
# of every country file, left-joined onto the first file that is read.
trends = pd.DataFrame()
for file in filepaths:
    # Skip the readme by file name only, ignoring case, so that neither the
    # parent directories nor an upper-case "README" affect the filter.
    if "readme" in os.path.basename(file).lower():
        continue
    country_trend = (pd.read_csv(file).drop("Unnamed: 0", axis=1)
                     .iloc[:, [0, -3, -2, -1]])
    country_trend.columns = [col.replace(" ", "_").lower()
                             for col in country_trend.columns]
    country_trend["date"] = pd.to_datetime(country_trend["date"])
    if trends.empty:
        trends = country_trend
    else:
        trends = trends.merge(country_trend, how="left", on="date")
display(trends.head(5))
date | palau_flights | palau_hotel | palau_travel | solomon_islands_flights | solomon_islands_hotel | solomon_islands_travel | tonga_flights | tonga_hotel | tonga_travel | vanuatu_flights | vanuatu_hotel | vanuatu_travel | fiji_flights | fiji_hotel | fiji_travel | samoa_flights | samoa_hotel | samoa_travel | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2004-01-01 | 0.000000 | 4.374890 | 1.137126 | 0.000000 | 1.511560 | 0.0 | 2.880180 | 1.414027 | 1.422723 | 0.000000 | 1.641851 | 4.364237 | 7.736326 | 12.222561 | 21.884040 | 0.000000 | 1.130191 | 1.297947 |
1 | 2004-02-01 | 0.000000 | 4.834610 | 0.000000 | 0.923086 | 0.883077 | 0.0 | 4.318772 | 6.542797 | 0.000000 | 0.896698 | 1.434310 | 2.362955 | 8.971634 | 13.282344 | 19.926081 | 4.138847 | 5.995897 | 1.808004 |
2 | 2004-03-01 | 0.000000 | 6.227588 | 1.372316 | 0.000000 | 1.216019 | 0.0 | 0.000000 | 4.433156 | 0.000000 | 4.973685 | 2.122763 | 4.711485 | 11.508566 | 12.604433 | 18.692017 | 0.000000 | 0.000000 | 2.331247 |
3 | 2004-04-01 | 0.000000 | 4.139832 | 0.000000 | 0.994321 | 2.589010 | 0.0 | 0.000000 | 0.927113 | 0.000000 | 1.273081 | 0.000000 | 3.575172 | 6.667417 | 8.880059 | 15.798739 | 0.000000 | 2.146069 | 0.000000 |
4 | 2004-05-01 | 0.900479 | 6.006549 | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.742281 | 1.906869 | 3.958648 | 0.980399 | 1.270780 | 2.740211 | 9.652304 | 13.673188 | 15.091649 | 0.000000 | 2.409763 | 0.000000 |
Show code cell source
# Hand every column of `trends` to Bokeh as a plain array.
source = ColumnDataSource(
    dict((col_name, series.values) for col_name, series in trends.items())
)

# Series drawn on first render, and the series the dropdown can switch to.
initial_value = "palau_travel"
col_lst = [name for name in trends.columns if name.endswith("travel")]

p = figure(height=600, width=880, x_axis_type="datetime")
line = p.line("date", initial_value, source=source, name=initial_value)

# Select widget: choosing an entry re-points the line's y field in the browser.
swap_series = CustomJS(
    args={"line": line},
    code="line.glyph.y = {field: cb_obj.value};",
)
dropdown = Select(
    title="Country",
    value=initial_value,
    options=col_lst,
    width=200,
)
dropdown.js_on_change("value", swap_series)

show(column(dropdown, p))