In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import Span
from shapely.geometry import Point
import geopandas as gpd
import glob
import bokeh
from datetime import datetime
from bokeh.layouts import column
from bokeh.models import Legend, Tabs, TabPanel
from bokeh.core.validation.warnings import MISSING_RENDERERS, EMPTY_LAYOUT

# Set fonts for matplotlib
plt.rcParams["font.family"] = "Arial"
plt.rcParams["font.size"] = 14

In [2]:

bokeh.core.validation.silence(EMPTY_LAYOUT, True)
bokeh.core.validation.silence(EMPTY_LAYOUT, True)



In [3]:
# Function to convert a pandas dataframe to a geopandas dataframe
def convert_to_gdf(df):
    geometry = [Point(xy) for xy in zip(df.longitude, df.latitude)]
    gdf = gpd.GeoDataFrame(df, crs="EPSG:4326", geometry=geometry)

    return gdf

In [4]:
# Read shapefiles from HdX UNOCHA
bgd_adm2 = gpd.read_file(
    "../../data/bgd_adm_bbs_20201113_SHP/bgd_admbnda_adm2_bbs_20201113.shp"
)

# Only keep needed variables 
bgd_adm2 = bgd_adm2[["ADM2_EN","ADM1_EN","ADM0_EN",  "geometry"]]

bgd_adm1 = gpd.read_file(
    "../../data/bgd_adm_bbs_20201113_SHP/bgd_admbnda_adm1_bbs_20201113.shp"
)

# **Movement Distribution Analysis**

The **Movement Distribution** dataset tracks mobility patterns using Facebook mobile app data, spanning **December 1, 2022, to December 1, 2024**. This dataset provides insights into:
- **Average travel distances from home.**
- **Mobility changes in response to events like public health messaging, policies, or crises.**

---

## **Purpose**
Analyze movement patterns in Bangladesh over the past two years, with a focus on the recent political crisis.

---

## **Objectives**
1. **District-Level Trends:**  
   Explore how mobility patterns vary across key districts in Bangladesh.  

2. **National Trends:**  
   Analyze changes in mobility categories (e.g., 0 km, 10â€“100 km) at the national level over time.  

3. **Baseline Movement:**  
   Year-by-year, day-by-day comparison using baseline period (December 2022 - November 2023).

4. **Mobility Maps:**  
   Visualize changes in mobility patterns by admin-2 regions to highlight shifts in people staying close to home.



In [8]:
# Read the movement between places during crisis data
all_files = glob.glob("../../data/movement-analysis/raw/movement-distribution/*.csv")

li = [] 

for file in all_files:
    df1 = pd.read_csv(file)
    li.append(df1)

# Concatenate all the data into one DataFrame
movementDistribution = pd.concat(li, axis=0)

In [9]:
# Filter the dataset for Bangladesh
movementDistribution = movementDistribution[movementDistribution["country"] == "BGD"]

# Save as CSV 
movementDistribution.to_csv("../../data/movement-analysis/processed/movement-distribution/movement_distribution_bgd.csv", index=False)

        gadm_id  gadm_name country  polygon_level  \
1732  BGD.3.3_1    Gazipur     BGD              2   
1734  BGD.1.4_1  Jhalokati     BGD              2   
1750  BGD.1.4_1  Jhalokati     BGD              2   
1840  BGD.3.3_1    Gazipur     BGD              2   
1918  BGD.3.3_1    Gazipur     BGD              2   

     home_to_ping_distance_category  distance_category_ping_fraction  \
1732                              0                         0.416746   
1734                           100+                         0.004712   
1750                      [10, 100)                         0.027272   
1840                           100+                         0.001829   
1918                        (0, 10)                         0.556109   

              ds  
1732  2022-12-01  
1734  2022-12-01  
1750  2022-12-01  
1840  2022-12-01  
1918  2022-12-01  


In [5]:
# If you already have movement distribution BGD start running from this point

# Read the movement distribution data
movementDistribution = pd.read_csv("../../data/movement-analysis/processed/movement-distribution/movement_distribution_bgd.csv")

# Change the admin column to upper case
movementDistribution["ADM2_EN"] = movementDistribution["gadm_name"].str.upper()

In [6]:
# 1. I am using the population during crisis data, that contains n_baseline information on the number of users with their lcoations turn on in a baseline period
populationBaseline = pd.read_csv("../../data/movement-analysis/raw/population-during-crisis/2515052155094649_2024-08-25_0000.csv")

In [7]:
# Steps to convert the movement distribution to national level

# 1. I am using the population during crisis data, that contains n_baseline information on the number of users with their lcoations turn on in a baseline period
populationBaseline = pd.read_csv("../../data/movement-analysis/raw/population-during-crisis/2515052155094649_2024-08-25_0000.csv")

# 2. Convert to geoDataFrame 

populationBaseline = convert_to_gdf(populationBaseline)

# 3. Filter only one date and country

populationBaseline = populationBaseline[populationBaseline["ds"] == "2024-08-25"]
populationBaseline = populationBaseline[populationBaseline["country"] == "BD"]

# 4. Spatial join the population baseline data with the adm2 shapefile to get the adm2 level population

joined_data = gpd.sjoin(populationBaseline, bgd_adm2, how="left", predicate="intersects")

# 5. Group the movement distribution data by date and destination_adm2_name

aggregated_data = joined_data.groupby("ADM2_EN")["n_baseline"].sum().reset_index()

# 6. calculate weights for national aggregation
total_users = aggregated_data["n_baseline"].sum()
aggregated_data["weights"] = aggregated_data["n_baseline"] / total_users

# Change admin column to upper case
aggregated_data["ADM2_EN"] = aggregated_data["ADM2_EN"].str.upper()

# write the aggregated data to a csv

aggregated_data.to_csv("../../data/movement-analysis/processed/movement-distribution/aggregated_data.csv", index=False)

# Merge the movement distribution data with the aggregated data to add weights variable

movementDistribution = movementDistribution.merge(aggregated_data, left_on="ADM2_EN", right_on="ADM2_EN", how="left")

In [9]:
from bokeh.palettes import Set2
from bokeh.models import Span, Label, ColumnDataSource, HoverTool, Div, Legend, LegendItem
from bokeh.layouts import column
from bokeh.plotting import figure


def create_line_plot(data, x_column, y_column, category_column, title, x_label, y_label,
                                 subtitle=None, source=None, event_dates=None, event_labels=None):
    """
    Creates a Bokeh line plot with multiple lines for different categories and optional vertical event markers.
    
    Parameters:
    - data: DataFrame, input data
    - x_column: str, column for x-axis values
    - y_column: str, column for y-axis values
    - category_column: str, column for categories to create separate lines
    - title: str, plot title
    - x_label: str, label for x-axis
    - y_label: str, label for y-axis
    - subtitle: str, optional subtitle
    - source: str, optional data source
    - event_dates: list of datetime, optional dates for vertical lines
    - event_labels: list of str, optional labels for the vertical lines
    
    Returns:
    - Bokeh layout object
    """
    # Ensure the x_column and y_column are properly formatted for the source
    data = data.copy()
    data[x_column] = pd.to_datetime(data[x_column])  # Ensure the x_column is datetime
    
    # Initialize the figure
    p = figure(
        x_axis_type="datetime",
        title=title,
        width=800,
        height=400,
        toolbar_location="above"
    )
    
    # Define the desired category order
    category_order = ["0", "(0, 10)", "[10, 100)", "100+"]
    
    # Define color palette
    colors = Set2[len(category_order)]  # Use palette for all categories
    
    legend_items = []  # To store legend items in the desired order
    
    # Loop through each category in the desired order and add a line
    for i, category in enumerate(category_order):
        category_data = data[data[category_column] == category]
        if not category_data.empty:
            category_source = ColumnDataSource(category_data)
            line = p.line(
                x=x_column,
                y=y_column,
                source=category_source,
                line_width=2,
                color=colors[i]
            )
            # Store legend item
            legend_items.append((str(category), [line]))
    
    # Add vertical event lines and labels if provided
    if event_dates and event_labels:
        for date, label in zip(event_dates, event_labels):
            vline = Span(location=date.timestamp() * 1000,  # Convert datetime to milliseconds
                         dimension="height", line_color="gray", line_dash="dashed", line_width=2)
            p.add_layout(vline)
            event_label = Label(x=date.timestamp() * 1000, y=0, text=label, text_font_size="10pt",
                                text_align="left", text_baseline="bottom", angle=45,
                                x_offset=5, y_offset=5)
            p.add_layout(event_label)
    
    # Set the legend explicitly with the desired order
    legend = Legend(items=legend_items, title="Distance Categories", click_policy="hide")
    p.add_layout(legend, "right")
    
    # Add hover tool
    hover = HoverTool(
        tooltips=[
            (x_label, f"@{x_column}{{%F}}"),
            (y_label, f"@{y_column}"),
            ("Category", f"@{category_column}")
        ],
        formatters={f"@{x_column}": "datetime"}
    )
    p.add_tools(hover)
    
    # Style the plot
    p.xaxis.axis_label = x_label
    p.yaxis.axis_label = y_label
    
    # Use Div for title and subtitle
    title_div = Div(
        text=f"<h2>{title}</h2>", 
        styles={"text-align": "left", "font-size": "14pt", "font-weight": "bold"}
    )
    subtitle_div = Div(
        text=f"<p><em>{subtitle}</em></p>" if subtitle else "",
        styles={"text-align": "left", "font-size": "12pt", "margin-bottom": "10px"}
    )
    source_div = Div(
        text=f"<p><small>{source}</small></p>",
        styles={"text-align": "left", "font-size": "12pt", "margin-top": "10px"}
    )
    
    # Combine all elements
    layout = column(title_div, subtitle_div, p, source_div)
    return layout



In [10]:
# Daily data 
movementDistribution['date'] = pd.to_datetime(movementDistribution['ds'], format='%Y-%m-%d')

daily_data = movementDistribution.copy()
daily_data['weighted_ping_fraction'] = (
    movementDistribution['distance_category_ping_fraction'] * movementDistribution['weights']
)

national_daily_data = (
    daily_data.groupby(['date', 'home_to_ping_distance_category'])
    .agg({'weighted_ping_fraction': 'sum'})  # Sum weighted fractions across all regions
    .reset_index()
)

# Rename columns for clarity
national_daily_data.rename(
    columns={'weighted_ping_fraction': 'national_ping_fraction'},
    inplace=True
)


In [45]:
#national_daily_data to csv in processed folder 

national_daily_data.to_csv("../../data/movement-analysis/processed/movement-distribution/national_daily_data.csv", index=False)

In [11]:
## Weekly data
# Step 1: Convert the date column to datetime and add necessary columns
movementDistribution['date'] = pd.to_datetime(movementDistribution['ds'], format='%Y-%m-%d')
movementDistribution['week_of_year'] = movementDistribution['date'].dt.isocalendar().week
movementDistribution['week'] = movementDistribution['date'].dt.isocalendar().week
movementDistribution['year'] = movementDistribution['date'].dt.year

# Step 2: Filter for Tuesday to Thursday
filtered_data = movementDistribution[
    movementDistribution['date'].dt.dayofweek.isin([0, 6, 1, 2])  # Sunday to Wednesday
]

# Step 3: Aggregate weekly data at admin-2 level
weekly_data = (
    filtered_data.groupby(['year', 'week', 'ADM2_EN', 'home_to_ping_distance_category', 'weights'])
    .agg({'distance_category_ping_fraction': 'mean'})  # Average ping fractions over the week
    .reset_index()
)

# Add a combined week identifier for plotting
weekly_data['week_identifier'] = (
    weekly_data['year'].astype(str) + "-W" + weekly_data['week'].astype(str)
)

# Convert the week identifier to a datetime format for consistent plotting
weekly_data['week_start'] = pd.to_datetime(
    weekly_data['week_identifier'] + "-1", format="%G-W%V-%u"
)

# Step 4: Calculate `weighted_ping_fraction` for national aggregation
weekly_data['weighted_ping_fraction'] = (
    weekly_data['distance_category_ping_fraction'] * weekly_data['weights']
)

# Step 5: Aggregate to national level
national_weekly_data = (
    weekly_data.groupby(['year', 'week','week_start', 'home_to_ping_distance_category'])
    .agg({'weighted_ping_fraction': 'sum'})  # Sum weighted fractions across all regions
    .reset_index()
)

# Rename columns for clarity
national_weekly_data.rename(
    columns={'weighted_ping_fraction': 'national_ping_fraction'},
    inplace=True
)


In [12]:
# Step 1: Convert the date column to datetime and add month/year columns
movementDistribution['date'] = pd.to_datetime(movementDistribution['ds'], format='%Y-%m-%d')
movementDistribution['month'] = movementDistribution['date'].dt.month
movementDistribution['year'] = movementDistribution['date'].dt.year

# Step 2: Filter for Tuesday to Thursday
filtered_data = movementDistribution[
    movementDistribution['date'].dt.dayofweek.isin([6, 0, 1, 2])  
]

# Step 3: Aggregate monthly data at admin-2 level
monthly_data = (
    filtered_data.groupby(['year', 'month', 'ADM2_EN', 'home_to_ping_distance_category', 'weights'])
    .agg({'distance_category_ping_fraction': 'mean'})  # Average ping fractions over the month
    .reset_index()
)

# Step 4: Calculate `weighted_ping_fraction` for national aggregation
monthly_data['weighted_ping_fraction'] = (
    monthly_data['distance_category_ping_fraction'] * monthly_data['weights']
)

# Add a combined month identifier for plotting
monthly_data['month_identifier'] = (
    monthly_data['year'].astype(str) + "-" + monthly_data['month'].astype(str).str.zfill(2)
)

# Convert the month identifier to a datetime format (set to the first day of the month)
monthly_data['month_start'] = pd.to_datetime(
    monthly_data['month_identifier'] + "-01", format="%Y-%m-%d"
)

# Step 5: Aggregate to national level
national_monthly_data = (
    monthly_data.groupby(['year', 'month','month_start',  'home_to_ping_distance_category'])
    .agg({'weighted_ping_fraction': 'sum'})  # Sum weighted fractions across all regions
    .reset_index()
)

# Rename columns for clarity
national_monthly_data.rename(
    columns={'weighted_ping_fraction': 'national_ping_fraction'},
    inplace=True
)


 ### Movement in key districts

In [13]:
import pandas as pd
from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource, HoverTool, Slider, CustomJS
from bokeh.layouts import column
from bokeh.io import output_notebook

# Activate notebook output
output_notebook()

# Filter data for Dhaka
dhaka_data = movementDistribution[movementDistribution["gadm_name"] == "Dhaka"]
# filter crisis data for dhaka jul 5 to sept 5
dhaka_data_crisis = dhaka_data[(dhaka_data['date'] >= '2024-07-05') & (dhaka_data['date'] <= '2024-09-05')]
dhaka_data_weekly = weekly_data[weekly_data["ADM2_EN"] == "DHAKA"]
dhaka_data_monthly = monthly_data[monthly_data["ADM2_EN"] == "DHAKA"]

# Filter data for Narsingdi
narsingdi_data = movementDistribution[movementDistribution["gadm_name"] == "Narsingdi"]
# filter crisis data for dhaka jul 5 to sept 5
narsingdi_data_crisis = narsingdi_data[(narsingdi_data['date'] >= '2024-07-05') & (narsingdi_data['date'] <= '2024-09-05')]
narsingdi_data_weekly = weekly_data[weekly_data["ADM2_EN"] == "NARSINGDI"]
narsingdi_data_monthly = monthly_data[monthly_data["ADM2_EN"] == "NARSINGDI"]

# Define the key event dates and labels
event_dates = [
    datetime(2024, 7, 18),
    datetime(2024, 7, 19),
    datetime(2024, 8, 5),
    datetime(2024, 8, 8)
]
event_labels = [
    "Internet Cut",
    "Curfews Imposed",
    "Prime Minister Resigns",
    "Transition Government"
]

# Create the tabs
tabs = []

#Dhak

# Add grpah with weekly plot for Dhaka

tabs.append(
    TabPanel(
        title="Weekly-Dhaka",
        child=create_line_plot(
            data=dhaka_data_weekly,
            x_column="week_start",
            y_column="distance_category_ping_fraction",
            category_column="home_to_ping_distance_category",
            title="Weekly Movement Distribution in Dhaka",
            x_label="Week",
            y_label="Ping Fraction",
            subtitle="Weekly data is calculated by aggregating daily data from Sunday to Wednesday.",
            source="Source: Meta Data for Good"
        )
    )
)

# Add grpah with daily plot for Dhaka

tabs.append(
    TabPanel(
        title="Daily-Dhaka",
        child=create_line_plot(
            data=dhaka_data_crisis,
            x_column="date",
            y_column="distance_category_ping_fraction",
            category_column="home_to_ping_distance_category",
            title="Movement Distribution in Dhaka (July 1st - September 5th)",
            x_label="Date",
            y_label="Ping Fraction",
            subtitle="This plot shows daily movement distribution in Dhaka, during the crisis period.",
            source="Source: Meta Data for Good",
            event_dates=event_dates,
            event_labels=event_labels
        )
    )
)


# Narsingdi
# Add grpah with daily plot for Narsingdi


# Add grpah with weekly plot for Narsingdi

tabs.append(
    TabPanel(
        title="Weekly-Narsingdi",
        child=create_line_plot(
            data=narsingdi_data_weekly,
            x_column="week_start",
            y_column="distance_category_ping_fraction",
            category_column="home_to_ping_distance_category",
            title="Weekly Movement Distribution in Narsingdi",
            x_label="Week",
            y_label="Ping Fraction",
            subtitle="Weekly data is calculated by aggregating daily data from Sunday to Wednesday.",
            source="Source: Meta Data for Good"
        )
    )
)


tabs.append(
    TabPanel(
        title="Daily-Narsingdi",
        child=create_line_plot(
            data=narsingdi_data_crisis,
            x_column="date",
            y_column="distance_category_ping_fraction",
            category_column="home_to_ping_distance_category",
            title="Movement Distribution in Narsingdi (July 1st - September 5th)",
            x_label="Date",
            y_label="Ping Fraction",
            subtitle="This plot shows daily movement distribution in Narsingdi, during the crisis period.",
            source="Source: Meta Data for Good",
            event_dates=event_dates,
            event_labels=event_labels
        )
    )
)


# Create the tabs layout
tabs_layout = Tabs(tabs=tabs)

# Create the limitation note
limitation_note = Div(
    text="""<div style="border: 1px solid #ddd; padding: 10px; background-color: #f9f9f9; font-size: 12pt;">
    <h3 style="margin: 0; font-size: 14pt;">Limitations</h3>
    <p>This visualization is derived from Facebook's <em>Movement Distribution</em> data, which represents the proportion of Facebook users 
    within each distance category. <p>
    However, the total number of users included in the dataset may vary from day to day, especially during events 
    such as internet shutdowns or other disruptions.</p>
    <p>To address these variations, additional analysis on the evolution of Facebook's user population from <strong>August 25th to September 5th, 2024</strong>, 
    is included in the Population During Crisis Analysis.</p>
    </div>"""
)

# Combine the limitation note and tabs into a layout
layout = column(tabs_layout, limitation_note)

# Display the layout
show(layout)





### National Movement

In [14]:
# Activate notebook output
output_notebook()


# Create a how to read it tab
def get_explanation_tab():
    """Create a tab explaining how to read the chart."""
    title_div = Div(
        text="<h2>How to read it?</h2>",
        styles={"text-align": "left", "font-size": "18pt", "font-weight": "bold"}
    )
    
    explanation_div = Div(
        text="""
    <p>This chart visualizes <strong>National Movement Patterns</strong> in Bangladesh, aggregated at the national level, based on data from Facebook's <em>Movement Distribution</em> dataset.</p>
    <p><strong>How to Read the Chart:</strong></p>
    <ul>
        <li><strong>Lines:</strong> Each line represents a distance category ("0 km," "(0, 10 km)," "[10, 100 km)," and "100+ km").</li>
        <li><strong>Y-axis (Ping Fraction):</strong> Proportion of movement within each distance category, nationally aggregated.</li>
        <li><strong>X-axis (Date):</strong> Time period covered by the dataset, allowing for trend analysis over weeks or months.</li>
    </ul>
    <p><strong>What is Ping Fraction?</strong></p>
    <ul>
        <li>Ping Fraction represents the proportion of movement within a specific distance category for each district.</li>
        <li>For national aggregation, the Ping Fraction is weighted by baseline population data from Facebook users (July 11, 2024).</li>
        <li>For example, a Ping Fraction of <strong>0.30</strong> for the "0 km" category means that 30% of observed pings (movements) nationally are within the "0 km" range, indicating people stayed close to their home location.</li>
    </ul>
    <p><strong>National Aggregation Process:</strong></p>
    <ul>
        <li>Weights are based on the number of Facebook users in each district, ensuring proportional representation.</li>
        <li>Ping Fractions from each district are multiplied by their respective weights.</li>
        <li>The weighted values are summed across all districts to calculate the national movement trends.</li>
    </ul>
    <p><strong>Limitations:</strong></p>
    <ul>
        <li>The dataset reflects Facebook users with location services enabled, which may not represent the entire population.</li>
        <li>Privacy-preserving noise is added, and regions with insufficient data are excluded.</li>
        <li>Consider potential biases in Facebook's user base when interpreting movement patterns.</li>
    </ul>
        """,
        styles={"text-align": "left", "font-size": "12pt"}
    )
    
    layout = column(title_div, explanation_div)
    return layout


# Create the tabs
tabs = []

# Add the national daily movement distribution plot tab
tabs.append(
    TabPanel(
        child=create_line_plot(
            data=national_daily_data,
            x_column="date",
            y_column="national_ping_fraction",
            category_column="home_to_ping_distance_category", 
            title="National Daily Movement Distribution (Dec 1 2022 - Dec 03 2024)",
            source="Source: Data for Good Meta",
            subtitle="This plot shows the national daily movement distribution.",
            x_label="Date",
            y_label="National Ping Fraction"
        ),
        title="Daily",
    )
)

# Add the weekly national movement distribution plot tab
tabs.append(
    TabPanel(
        child=create_line_plot(
            data=national_weekly_data,
            x_column="week_start",
            y_column="national_ping_fraction",
            category_column="home_to_ping_distance_category", 
            title="National Weekly Movement Distribution (Dec 1 2022 - Dec 03 2024)",
            source="Source: Data for Good Meta",
            subtitle="Weekly data is calculated by aggregating daily data from Sunday to Wednesday",
            x_label="Date",
            y_label="National Ping Fraction"
        ),
        title="Weekly",
    )
)

# add daily national movement distribution plot tab (july 2024 to september 5th 2024)

# filter daily data for that period 

national_daily_data_filtered = national_daily_data[(national_daily_data["date"] >= "2024-07-01") & (national_daily_data["date"] <= "2024-09-05")]

# Define the key event dates and labels
event_dates = [
    datetime(2024, 7, 18),
    datetime(2024, 7, 19),
    datetime(2024, 8, 5),
    datetime(2024, 8, 8)
]
event_labels = [
    "Internet Cut",
    "Curfews Imposed",
    "Prime Minister Resigns",
    "Transition Government"
]

# Add the tab with the plot for the political crisis
tabs.append(
    TabPanel(
        child=create_line_plot(
            data=national_daily_data_filtered,
            x_column="date",
            y_column="national_ping_fraction",
            category_column="home_to_ping_distance_category",
            title="National Daily Movement Distribution (July 2024 - September 2024)",
            source="Source: Data for Good Meta",
            subtitle="National average with key events during the crisis",
            x_label="Date",
            y_label="National Ping Fraction",
            event_dates=event_dates,
            event_labels=event_labels
        ),
        title="Daily-Political Crisis",
    )
)


# Add the explanation tab
tabs.append(
    TabPanel(
        child=get_explanation_tab(),
        title="How to read it?",
    )
)

# Display the tabs
tabs_layout = Tabs(tabs=tabs, sizing_mode="scale_both")
show(tabs_layout)


In [57]:
import pandas as pd
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import ColumnDataSource, Tabs, TabPanel
from bokeh.transform import dodge
from bokeh.palettes import Category10



# Define the fixed order of categories
category_order = ["0", "(0, 10)", "[10, 100)", "100+"]

# Load the dataset
df = national_daily_data  # Replace with your actual DataFrame

# Convert the 'date' column to datetime
df['date'] = pd.to_datetime(df['date'])

# Define baseline (Dec 2022 to Nov 2023) and comparison periods (Dec 2023 to Nov 2024)
baseline_start = '2022-12-01'
baseline_end = '2023-11-30'
comparison_start = '2023-12-01'
comparison_end = '2024-11-30'

# Filter data for baseline and comparison periods
baseline_data = df[(df['date'] >= baseline_start) & (df['date'] <= baseline_end)]
comparison_data = df[(df['date'] >= comparison_start) & (df['date'] <= comparison_end)]

# Calculate the baseline average by distance category
baseline_avg = (
    baseline_data.groupby("home_to_ping_distance_category")['national_ping_fraction']
    .mean()
    .reset_index()
    .rename(columns={"national_ping_fraction": "baseline_avg"})
)

# Calculate the comparison average by distance category
comparison_avg = (
    comparison_data.groupby("home_to_ping_distance_category")['national_ping_fraction']
    .mean()
    .reset_index()
    .rename(columns={"national_ping_fraction": "comparison_avg"})
)

# Merge baseline and comparison averages
merged_data = pd.merge(baseline_avg, comparison_avg, on="home_to_ping_distance_category")
merged_data['percentage_change'] = (
    (merged_data['comparison_avg'] - merged_data['baseline_avg']) / merged_data['baseline_avg'] * 100
)

# Reorder categories
merged_data['home_to_ping_distance_category'] = pd.Categorical(
    merged_data['home_to_ping_distance_category'], categories=category_order, ordered=True
)

# Merge baseline averages with the entire dataset for day-by-day comparison
df_with_baseline = df.merge(baseline_avg, on="home_to_ping_distance_category", how="left")

### Movement baseline

In [60]:
from bokeh.models import HoverTool, ColumnDataSource, Tabs, TabPanel, Div
from bokeh.plotting import figure, show
from bokeh.layouts import column
from bokeh.transform import dodge



df_with_baseline['percentage_from_baseline'] = (
    (df_with_baseline['national_ping_fraction'] - df_with_baseline['baseline_avg']) /
    df_with_baseline['baseline_avg'] * 100
)

df_with_baseline['date_str'] = df_with_baseline['date'].dt.strftime('%Y-%m-%d')  # Add string version of date

# Define fixed order for categories and colors
category_order = ["0", "(0, 10)", "[10, 100)", "100+"]
colors = ["blue", "orange", "green", "red"]

# Title, subtitle, and source for all tabs
title = "Movement Distribution: Baseline and Comparison"
subtitle = "Baseline period: Dec 2022 - Nov 2023"
source = "Source: Meta, Data for Good"

title_div = Div(
    text=f"<h2>{title}</h2>", 
    styles={"text-align": "left", "font-size": "14pt", "font-weight": "bold"}
)
subtitle_div = Div(
    text=f"<p><em>{subtitle}</em></p>", 
    styles={"text-align": "left", "font-size": "12pt", "margin-bottom": "10px"}
)
source_div = Div(
    text=f"<p><small>{source}</small></p>",
    styles={"text-align": "left", "font-size": "12pt", "margin-top": "10px"}
)

# Prepare data for baseline vs comparison and percentage change
source_baseline_comparison = ColumnDataSource({
    'home_to_ping_distance_category': category_order,
    'baseline_avg': df_with_baseline.groupby('home_to_ping_distance_category')['baseline_avg'].mean(),
    'comparison_avg': df_with_baseline.groupby('home_to_ping_distance_category')['national_ping_fraction'].mean(),
    'percentage_change': (
        (df_with_baseline.groupby('home_to_ping_distance_category')['national_ping_fraction'].mean() -
         df_with_baseline.groupby('home_to_ping_distance_category')['baseline_avg'].mean()) /
        df_with_baseline.groupby('home_to_ping_distance_category')['baseline_avg'].mean() * 100
    )
})

# Tab 1: Baseline vs Comparison
p1 = figure(x_range=category_order, 
            height=400, width=800, 
            title="Baseline vs Comparison: Movement Distribution",
            toolbar_location=None, tools="")

p1.vbar(x=dodge('home_to_ping_distance_category', -0.2, range=p1.x_range), 
        top='baseline_avg', width=0.4, source=source_baseline_comparison, legend_label="Baseline", color="blue")

p1.vbar(x=dodge('home_to_ping_distance_category', 0.2, range=p1.x_range), 
        top='comparison_avg', width=0.4, source=source_baseline_comparison, legend_label="Comparison", color="green")

layout_tab1 = column(title_div, subtitle_div, p1, source_div)

# Tab 2: Percentage Change
p2 = figure(x_range=category_order, 
            height=400, width=800, 
            title="Percentage Change from Baseline",
            toolbar_location=None, tools="")

p2.vbar(x='home_to_ping_distance_category', 
        top='percentage_change', width=0.6, source=source_baseline_comparison, color="orange", legend_label="Percentage Change")

layout_tab2 = column(title_div, subtitle_div, p2, source_div)

# Tab 3: Day-by-Day Comparison
p3 = figure(height=400, width=800, title="Day-by-Day Comparison to Baseline", x_axis_type="datetime")
for i, category in enumerate(category_order):
    category_data = df_with_baseline[df_with_baseline['home_to_ping_distance_category'] == category]
    category_source = ColumnDataSource(category_data)
    p3.line(x='date', y='percentage_from_baseline', source=category_source, legend_label=category, line_width=2, color=colors[i])
    hover = HoverTool(
        renderers=[p3.renderers[-1]],  # Attach to the specific line
        tooltips=[
            ("Date", "@date_str"),
            ("Category", category),
            ("% Change", "@percentage_from_baseline{0.2f}%")
        ]
    )
    p3.add_tools(hover)
layout_tab3 = column(title_div, subtitle_div, p3, source_div)

# Tab 4: How to Read It?
how_to_read_text = """
<h3>How to Read the Graphs</h3>
<p>The graphs provide insights into movement distribution patterns based on the following:</p>
<ul>
    <li><strong>Baseline vs Comparison:</strong> Shows the average movement during the baseline period (Dec 2022 - Nov 2023) and the comparison period (Dec 2023 - Nov 2024) for each distance category.</li>
    <li><strong>Percentage Change:</strong> Displays the percentage change from the baseline for each distance category during the comparison period.</li>
    <li><strong>Day-by-Day Change:</strong> Tracks daily movement patterns relative to the baseline. A value of 0% indicates no change from the baseline, while positive/negative values indicate increases/decreases.</li>
</ul>
<p><small>Source: Meta, Data for Good</small></p>
"""
how_to_read_div = Div(
    text=how_to_read_text,
    styles={"text-align": "left", "font-size": "12pt", "line-height": "1.5"}
)
layout_tab4 = column(how_to_read_div)

# Create tabs
tab1 = TabPanel(child=layout_tab1, title="Baseline vs Comparison")
tab2 = TabPanel(child=layout_tab2, title="Year-by-Year Change")
tab3 = TabPanel(child=layout_tab3, title="Day-by-Day Change")
tab4 = TabPanel(child=layout_tab4, title="How to Read It?")

# Combine all tabs
tabs_layout = Tabs(tabs=[tab1, tab2, tab3, tab4], sizing_mode="scale_both")

# Show the tabs
show(tabs_layout)



### Movement by district

In [130]:
# Standardize ADM2_EN for both datasets to ensure a clean join
bgd_adm2["ADM2_EN"] = bgd_adm2["ADM2_EN"].str.upper()
# select only the columns needed
bgd_adm2 = bgd_adm2[["ADM2_EN", "geometry"]]

# filter movement 0 km data
movement_0km = movementDistribution[movementDistribution["home_to_ping_distance_category"] == "0"]
movement_0km["ADM2_EN"] = movement_0km["ADM2_EN"].str.upper()

# Merge the movement data with the geographic data for 0 km 
bgd_adm2_merged = pd.merge(
    movement_0km,  # Mobility data for "0 km" category
    bgd_adm2,  # Administrative boundary data
    on="ADM2_EN",  # Match districts
    how="inner"  # Inner join ensures only matching districts are included
)

# Convert back to a GeoDataFrame
bgd_adm2_merged = gpd.GeoDataFrame(bgd_adm2_merged, geometry="geometry", crs=bgd_adm2.crs)

# filter movement 100+ km data
movement_100km = movementDistribution[movementDistribution["home_to_ping_distance_category"] == "100+"]
movement_100km["ADM2_EN"] = movement_100km["ADM2_EN"].str.upper()

# Merge the movement data with the geographic data for 100+ km
bgd_adm2_merged_100km = pd.merge(
    movement_100km,  # Mobility data for "100+ km" category
    bgd_adm2,  # Administrative boundary data
    on="ADM2_EN",  # Match districts
    how="inner"  # Inner join ensures only matching districts are included
)

# Convert back to a GeoDataFrame
bgd_adm2_merged_100km = gpd.GeoDataFrame(bgd_adm2_merged_100km, geometry="geometry", crs=bgd_adm2.crs)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  movement_0km["ADM2_EN"] = movement_0km["ADM2_EN"].str.upper()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  movement_100km["ADM2_EN"] = movement_100km["ADM2_EN"].str.upper()


In [143]:
from bokeh.models import Div, Panel, Tabs
from bokeh.layouts import column
from bokeh.plotting import show
from bokeh.io import output_notebook

def map_plot_r(geo_data, date, column="distance_category_ping_fraction"):
    """
    Create a static map for a specific date with a diverging color scale.
    
    Parameters:
    - geo_data: GeoDataFrame with geometry and movement data.
    - date: str, date to filter data for the map (YYYY-MM-DD).
    - column: str, the column to visualize (default: "distance_category_ping_fraction").
    
    Returns:
    - Base64-encoded image string for embedding in HTML.
    """
    import matplotlib.pyplot as plt
    from matplotlib.colors import TwoSlopeNorm
    import matplotlib.cm as cm
    import base64
    from io import BytesIO

    # Filter data for the specific date
    filtered_data = geo_data[geo_data["date"] == date]
    
    # Set up the color map and normalization
    cmap = cm.RdYlGn_r  # Diverging colormap: Red for below 0.5, green for above
    norm = TwoSlopeNorm(vmin=0, vcenter=0.5, vmax=1)  # Centered at 0.5

    # Plot the map
    fig, ax = plt.subplots(1, 1, figsize=(10, 8))
    filtered_data.plot(
        column=column, 
        cmap=cmap, 
        legend=True, 
        ax=ax,
        legend_kwds={
            'label': "Ping Fraction",
            'orientation': "vertical",
            'shrink': 0.6  # Adjust the legend size
        },
        norm=norm  # Apply the diverging normalization
    )
   # ax.set_title(f"Mobility Patterns (0 km) on {date}")
   # ax.axis("off")
    
    # Convert plot to Base64 string
    buf = BytesIO()
    plt.savefig(buf, format="png", bbox_inches="tight")
    plt.close(fig)
    buf.seek(0)
    img_base64 = base64.b64encode(buf.read()).decode("utf-8")
    return img_base64

def map_plot(geo_data, date, column="distance_category_ping_fraction"):
    """
    Create a static map for a specific date with a diverging color scale.
    
    Parameters:
    - geo_data: GeoDataFrame with geometry and movement data.
    - date: str, date to filter data for the map (YYYY-MM-DD).
    - column: str, the column to visualize (default: "distance_category_ping_fraction").
    
    Returns:
    - Base64-encoded image string for embedding in HTML.
    """
    import matplotlib.pyplot as plt
    from matplotlib.colors import TwoSlopeNorm
    import matplotlib.cm as cm
    import base64
    from io import BytesIO

    # Filter data for the specific date
    filtered_data = geo_data[geo_data["date"] == date]
    
    # Set up the color map and normalization
    cmap = cm.RdYlGn  
    norm = TwoSlopeNorm(vmin=0, vcenter=0.5, vmax=1)  # Centered at 0.5

    # Plot the map
    fig, ax = plt.subplots(1, 1, figsize=(10, 8))
    filtered_data.plot(
        column=column, 
        cmap=cmap, 
        legend=True, 
        ax=ax,
        legend_kwds={
            'label': "Ping Fraction",
            'orientation': "vertical",
            'shrink': 0.6  # Adjust the legend size
        },
        norm=norm  # Apply the diverging normalization
    )
    #ax.set_title(f"Mobility Patterns (0 km) on {date}")
    #ax.axis("off")
    
    # Convert plot to Base64 string
    buf = BytesIO()
    plt.savefig(buf, format="png", bbox_inches="tight")
    plt.close(fig)
    buf.seek(0)
    img_base64 = base64.b64encode(buf.read()).decode("utf-8")
    return img_base64




In [144]:
from bokeh.models import Div, Panel, Tabs
from bokeh.layouts import column
from bokeh.plotting import show
from bokeh.io import output_notebook

output_notebook()

# Function to create the map tab
def get_map_tab(data, date, title, source, subtitle=None):
    from bokeh.models import Div
    img_base64 = map_plot(data, date)  # Assuming map_plot generates a Base64 image
    title_div = Div(
        text=f"<h2>{title}</h2>", 
        styles={"text-align": "left", "font-size": "12pt", "font-weight": "bold"}
    )
    subtitle_div = Div(
        text=f"<p><em>{subtitle}</em></p>" if subtitle else "", 
        styles={"text-align": "left", "font-size": "10pt", "margin-bottom": "10px"}
    )
    source_div = Div(
        text=f"<p><small>{source}</small></p>", 
        styles={"text-align": "left", "font-size": "10pt", "margin-top": "10px"}
    )
    img_tag = f'<img src="data:image/png;base64,{img_base64}" width="600">'
    img_div = Div(
        text=img_tag, 
        styles={"width": "600px", "margin": "auto", "padding": "10px"}
    )
    layout = column(title_div, subtitle_div, img_div, source_div, width=650, height=800)
    return layout

# Function for the explanation tab
# Function for the explanation tab
def get_explanation_tab():
    explanation_div = Div(
        text="""
        <div>
            <h2>How to Interpret This Map</h2>
            <p>This map visualizes mobility patterns in Bangladesh, focusing on people who stayed at home during key dates of the political crisis. Here's how to understand it:</p>
            <ul>
                <li><strong>Colors:</strong> Represent the proportion of people staying at home (not traveling).
                    <ul>
                        <li><span style="color:red;">Red:</span> Higher values, indicate more mobility; in this graph a larger share of people travelling more than 0 km.</li>
                        <li><span style="color:green;">Green:</span> Lower values, less mobility; more people staying at home (staying put).</li>
                    </ul>
                </li>
                <li><strong>Scale:</strong> Ranges from 0 to 1.
                    <ul>
                        <li><strong>1:</strong> Larger share of people staying at home.</li>
                        <li><strong>0:</strong> More people in another category (e.g., traveling or engaging in other activities).</li>
                    </ul>
                </li>
            </ul>
            <p><em>Note:</em> The data reflects Facebook users with location services enabled, so it may not fully represent the entire population.</p>
        </div>
        """,
        styles={"font-size": "12pt", "margin": "10px", "line-height": "1.6"}
    )
    return column(explanation_div, width=650, height=800)

# Define key dates
key_dates = ["2024-07-16", "2024-07-18", "2024-07-19", 
             "2024-07-21", "2024-07-22", "2024-08-05", "2024-08-10"]

# Create tabs for each key date
tabs = []

for date in key_dates:
    tabs.append(
        TabPanel(
            child=get_map_tab(
                bgd_adm2_merged,
                date=date,
                title=f"Mobility Patterns (0 km) on {date}",
                source="Source: Data for Good Meta",
                subtitle="Focused on the political crisis in Bangladesh"
            ),
            title=date
        )
    )

# Add explanation tab
tabs.append(
    TabPanel(
        child=get_explanation_tab(),
        title="How to read it?"
    )
)

# Create tabs for the maps
tabs_layout = Tabs(tabs=tabs)

# Create the limitation note
limitation_note = Div(
    text="""<div style="border: 1px solid #ddd; padding: 10px; background-color: #f9f9f9; font-size: 12pt;">
    <h3 style="margin: 0; font-size: 14pt;">Limitations</h3>
    <p>This visualization is derived from Facebook's <em>Movement Distribution</em> data, which represents the proportion of Facebook users 
    within each distance category. <p>
    However, the total number of users included in the dataset may vary from day to day, especially during events 
    such as internet shutdowns or other disruptions.</p>
    <p>To address these variations, additional analysis on the evolution of Facebook's user population from <strong>August 25th to September 5th, 2024</strong>, 
    is included in the Population During Crisis Analysis.</p>
    </div>"""
)

# Combine the tabs and the limitation note in the correct order
layout = column(
    tabs_layout,
    limitation_note,
    width=750,  # Uniform width for both components
    height_policy="fit"
)

# Display the layout
show(layout)




In [149]:
output_notebook()

from bokeh.models import Div, Panel, Tabs
from bokeh.layouts import column
from bokeh.plotting import show
from bokeh.io import output_notebook

output_notebook()

# Function to create the map tab
def get_map_tab_r(data, date, title, source, subtitle=None):
    from bokeh.models import Div
    img_base64 = map_plot_r(data, date)  # Assuming map_plot generates a Base64 image
    title_div = Div(
        text=f"<h2>{title}</h2>", 
        styles={"text-align": "left", "font-size": "12pt", "font-weight": "bold"}
    )
    subtitle_div = Div(
        text=f"<p><em>{subtitle}</em></p>" if subtitle else "", 
        styles={"text-align": "left", "font-size": "10pt", "margin-bottom": "10px"}
    )
    source_div = Div(
        text=f"<p><small>{source}</small></p>", 
        styles={"text-align": "left", "font-size": "10pt", "margin-top": "10px"}
    )
    img_tag = f'<img src="data:image/png;base64,{img_base64}" width="600">'
    img_div = Div(
        text=img_tag, 
        styles={"width": "600px", "margin": "auto", "padding": "10px"}
    )
    layout = column(title_div, subtitle_div, img_div, source_div, width=650, height=800)
    return layout

# Function for the explanation tab
def get_explanation_tab():
    explanation_div = Div(
        text="""
        <div>
            <h2>How to Interpret This Map</h2>
            <p>This map visualizes mobility patterns in Bangladesh during key dates of the political crisis. Here's how to understand it:</p>
            <ul>
                <li><strong>Colors:</strong> Represent mobility levels. 
                    <ul>
                        <li><span style="color:red;">Red:</span> Higher values, indicating more mobility, here a larger share of people traveling more than 100 km.</li>
                        <li><span style="color:green;">Green:</span> Lower values, indicating less mobility, here fewer people traveling this distance.</li>
                    </ul>
                </li>
                <li><strong>Scale:</strong> Ranges from 0 to 1.
                    <ul>
                        <li><strong>0:</strong> Smaller share of people traveling more than 100 km.</li>
                        <li><strong>1:</strong> Larger share of people traveling more than 100 km.</li>
                    </ul>
                </li>
            </ul>
            <p><em>Note:</em> The data reflects Facebook users with location services enabled, so it may not fully represent the entire population.</p>
        </div>
        """,
        styles={"font-size": "12pt", "margin": "10px", "line-height": "1.6"}
    )
    return column(explanation_div, width=650, height=800)

# Define key dates
key_dates = ["2024-07-16", "2024-07-18", "2024-07-19", 
             "2024-07-21", "2024-07-22", "2024-08-05", "2024-08-10"]

# Create tabs for maps and explanations
tabs = []

for date in key_dates:
    tabs.append(
        TabPanel(
            child=get_map_tab_r(
                bgd_adm2_merged_100km,
                date=date,
                title=f"Mobility Patterns (100+ km) on {date}",
                source="Source: Data for Good Meta",
                subtitle="Focused on the political crisis in Bangladesh"
            ),
            title=date
        )
    )

tabs.append(
    TabPanel(
        child=get_explanation_tab(),
        title="How to read it?"
    )
)

# Display the tabs
tabs_layout = Tabs(tabs=tabs, sizing_mode="scale_both")
show(tabs_layout)