In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import Span
from shapely.geometry import Point
import geopandas as gpd
import glob
import bokeh
from datetime import datetime
from bokeh.layouts import column
from bokeh.models import Legend, Tabs, TabPanel
from bokeh.core.validation.warnings import MISSING_RENDERERS, EMPTY_LAYOUT

# Matplotlib text defaults for every figure in this notebook: Arial, 14 pt
plt.rcParams.update({"font.family": "Arial", "font.size": 14})

In [3]:

# Silence two Bokeh validation warnings for this notebook. The original cell
# silenced EMPTY_LAYOUT twice and never touched MISSING_RENDERERS, even though
# both names are imported together at the top of the notebook.
bokeh.core.validation.silence(MISSING_RENDERERS, True)
bokeh.core.validation.silence(EMPTY_LAYOUT, True)



In [4]:
# Function to convert a pandas dataframe to a geopandas dataframe
def convert_to_gdf(df):
    """
    Wrap ``df`` in a GeoDataFrame with one point geometry per row.

    Each point is built from the row's ``longitude`` (x) and ``latitude`` (y)
    values, and the resulting frame is tagged with CRS "EPSG:4326".

    Parameters
    ----------
    df : pandas.DataFrame
        Must expose ``longitude`` and ``latitude`` columns.

    Returns
    -------
    geopandas.GeoDataFrame
        The columns of ``df`` plus a ``geometry`` column of points.
    """
    points = list(map(Point, zip(df.longitude, df.latitude)))
    return gpd.GeoDataFrame(df, crs="EPSG:4326", geometry=points)

In [5]:
# Administrative boundaries from HDX / UN OCHA: ADM2 is loaded first, then ADM1
bgd_adm2, bgd_adm1 = (
    gpd.read_file(shapefile_path)
    for shapefile_path in (
        "../../data/bgd_adm_bbs_20201113_SHP/bgd_admbnda_adm2_bbs_20201113.shp",
        "../../data/bgd_adm_bbs_20201113_SHP/bgd_admbnda_adm1_bbs_20201113.shp",
    )
)

  _init_gdal_data()


# **Population During Crisis Analysis**

The **Population During Crisis** dataset tracks the number of Facebook users in different geographic regions, spanning **August 25th, 2024, to September 5th, 2024**. This dataset provides insights into:
- **The count of Facebook users and how this number changes over time.**
- **Identifying areas heavily impacted by the crisis.**
- **Understanding population movement dynamics and estimating how many people are affected.**

---

## **Purpose**
This analysis examines how population counts fluctuate at the **national level** in Bangladesh during and after the recent political crisis. 

---

## **Objectives**

### **National Trends**  
- Track the total number of Facebook users in Bangladesh across different time intervals (`00:00`, `08:00`, `16:00`).  
- Compare the **crisis period** with the **baseline period** to understand movement patterns.  

---

## **How is the Population Calculated?**

The dataset has two main variables: **baseline population** (`n_baseline`) and **crisis population** (`n_crisis`).

The **baseline population (`n_baseline`)** is derived from **pre-crisis Facebook population maps**, using the following methodology:
- Facebook counts users with **location services enabled** on their mobile devices.  
- The dataset captures the **most frequent location** of users in **8-hour intervals**.  
- If a user appears in multiple locations within the same interval, only their **most frequent location** is counted.  
- Privacy-preserving techniques ensure that individual movements cannot be identified.

The **crisis population (`n_crisis`)** represents the actual number of Facebook users observed **during the crisis period** under the same methodology.

In [7]:
# Read every "population during crisis" CSV export from the raw data folder.
# sorted() makes the load order (and therefore the row order) reproducible,
# because glob returns paths in arbitrary, filesystem-dependent order.
all_files = sorted(
    glob.glob("../../data/movement-analysis/raw/population-during-crisis/*.csv")
)
if not all_files:
    # Fail with the offending location instead of pandas' generic
    # "No objects to concatenate" error.
    raise FileNotFoundError(
        "No CSV files found in "
        "../../data/movement-analysis/raw/population-during-crisis/"
    )

li = [pd.read_csv(file) for file in all_files]

# Concatenate all the data into one DataFrame. ignore_index=True gives every row
# a unique label instead of restarting the index at 0 for each file.
populationCrisis = pd.concat(li, axis=0, ignore_index=True)

In [8]:

# Attach a point geometry (built from longitude/latitude) to every population record
populationCrisis = convert_to_gdf(populationCrisis)

# Left spatial join against the ADM2 boundaries: every population point is kept and
# picks up the attributes of the ADM2 polygon it intersects
joined_data = gpd.sjoin(
    left_df=populationCrisis,
    right_df=bgd_adm2,
    how="left",
    predicate="intersects",
)


In [11]:
# Parse both timestamp columns up front so the .dt accessor can be used on them
joined_data['ds'] = pd.to_datetime(joined_data['ds'])
joined_data['date_time'] = pd.to_datetime(joined_data['date_time'])

# Zero-padded hour of each record as a string (e.g. '00', '08', '16')
joined_data['time_category'] = joined_data['date_time'].dt.strftime('%H')

# Reduce 'ds' to plain calendar dates, dropping any time-of-day component
joined_data['ds'] = joined_data['ds'].dt.date

# Admin-level totals: one row per (time slot, date, ADM2 name)
aggregated_admin2_data = (
    joined_data
    .groupby(['time_category', 'ds', 'ADM2_EN'])[['n_baseline', 'n_crisis']]
    .sum()
    .reset_index()
)

# National-level totals: one row per (time slot, date)
aggregated_national_data = (
    joined_data
    .groupby(['time_category', 'ds'])[['n_baseline', 'n_crisis']]
    .sum()
    .reset_index()
)


 ### Facebook Population During Crisis

In [16]:
from bokeh.palettes import Set2
from bokeh.models import Span, Label, ColumnDataSource, HoverTool, Div, Legend, LegendItem, NumeralTickFormatter
from bokeh.layouts import column
from bokeh.plotting import figure


from bokeh.models import HoverTool, ColumnDataSource

def _event_position_ms(moment):
    """Return ``moment`` as milliseconds since the Unix epoch for a Bokeh datetime axis."""
    stamp = pd.Timestamp(moment)
    if stamp.tzinfo is None:
        # The plotted x values are naive datetimes, which Bokeh places on the axis
        # as if they were UTC. datetime.timestamp() would instead read a naive value
        # as local time and shift the marker by the machine's UTC offset.
        stamp = stamp.tz_localize("UTC")
    return stamp.timestamp() * 1000


def create_time_based_line_plot(data, x_column, y_column, time_column, title, x_label, y_label,
                                subtitle=None, source_text=None, event_dates=None, event_labels=None):
    """
    Creates a Bokeh line plot with multiple lines for different time categories (00, 08, 16 hours).

    Parameters
    ----------
    data : pandas.DataFrame
        Table holding ``x_column``, ``y_column`` and ``time_column``. It is copied
        before use, so the caller's frame is not modified.
    x_column : str
        Column with the x-axis dates; converted with ``pd.to_datetime``.
    y_column : str
        Column with the values drawn on the y-axis.
    time_column : str
        Column compared against the strings '00', '08' and '16'; each matching,
        non-empty subset becomes one line.
    title : str
        Used both as the figure title and as the heading Div above the figure.
    x_label, y_label : str
        Axis labels. ``y_label`` is also used as the hover-tooltip caption
        (falling back to "Population Count" when empty).
    subtitle : str, optional
        Italic line under the heading; omitted when falsy.
    source_text : str, optional
        Small-print line under the figure; omitted when falsy.
    event_dates : sequence of datetime-like, optional
        Dates marked with a dashed vertical line.
    event_labels : sequence of str, optional
        Text for the event lines, matched to ``event_dates`` by position. Dates
        without a corresponding label still get a line.

    Returns
    -------
    bokeh layout
        ``column(title_div, subtitle_div, figure, source_div)`` — the figure is
        the third child (index 2).
    """
    # Ensure the x_column is in datetime format
    data = data.copy()
    data[x_column] = pd.to_datetime(data[x_column])

    # Define color palette
    time_categories = ['00', '08', '16']  # Expected time categories
    colors = Set2[len(time_categories)]

    # Initialize the figure
    p = figure(
        x_axis_type="datetime",
        title=title,
        width=800,
        height=400,
        toolbar_location="above"
    )

    # Plot lines for each time category
    for idx, time_category in enumerate(time_categories):
        time_data = data[data[time_column] == time_category]
        if not time_data.empty:
            time_source = ColumnDataSource(time_data)  # Create ColumnDataSource for hover tool
            p.line(
                x=x_column,
                y=y_column,
                source=time_source,
                line_width=2,
                color=colors[idx],
                legend_label=f"{time_category}:00"
            )

    # Add vertical event lines and labels if provided
    dates = list(event_dates) if event_dates is not None else []
    labels = list(event_labels) if event_labels is not None else []
    for position, date in enumerate(dates):
        x_ms = _event_position_ms(date)
        p.add_layout(Span(location=x_ms, dimension="height", line_color="gray",
                          line_dash="dashed", line_width=2))
        if position < len(labels) and labels[position]:
            # Anchor the label in screen units at the bottom of the plot area. In data
            # units, y=0 lies outside the auto-fitted range whenever all plotted values
            # are far from zero, which hides the label.
            p.add_layout(Label(x=x_ms, y=0, y_units="screen", text=labels[position],
                               text_font_size="10pt", x_offset=5, y_offset=5))

    # Configure hover tool
    hover = HoverTool(
        tooltips=[
            ("Date", f"@{x_column}{{%F}}"),
            (y_label or "Population Count", f"@{y_column}{{0,0}}")
        ],
        formatters={f"@{x_column}": "datetime"}
    )
    p.add_tools(hover)

    # Configure plot style
    p.xaxis.axis_label = x_label
    p.yaxis.axis_label = y_label
    p.yaxis.formatter = NumeralTickFormatter(format="0,0")  # Format y-axis numbers with commas
    if p.legend:
        # A legend only exists when at least one line was drawn; setting attributes on
        # an empty legend list has no effect and makes Bokeh emit a warning.
        p.legend.title = "Time Categories"
        p.legend.click_policy = "hide"

    # Use Div for title and subtitle
    title_div = Div(
        text=f"<h2>{title}</h2>",
        styles={"text-align": "left", "font-size": "14pt", "font-weight": "bold"}
    )
    subtitle_div = Div(
        text=f"<p><em>{subtitle}</em></p>" if subtitle else "",
        styles={"text-align": "left", "font-size": "12pt", "margin-bottom": "10px"}
    )
    # Same guard as the subtitle: without it a missing source renders as "None"
    source_div = Div(
        text=f"<p><small>{source_text}</small></p>" if source_text else "",
        styles={"text-align": "left", "font-size": "12pt", "margin-top": "10px"}
    )

    # Combine all elements (callers rely on the figure being the third child)
    layout = column(title_div, subtitle_div, p, source_div)
    return layout


In [20]:
# Activate notebook output so the show() call at the end of this cell
# renders the Bokeh layout inline in the notebook
output_notebook()

def get_explanation_tab():
    """
    Build the content of the help tab: a heading followed by static HTML guidance.

    Returns
    -------
    bokeh layout
        A ``column`` with two children: the heading Div and the explanation Div.
    """
    # Static HTML describing the tabs, the chart elements and the data caveats
    explanation_html = """
        <p>This dashboard visualizes <strong>National Movement Patterns</strong> in Bangladesh, aggregated at the national level, based on data from Facebook's <em>Population During Crisis</em> dataset.</p>
        
        <p><strong>Tabs:</strong></p>
        <ul>
            <li><strong>Baseline:</strong> Expected population count under normal conditions.</li>
            <li><strong>Crisis:</strong> Observed population count during the crisis.</li>
            <li><strong>Difference:</strong> Change in population (Crisis - Baseline), highlighting movement trends.</li>
        </ul>
        
        <p><strong>How to Read the Charts:</strong></p>
        <ul>
            <li><strong>Lines:</strong> Each line represents a different time category (00:00, 08:00, 16:00).</li>
            <li><strong>Y-axis (Population Count):</strong> Total population observed during the respective time period.</li>
            <li><strong>X-axis (Date):</strong> Represents the date, allowing trend analysis over time.</li>
        </ul>
        
        <p><strong>Interpreting the Difference Chart:</strong></p>
        <ul>
            <li><strong>Values above 0:</strong> More people were present in an area during the crisis than expected.</li>
            <li><strong>Values below 0:</strong> Fewer people were present in an area during the crisis than expected.</li>
            <li><strong>The dashed line at 0:</strong> Acts as a reference to show whether population movement resulted in increases or decreases.</li>
        </ul>

        <p><strong>Limitations:</strong></p>
        <ul>
            <li>The dataset reflects Facebook users with location services enabled, which may not represent the entire population.</li>
            <li>Privacy-preserving noise is added, and regions with insufficient data are excluded.</li>
        </ul>
        """

    heading_styles = {"text-align": "left", "font-size": "18pt", "font-weight": "bold"}
    body_styles = {"text-align": "left", "font-size": "12pt"}

    return column(
        Div(text="<h2>How to Read the Chart</h2>", styles=heading_styles),
        Div(text=explanation_html, styles=body_styles),
    )



# Build the three national-level charts (baseline, crisis, difference) and show
# them, together with a help panel, as tabs.

# Create the baseline plot
baseline_plot = create_time_based_line_plot(
    data=aggregated_national_data,  # National-level data
    x_column="ds",
    y_column="n_baseline",  # Baseline population
    time_column="time_category",
    title="Baseline Population (Normal Conditions)",
    x_label="Date",
    y_label="Population Count",
    subtitle="Expected Facebook population before the crisis.",
    source_text="Source: Data for Good Meta"
)

# Create the crisis plot
crisis_plot = create_time_based_line_plot(
    data=aggregated_national_data,  # National-level data
    x_column="ds",
    y_column="n_crisis",  # Crisis population
    time_column="time_category",
    title="Crisis Population (During Crisis)",
    x_label="Date",
    y_label="Population Count",
    subtitle="Observed Facebook population during the crisis.",
    source_text="Source: Data for Good Meta"
)

# Compute the difference dataset (positive = more users than the baseline)
aggregated_national_data["population_difference"] = (
    aggregated_national_data["n_crisis"] - aggregated_national_data["n_baseline"]
)

# Create the difference plot
difference_plot = create_time_based_line_plot(
    data=aggregated_national_data,  # National-level data
    x_column="ds",
    y_column="population_difference",  # Difference between crisis and baseline
    time_column="time_category",
    title="Population Change (Crisis - Baseline)",
    x_label="Date",
    y_label="Population Change",
    subtitle="Change in Facebook population count (increase or decrease).",
    source_text="Source: Data for Good Meta"
)

# Add a horizontal dashed line at y = 0
zero_line = Span(location=0, dimension="width", line_color="black",
                 line_dash="dashed", line_width=2)
# The helper returns a column of Divs plus the figure; pick the figure out by type
# rather than by position so this keeps working if the layout order changes.
difference_figure = next(
    child for child in difference_plot.children if isinstance(child, figure)
)
# Span is an annotation, so attach it with add_layout (as the plotting helper does
# for its event lines) instead of appending it to the glyph renderer list.
difference_figure.add_layout(zero_line)

# One tab per chart, plus the explanation tab
tabs = [
    TabPanel(child=baseline_plot, title="Baseline"),
    TabPanel(child=crisis_plot, title="Crisis"),
    TabPanel(child=difference_plot, title="Difference"),
    TabPanel(child=get_explanation_tab(), title="How to read it?"),
]

# Show the tabs
tabs_layout = Tabs(tabs=tabs)
show(tabs_layout)
