In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import Span
from shapely.geometry import Point
import geopandas as gpd
import glob
import bokeh
from datetime import datetime
from bokeh.layouts import column
from bokeh.models import Legend, Tabs, TabPanel
from bokeh.core.validation.warnings import MISSING_RENDERERS, EMPTY_LAYOUT

# Set fonts for matplotlib
plt.rcParams["font.family"] = "Arial"
plt.rcParams["font.size"] = 14

In [2]:

bokeh.core.validation.silence(EMPTY_LAYOUT, True)
bokeh.core.validation.silence(EMPTY_LAYOUT, True)



In [3]:
# Function to convert a pandas dataframe to a geopandas dataframe
def convert_to_gdf(df):
    geometry = [Point(xy) for xy in zip(df.longitude, df.latitude)]
    gdf = gpd.GeoDataFrame(df, crs="EPSG:4326", geometry=geometry)

    return gdf

In [4]:
# Read shapefiles from HdX UNOCHA
bgd_adm2 = gpd.read_file(
    "../../data/bgd_adm_bbs_20201113_SHP/bgd_admbnda_adm2_bbs_20201113.shp"
)
bgd_adm1 = gpd.read_file(
    "../../data/bgd_adm_bbs_20201113_SHP/bgd_admbnda_adm1_bbs_20201113.shp"
)

  _init_gdal_data()


# **Facebook Colocation Maps**

Facebook Colocation Maps estimate how often people from different regions are in the same area at the same time, or are “colocated.” For a pair of geographic regions `x` and `y`, these maps estimate the rate at which a randomly chosen person from `x` and a randomly chosen person from `y` are simultaneously located in the same general area during a randomly chosen time in a given week.

## **Notebook Overview**

This notebook contains two main analyses:

1. **Mean Colocation Rate by District**  
   This analysis shows the **average probability** of a colocation between a user from the origin area (`x`) and users from all other areas (`y`) during a given week. This provides an overview of how frequently people in a specific district interact with people from all other districts.

2. **Top 10 District Colocation**  
   This analysis highlights the **top 10 districts** most colocated with a given district. By examining these colocations over time, we can observe the evolution of interaction patterns, which may provide insights into how the dynamics of the Bangladesh crisis influenced regional interactions.

In [5]:
# raw data includes all countries and is big, we will only keep the relevant countries BGD and its neighbors, and save as processed to save space. 
relevant_countries = ["BGD", "IND", "MMR"]

# Read the population between places during crisis data
# contains raw data as downloaded from data for good and placed into the raw folder
# weekly data from 06-24-24 to 09-09-24 there is more availability but because of size I keep crisis data
all_files = glob.glob("../../data/movement-analysis/raw/colocation/*.csv")
li = []
for file in all_files:
    df1 = pd.read_csv(file)
    df1 = df1[df1["country"].isin(relevant_countries)]
    li.append(df1)
# Concatenate all the data into one DataFrame
colocation_ddf = pd.concat(li, axis=0)

# Save the data as processed to save space and the analysis can start from there

colocation_ddf.to_csv("../../data/movement-analysis/processed/colocation/colocation.csv", index=False)

In [6]:
# if you have the processed data start running from here 
# read processed data
colocation_ddf = pd.read_csv("../../data/movement-analysis/processed/colocation/colocation.csv")

# filter for bgd 
colocation_bgd = colocation_ddf[colocation_ddf["country"] == "BGD"]

# select only needed variables from admin2 

bgd_adm2 = bgd_adm2[["ADM2_EN", "ADM1_EN", "geometry"]]

bgd_adm2['ADM2_EN'] = bgd_adm2['ADM2_EN'].str.lower()
colocation_bgd['polygon1_name'] = colocation_bgd['polygon1_name'].str.lower()
colocation_bgd['polygon2_name'] = colocation_bgd['polygon2_name'].str.lower()

# Merge the datasets on standardized names
bgd_merged_data = pd.merge(
    colocation_bgd,
    bgd_adm2[['ADM2_EN', 'ADM1_EN', 'geometry']],
    left_on='polygon1_name',
    right_on='ADM2_EN',
    how='left'
)
# filter data to include period from 2024-06-24 to 2024-09-09
bgd_merged_data['ds'] = pd.to_datetime(bgd_merged_data['ds'])
bgd_merged_data = bgd_merged_data[
    (bgd_merged_data['ds'] >= '2024-06-24') & (bgd_merged_data['ds'] <= '2024-09-09')
]



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  colocation_bgd['polygon1_name'] = colocation_bgd['polygon1_name'].str.lower()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  colocation_bgd['polygon2_name'] = colocation_bgd['polygon2_name'].str.lower()


In [7]:
# Graph idea taken from: https://cmmid.github.io/colocation_dashboard_cmmid/?fbclid=IwZXh0bgNhZW0CMTEAAR1FAyQSdg_K96Nd7irL5-w83PRSuuYC2OhT1ZCmBb3sKSy5xFwAcqb_Mi0_aem_dhASIPnyL8DzXUZhPGXR1g
#  Identify "Within" vs. "Between" connections
bgd_merged_data['connection_type'] = bgd_merged_data.apply(
    lambda row: 'Within' if row['polygon1_id'] == row['polygon2_id'] else 'Between', axis=1
)

# Filter for "Between" connections only
bgd_filtered = bgd_merged_data[bgd_merged_data['connection_type'] == 'Between']

#  Calculate mean colocation rate by polygon1_name and date (ds)
mean_ts = bgd_filtered.groupby(['connection_type', 'polygon1_name', 'ds', 'ADM1_EN']).agg(
    mean_colocation=('weekly_colocation_rate', 'mean')
).reset_index()

#  Merge with external geographic data if needed (e.g., shapefile, admin info)
# Assuming `uk_data` is a GeoDataFrame with a column `NAME_2` corresponding to `polygon1_name`
# uk_data = gpd.read_file("path_to_your_geojson.geojson")
# uk_data['NAME_2'] = uk_data['NAME_2'].astype(str)
# mean_ts = mean_ts.merge(
#     uk_data.drop(columns='geometry'), left_on='polygon1_name', right_on='NAME_2', how='left'
# )

#  Calculate early reference values
early_date = bgd_filtered['ds'].min()  # Get the earliest date
early_ref = bgd_filtered[bgd_filtered['ds'] == early_date].groupby(
    ['connection_type', 'polygon1_name']
).agg(
    early_colocation=('weekly_colocation_rate', 'mean')
).reset_index()

# Calculate percent change over time
perc_ts = mean_ts.merge(early_ref, on=['connection_type', 'polygon1_name'], how='left')
perc_ts['mean_colocation'] = (perc_ts['mean_colocation'] / perc_ts['early_colocation']) * 100
perc_ts['type'] = 'perc_change'

# Add type = 'abs_value' for the original time series
mean_ts['type'] = 'abs_value'

# Combine absolute and percent change data
final_ts = pd.concat([mean_ts, perc_ts], ignore_index=True)

In [8]:
from bokeh.palettes import Category20

def create_line_plot(data, x_column, y_column, category_column, title, x_label, y_label):
    """
    Creates a Bokeh line plot with multiple lines for different categories.
    
    Parameters:
    - data: DataFrame, input data
    - x_column: str, column for x-axis values
    - y_column: str, column for y-axis values
    - category_column: str, column for categories to create separate lines
    - title: str, plot title
    - x_label: str, label for x-axis
    - y_label: str, label for y-axis
    
    Returns:
    - Bokeh plot object
    """
    # Ensure the x_column is datetime
    data = data.copy()
    data[x_column] = pd.to_datetime(data[x_column])
    
    # Initialize the figure
    p = figure(
        x_axis_type="datetime",
        title=title,
        width=800,
        height=400,
        toolbar_location="above",
    )
    
    # Define unique categories
    categories = data[category_column].unique()
    
    # Define color palette
    palette = Category20[len(categories)] if len(categories) <= 20 else Category20[20]
    
    # Loop through each category and add a line
    for i, category in enumerate(categories):
        category_data = data[data[category_column] == category]
        category_source = ColumnDataSource(category_data)
        p.line(
            x=x_column,
            y=y_column,
            source=category_source,
            line_width=2,
            color=palette[i % len(palette)],  # Cycle through colors if categories exceed palette size
            legend_label=str(category),
        )
    
    # Add hover tool
    hover = HoverTool(
        tooltips=[
            (x_label, f"@{x_column}{{%F}}"),
            (y_label, f"@{y_column}"),
            ("Category", f"@{category_column}")
        ],
        formatters={f"@{x_column}": "datetime"}
    )
    p.add_tools(hover)
    
    # Style the plot
    p.xaxis.axis_label = x_label
    p.yaxis.axis_label = y_label
    p.legend.location = "top_left"
    p.legend.click_policy = "hide"
    
    return p


In [10]:
from bokeh.models import Tabs, TabPanel, Div, ColumnDataSource
from bokeh.layouts import column
from bokeh.plotting import show
from bokeh.models import HoverTool

output_notebook()

# Function to create the "How to Read" tab
def create_how_to_read_tab():
    """
    Creates a tab explaining how to interpret the plots.
    
    Returns:
    - TabPanel object
    """
    description = Div(
        text="""
        <h2>How to Read the Plots</h2>
        <p>Each line in the plots represents the <strong>mean colocation rate (upper panel)</strong> or 
        <strong>percent change of mean colocation rate (lower panel)</strong> for a specific district over time.</p>
        <h3>Mean Colocation Rate</h3>
        <p>The <strong>Mean Colocation Rate</strong> shows the average probability of a colocation between a user from the origin area with a user from all other areas during a given week.</p>
        <p>A higher value indicates more frequent colocations.</p>
        <h3>Percent Change of Colocation Rate</h3>
        <p>The <strong>Percent Change</strong> normalizes the colocation rates to a baseline value 
        (100%) from the earliest date in the dataset.</p>
        <p>Values above 100% indicate an increase in colocation relative to the baseline, 
        while values below 100% indicate a decrease.</p>
        <h3>Example Interpretation</h3>
        <ul>
            <li>If a district has a <strong>Mean Colocation Rate</strong> of 0.000005, this means that, on average, 
            individuals in this district have a colocation probability of 0.0005% with individuals from other districts.</li>
            <li>If a district has a <strong>Percent Change</strong> value of 150%, this means that colocation 
            has increased by 50% compared to the baseline date.</li>
        </ul>
        """,
        width=800
    )
    return TabPanel(child=description, title="How to Read it?")

# Function to create a tab with two plots for a given Admin 1 region
def create_admin1_tab(admin1_name, full_data, tab_title, tab_subtitle, tab_source):
    """
    Creates a tab with two plots (abs_value and perc_change) for a given Admin 1 region,
    with a title and subtitle for the entire tab.
    
    Parameters:
    - admin1_name: str, name of the Admin 1 region
    - full_data: DataFrame, complete dataset with colocation data for all regions
    - tab_title: str, title for the tab
    - tab_subtitle: str, subtitle for the tab
    - tab_source: str, data source for the tab
    
    Returns:
    - TabPanel object
    """
    # Filter data for the Admin 1 region
    admin1_data = full_data[full_data['ADM1_EN'] == admin1_name]
    
    # Create line plot for abs_value
    abs_plot = create_line_plot(
        data=admin1_data[admin1_data['type'] == 'abs_value'],
        x_column='ds',
        y_column='mean_colocation',
        category_column='polygon1_name',
        title=None,  # Plot-specific title not needed
        x_label="Date",
        y_label="Mean Colocation Rate"
    )
    
    # Create line plot for perc_change
    perc_plot = create_line_plot(
        data=admin1_data[admin1_data['type'] == 'perc_change'],
        x_column='ds',
        y_column='mean_colocation',
        category_column='polygon1_name',
        title=None,  # Plot-specific title not needed
        x_label="Date",
        y_label="Percent Change (%)"
    )
    
    # Use Div for tab title and subtitle
    title_div = Div(
        text=f"<h2>{tab_title}</h2>", 
        styles={"text-align": "center", "font-size": "14pt", "font-weight": "bold"}
    )
    subtitle_div = Div(
        text=f"<p><em>{tab_subtitle}</em></p>",
        styles={"text-align": "center", "font-size": "12pt", "margin-bottom": "10px"}
    )

    source_div = Div(
        text=f"<p><small>{tab_source}</small></p>",
        styles={"text-align": "left", "font-size": "12pt", "margin-bottom": "10px"}
    )
    
    # Combine the title, subtitle, and both plots into a column layout
    layout = column(title_div, subtitle_div, source_div, abs_plot, perc_plot)
    
    # Create a TabPanel with the combined layout
    return TabPanel(child=layout, title=admin1_name)

# Generate tabs for each Admin 1 region
admin1_tabs = []
for admin1_name in final_ts['ADM1_EN'].unique():
    # Create a tab with title and subtitle for the region
    tab_title = f"Mean Colocation Data by District"
    tab_subtitle = "Weekly mean colocation probabilities for administrative areas"
    tab_source = "Source: Meta Data for Good"
    tab = create_admin1_tab(admin1_name, final_ts, tab_title, tab_subtitle, tab_source)
    admin1_tabs.append(tab)


how_to_read_tab = create_how_to_read_tab()
tabs_layout = Tabs(tabs= admin1_tabs + [how_to_read_tab])

# Show the tabs layout
show(tabs_layout)


In [11]:
from bokeh.models import ColumnDataSource, CustomJS, Select, Div, Tabs, Panel, Span
from bokeh.plotting import figure, show
from bokeh.layouts import column
from bokeh.palettes import Blues8
import pandas as pd

# Assuming colocation_bgd is already loaded as a DataFrame

# Filter for "Between" connections only
filtered_data = colocation_bgd[colocation_bgd['polygon1_name'] != colocation_bgd['polygon2_name']]

# Filter data to include period from 2024-06-24 to 2024-09-09
filtered_data = filtered_data[
    (filtered_data['ds'] >= '2024-06-24') & (filtered_data['ds'] <= '2024-09-09')
]

# Calculate the global maximum colocation rate across all data
global_max_colocation = filtered_data['weekly_colocation_rate'].max()

# Define a function to get the top 10 colocated districts for a given district and date
def get_top_10(data, district, date):
    district_data = data[(data['polygon1_name'] == district) & (data['ds'] == date)]
    top_10 = district_data.nlargest(10, 'weekly_colocation_rate')
    return top_10

# Initial setup (default district and date)
default_district = 'dhaka'
default_date = '2024-09-09'
top_10_default = get_top_10(filtered_data, default_district, default_date)

# Choose a single blue color
single_blue = "#08306b"

# Create a ColumnDataSource with the single blue color
source = ColumnDataSource(data={
    'to': top_10_default['polygon2_name'].tolist(),
    'value': top_10_default['weekly_colocation_rate'].tolist(),
    'color': [single_blue] * len(top_10_default)
})

# Create the bar plot
p = figure(y_range=top_10_default['polygon2_name'].tolist(),
           x_range=(0, global_max_colocation),
           title=f"Top 10 Colocated Districts with {default_district.capitalize()}",
           toolbar_location="above", width=800, height=400)

p.hbar(y='to', right='value', height=0.8, source=source, color='color')
p.xaxis.axis_label = "Colocation Rate"
p.yaxis.axis_label = "District"

# Add dropdowns for district and date selection
districts = sorted(filtered_data['polygon1_name'].unique().tolist())
dates = sorted(filtered_data['ds'].unique().tolist())

district_dropdown = Select(title="Select District", value=default_district, options=districts)
date_dropdown = Select(title="Select Date", value=default_date, options=dates)

# JavaScript callback to update the plot dynamically based on district and date
callback = CustomJS(args=dict(source=source, data=filtered_data.to_dict('list'), plot=p),
                    code="""
    const selectedDistrict = districtDropdown.value;
    const selectedDate = dateDropdown.value;
    const districtData = data['polygon1_name'];
    const dsData = data['ds'];
    const colocations = data['weekly_colocation_rate'];
    const targets = data['polygon2_name'];

    // Filter for the selected district and date
    let filteredIndices = [];
    for (let i = 0; i < districtData.length; i++) {
        if (districtData[i] === selectedDistrict && dsData[i] === selectedDate) {
            filteredIndices.push(i);
        }
    }

    // Get top 10 colocated districts
    const sortedIndices = filteredIndices.sort((a, b) => colocations[b] - colocations[a]).slice(0, 10);
    const topTo = sortedIndices.map(i => targets[i]);
    const topValues = sortedIndices.map(i => colocations[i]);

    // Apply the same blue color to all bars
    const singleBlue = "#08306b";
    const colors = Array(topTo.length).fill(singleBlue);

    // Update the data source
    source.data = {
        to: topTo,
        value: topValues,
        color: colors
    };

    // Update y_range and title
    plot.y_range.factors = topTo;
    plot.title.text = `Top 10 Colocated Districts with ${selectedDistrict.charAt(0).toUpperCase() + selectedDistrict.slice(1)}`;
    source.change.emit();
""")

# Link both dropdowns to the callback
callback.args['districtDropdown'] = district_dropdown
callback.args['dateDropdown'] = date_dropdown

district_dropdown.js_on_change("value", callback)
date_dropdown.js_on_change("value", callback)

# Add title, subtitle, and source
title_div = Div(
    text="<h2>Colocation Analysis: Top 10 Districts</h2>",
    styles={"text-align": "center", "font-size": "16pt", "font-weight": "bold"}
)
subtitle_div = Div(
    text="<p><em>This chart shows the top 10 districts colocated with the selected district and date based on the weekly colocation rate.</em></p>",
    styles={"text-align": "center", "font-size": "12pt", "margin-bottom": "10px"}
)
source_div = Div(
    text="<p><small>Source: Data for Good, Meta</small></p>",
    styles={"text-align": "center", "font-size": "10pt", "margin-top": "10px"}
)

# Combine the dropdowns, plot, and other elements into the layout
visualization_layout = column(title_div, subtitle_div, district_dropdown, date_dropdown, p, source_div)

# Create a "How to Read" tab
how_to_read_div = Div(
    text="""
    <h2>How to Read</h2>
    <p>This visualization shows the top 10 districts colocated with the selected district and date based on colocation rates.</p>
    <h3>Colocation Rate</h3>
    <p>The <strong>Colocation Rate</strong> represents the average probability that a person from the selected district was in close proximity to a person from another district during the selected week.</p>
    <p>Areas with higher mean colocation probabilities are more likely to share social contacts than those with lower probabilities.</p>
    """,
    width=800
)

tabs = []

tabs.append(TabPanel(child=visualization_layout, title="Visualization"))
tabs.append(TabPanel(child=how_to_read_div, title="How to read it?"))

tabs_layout = Tabs(tabs=tabs)

# Show the tabs
show(tabs_layout)

