Economic Analysis of Network Test Performance in Bangladesh#

The analysis of network testing data in Bangladesh after July 15 reveals a sharp divergence in testing activity across regions and districts. A key factor contributing to this volatility is the government-imposed internet shutdown between July 18 and July 23, which led to a complete halt in testing during this period. This disruption likely explains the sharp declines observed in many districts, followed by abrupt increases in test counts once connectivity was restored.

Number of Internet Tests Taken#

Hide code cell source

import pandas as pd
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import ColumnDataSource, HoverTool, Tabs, TabPanel, Legend, Span
from bokeh.layouts import column, row
from bokeh.palettes import Category20
from datetime import datetime

# Read the balanced network-performance extract, parsing 'date_only' as datetimes
df = pd.read_csv('../../data/Filtered_NetworkPerformance_balanced.csv', parse_dates=['date_only'])

# For every region, remember the districts whose total test count is positive
region_district_map = {}
for region in df['region'].unique():
    district_totals = df.loc[df['region'] == region].groupby('district')['count'].sum()
    region_district_map[region] = district_totals.loc[district_totals.gt(0)].index.tolist()

# Keep the first eight regions (in order of first appearance in the file)
top_regions = df['region'].unique()[:8]
filtered_df = df.loc[df['region'].isin(top_regions)]

def get_region_plot(region_df, region_name):
    """Build the daily test-count chart for one region, one line per district.

    Args:
        region_df: Rows belonging to a single region; the columns
            ``date_only``, ``count`` and ``district`` are read here.
        region_name: Region key used to look up the module-level
            ``region_district_map`` and, capitalized, in the title and legend.

    Returns:
        The configured Bokeh figure.
    """
    region_title = region_name.capitalize()
    plot = figure(
        title=f"Daily Network Performance Tests in {region_title}",
        x_axis_type='datetime',
        width=1000,
        height=600,
    )

    # Districts registered as valid for the region that also occur in this slice
    present = region_df['district'].unique()
    districts = sorted(d for d in region_district_map[region_name] if d in present)
    palette = Category20[20]

    for idx, district in enumerate(districts):
        district_rows = region_df[region_df['district'] == district]
        if not (district_rows['count'].sum() > 0):
            continue
        plot.line(
            x='date_only',
            y='count',
            source=ColumnDataSource(district_rows),
            line_width=2,
            legend_label=f"{district.capitalize()}",
            # Wrap around the 20-colour palette when there are more districts
            color=palette[idx % len(palette)],
        )

    # Start and end of the internet shutdown, each with its legend text
    shutdown_markers = (
        (datetime(2024, 7, 18), 'Internet Shutdown Initiation (July 18)'),
        (datetime(2024, 7, 23), 'Internet Service Restoration (July 23)'),
    )
    for moment, _ in shutdown_markers:
        plot.add_layout(Span(location=moment, dimension='height', line_color='black',
                             line_width=2, line_dash='dashed'))

    # Span annotations carry no legend entry, so draw zero-length dashed
    # lines whose only purpose is to label the markers in the legend.
    for moment, label in shutdown_markers:
        plot.line([moment, moment], [0, 0], line_color='black', line_width=2,
                  line_dash='dashed', legend_label=label)

    # Move the legend to the right of the plot area and style it
    plot.add_layout(plot.legend[0], 'right')
    legend = plot.legend
    legend.click_policy = "hide"
    legend.title = f'Districts in {region_title}'
    legend.title_text_font_style = "bold"
    legend.title_text_font_size = "12pt"
    legend.label_text_font_size = "10pt"
    legend.background_fill_alpha = 0.7
    legend.border_line_color = "gray"
    legend.border_line_alpha = 0.5

    # Hover read-out
    hover = HoverTool(
        tooltips=[
            ("Date", "@date_only{%F}"),
            ("Number of Tests", "@count{0,0}"),
            ("District", "@district"),
        ],
        formatters={'@date_only': 'datetime'},
        mode='vline',
    )
    plot.add_tools(hover)

    # Axis labels and font sizes
    plot.xaxis.axis_label = 'Date'
    plot.yaxis.axis_label = 'Number of Network Performance Tests'
    for axis in (plot.xaxis, plot.yaxis):
        axis.axis_label_text_font_size = '12pt'
        axis.major_label_text_font_size = '10pt'
    plot.title.text_font_size = '14pt'

    # Overall look
    plot.grid.grid_line_color = "gray"
    plot.grid.grid_line_alpha = 0.3
    plot.background_fill_color = "#f8f9fa"
    plot.border_fill_color = "white"

    return plot

# Assemble one tab per region, each wrapping that region's plot, then render inline
tabs = []
for tab_region in top_regions:
    region_rows = filtered_df[filtered_df['region'] == tab_region]
    region_panel = TabPanel(child=row(get_region_plot(region_rows, tab_region)),
                            title=tab_region.capitalize())
    tabs.append(region_panel)

output_notebook()
show(Tabs(tabs=tabs, sizing_mode="scale_both"))
/var/folders/k7/tnfx_2n55hd1cf9b5gv3gp640000gn/T/ipykernel_66243/3071509058.py:1: DeprecationWarning: 
Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd
Loading BokehJS ...

Percentage Change in Daily Test Counts vs baseline (after July 15)#

Hide code cell source

import pandas as pd
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import ColumnDataSource, HoverTool, Tabs, TabPanel, Legend, Span
from bokeh.layouts import column, row
from bokeh.palettes import Category20
from datetime import datetime

# Read the balanced network-performance extract, parsing 'date_only' as datetimes
df = pd.read_csv('../../data/Filtered_NetworkPerformance_balanced.csv', parse_dates=['date_only'])

# For every region, remember the districts whose total test count is positive
region_district_map = {}
for region in df['region'].unique():
    district_totals = df.loc[df['region'] == region].groupby('district')['count'].sum()
    region_district_map[region] = district_totals.loc[district_totals.gt(0)].index.tolist()

# Keep the first eight regions (in order of first appearance in the file)
top_regions = df['region'].unique()[:8]
filtered_df = df.loc[df['region'].isin(top_regions)]

def calculate_percentage_change(district_data):
    """Return the post-July-15 rows with test counts expressed relative to baseline.

    The baseline is the mean of ``count`` over July days 1-15. Rows for July
    days after the 15th are returned as a new frame with an added
    ``pct_change`` column; the input frame is not modified.

    Args:
        district_data: Rows for one district; the columns ``date_only``
            (datetime) and ``count`` are read here.

    Returns:
        A copy of the July rows with day > 15, plus ``pct_change``. When the
        baseline is exactly 0 the change is reported as 0.0, matching
        ``export_network_test_data`` in this file, rather than inf/NaN from a
        division by zero. A missing baseline (no rows in days 1-15) still
        yields NaN.
    """
    dates = district_data['date_only']
    in_july = dates.dt.month == 7

    # Baseline: average of the first 15 days of July
    baseline = district_data.loc[in_july & (dates.dt.day <= 15), 'count'].mean()

    # Explicit copy so the new column is written to an independent frame
    july_data = district_data[in_july & (dates.dt.day > 15)].copy()

    if baseline == 0:
        # A relative change against a zero baseline is undefined; report 0
        july_data['pct_change'] = 0.0
    else:
        july_data['pct_change'] = ((july_data['count'] - baseline) / baseline) * 100

    return july_data

def get_region_plot(region_df, region_name):
    """Build the chart of percentage change in daily tests versus baseline.

    One line is drawn per district, using the ``pct_change`` values returned
    by ``calculate_percentage_change`` (July days after the 15th).

    Args:
        region_df: Rows belonging to a single region; the columns
            ``date_only``, ``count`` and ``district`` are read here.
        region_name: Region key used to look up the module-level
            ``region_district_map`` and, capitalized, in the title and legend.

    Returns:
        The configured Bokeh figure.
    """
    p = figure(
        title=f"Percentage Change in Daily Tests vs Baseline (After July 15) - {region_name.capitalize()}",
        x_axis_type='datetime',
        width=1000,
        height=600
    )

    # Get only valid districts for this region
    valid_districts = region_district_map[region_name]
    present_districts = region_df['district'].unique()
    districts = sorted(d for d in valid_districts if d in present_districts)
    palette = Category20[20]

    for i, district in enumerate(districts):
        district_data = region_df[region_df['district'] == district]
        if district_data['count'].sum() > 0:
            # Calculate percentage changes for this district
            plot_data = calculate_percentage_change(district_data)
            if not plot_data.empty:  # Only create line if we have data after July 15
                source = ColumnDataSource(plot_data)
                p.line(
                    x='date_only',
                    y='pct_change',
                    source=source,
                    line_width=2,
                    legend_label=f"{district.capitalize()}",
                    # Wrap around the 20-colour palette when there are more districts
                    color=palette[i % len(palette)]
                )

    # Add reference line at 0%, spanning only the plotted window. The plotted
    # data is restricted to July days after the 15th, so the same month filter
    # is applied here; filtering on the day alone would stretch the line over
    # days 16+ of any other month present in the data.
    all_dates = region_df['date_only']
    date_range = all_dates[(all_dates.dt.month == 7) & (all_dates.dt.day > 15)]
    if not date_range.empty:
        p.line(
            x=[date_range.min(), date_range.max()],
            y=[0, 0],
            line_width=2,
            color='red',
            line_dash='dashed',
            legend_label='Baseline'
        )

    # Add vertical lines for July 18 and July 23
    july_18 = datetime(2024, 7, 18)  # Adjust year as needed
    july_23 = datetime(2024, 7, 23)  # Adjust year as needed

    vline_18 = Span(location=july_18, dimension='height', line_color='black',
                    line_width=2, line_dash='dashed')
    vline_23 = Span(location=july_23, dimension='height', line_color='black',
                    line_width=2, line_dash='dashed')

    p.add_layout(vline_18)
    p.add_layout(vline_23)

    # Add legend entries for vertical lines: Span annotations carry no legend
    # entry, so zero-length dashed lines are drawn purely to label them.
    p.line([july_18, july_18], [0, 0], line_color='black', line_width=2,
           line_dash='dashed', legend_label='Internet Shutdown Initiation(July 18)')
    p.line([july_23, july_23], [0, 0], line_color='black', line_width=2,
           line_dash='dashed', legend_label='Internet Service Restoration(July 23)')

    # Configure legend
    p.add_layout(p.legend[0], 'right')
    p.legend.click_policy = "hide"
    p.legend.title = f'Districts in {region_name.capitalize()}'
    p.legend.title_text_font_style = "bold"
    p.legend.title_text_font_size = "12pt"
    p.legend.label_text_font_size = "10pt"
    p.legend.background_fill_alpha = 0.7
    p.legend.border_line_color = "gray"
    p.legend.border_line_alpha = 0.5

    # Add hover tool
    p.add_tools(HoverTool(
        tooltips=[
            ("Date", "@date_only{%F}"),
            ("Percentage Change", "@pct_change{0.1}%"),
            ("Daily Tests", "@count{0,0}"),
            ("District", "@district"),
        ],
        formatters={'@date_only': 'datetime'},
        mode='vline'
    ))

    # Configure axes
    p.xaxis.axis_label = 'Date'
    p.yaxis.axis_label = 'Percentage Change (%)'
    p.xaxis.axis_label_text_font_size = '12pt'
    p.yaxis.axis_label_text_font_size = '12pt'
    p.xaxis.major_label_text_font_size = '10pt'
    p.yaxis.major_label_text_font_size = '10pt'
    p.title.text_font_size = '14pt'

    # Style the plot
    p.grid.grid_line_color = "gray"
    p.grid.grid_line_alpha = 0.3
    p.background_fill_color = "#f8f9fa"
    p.border_fill_color = "white"

    return p

# Assemble one tab per region, each wrapping that region's plot, then render inline
tabs = []
for tab_region in top_regions:
    region_rows = filtered_df[filtered_df['region'] == tab_region]
    region_panel = TabPanel(child=row(get_region_plot(region_rows, tab_region)),
                            title=tab_region.capitalize())
    tabs.append(region_panel)

output_notebook()
show(Tabs(tabs=tabs, sizing_mode="scale_both"))
Loading BokehJS ...

Hide code cell source

def export_network_test_data(df, output_path='network_test_metrics.csv'):
    """Export July test counts with per-district baseline and percentage change.

    For every (region, district) pair the baseline is the mean of ``count``
    over July days 1-15. Each July row is written with its percentage change
    against that baseline; a baseline of exactly 0 yields a change of 0.

    Args:
        df: Frame with the columns ``region``, ``district``, ``date_only``
            (datetime) and ``count``.
        output_path: Destination CSV path.

    Returns:
        The exported metrics frame sorted by Region, District and Date. When
        there are no July rows an empty frame with the expected columns is
        written and returned. ``None`` is returned only if an exception
        occurs, in which case the error is printed.
    """
    columns = [
        'Date',
        'Region',
        'District',
        'Daily_Tests',
        'Baseline_Tests',
        'Percentage_Change',
        'Is_After_July_15',
    ]
    try:
        # One small frame per (region, district), concatenated at the end
        district_frames = []

        for region in df['region'].unique():
            region_data = df[df['region'] == region]

            for district in region_data['district'].unique():
                district_data = region_data[region_data['district'] == district]

                # Filter for July data
                july_data = district_data[district_data['date_only'].dt.month == 7]
                if july_data.empty:
                    continue

                day_of_month = july_data['date_only'].dt.day
                counts = july_data['count'].to_numpy()

                # Baseline: average of the first 15 days of July
                baseline = july_data.loc[day_of_month <= 15, 'count'].mean()

                # A zero baseline makes the relative change undefined; report 0
                if baseline != 0:
                    pct_change = (counts - baseline) / baseline * 100
                else:
                    pct_change = 0

                district_frames.append(pd.DataFrame({
                    'Date': july_data['date_only'].to_numpy(),
                    'Region': region,
                    'District': district,
                    'Daily_Tests': counts,
                    'Baseline_Tests': baseline,
                    'Percentage_Change': pct_change,
                    'Is_After_July_15': (day_of_month > 15).to_numpy(),
                }))

        # Explicit columns keep the sort below valid when nothing was collected
        if district_frames:
            metrics_df = pd.concat(district_frames, ignore_index=True)[columns]
        else:
            metrics_df = pd.DataFrame(columns=columns)

        metrics_df = metrics_df.sort_values(['Region', 'District', 'Date'])
        metrics_df.to_csv(output_path, index=False)
        print(f"Network test metrics exported to {output_path}")

        return metrics_df

    except Exception as e:
        # Best-effort export: report the failure and signal it with None
        print(f"Error exporting network test metrics: {str(e)}")
        return None

# Example usage: reload the source extract and write the metrics CSV beside it
file_path = '../../data/Filtered_NetworkPerformance_balanced.csv'
df = pd.read_csv(file_path, parse_dates=['date_only'])
network_metrics = export_network_test_data(
    df,
    output_path='../../data/network_test_metrics.csv',
)
Network test metrics exported to ../../data/network_test_metrics.csv

Average download speed tests#

Hide code cell source

import pandas as pd
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import ColumnDataSource, HoverTool, Tabs, TabPanel, Legend, Span
from bokeh.layouts import column, row
from bokeh.palettes import Category20
from datetime import datetime

# Read the balanced network-performance extract, parsing 'date_only' as datetimes
df = pd.read_csv('../../data/Filtered_NetworkPerformance_balanced.csv', parse_dates=['date_only'])

# Per district: average test count over days 1-15 of the month (the baseline),
# then zero the download speed on days whose count is below 90% of it.
district_baselines = {}
first_half_of_month = df['date_only'].dt.day <= 15
for district in df['district'].unique():
    in_district = df['district'] == district
    baseline = df.loc[in_district & first_half_of_month, 'count'].mean()
    district_baselines[district] = baseline
    low_usage = in_district & (df['count'] < 0.9 * baseline)
    df.loc[low_usage, 'av_val_download_mbps'] = 0

# For every region, remember the districts whose mean adjusted download speed is positive
region_district_map = {}
for region in df['region'].unique():
    mean_speed = df.loc[df['region'] == region].groupby('district')['av_val_download_mbps'].mean()
    region_district_map[region] = mean_speed.loc[mean_speed.gt(0)].index.tolist()

# Keep the first eight regions (in order of first appearance in the file)
top_regions = df['region'].unique()[:8]
filtered_df = df.loc[df['region'].isin(top_regions)]

def get_region_plot(region_df, region_name):
    """Build the average daily download-speed chart for one region.

    Args:
        region_df: Rows belonging to a single region; the columns
            ``date_only``, ``av_val_download_mbps``, ``count`` and
            ``district`` are read here.
        region_name: Region key used to look up the module-level
            ``region_district_map`` and, capitalized, in the title and legend.

    Returns:
        The configured Bokeh figure.
    """
    region_title = region_name.capitalize()
    plot = figure(
        title=f"Average Daily Download Speed in {region_title} (Adjusted for Low Usage)",
        x_axis_type='datetime',
        width=1000,
        height=600,
    )

    # Districts registered as valid for the region that also occur in this slice
    present = region_df['district'].unique()
    districts = sorted(d for d in region_district_map[region_name] if d in present)
    palette = Category20[20]

    for idx, district in enumerate(districts):
        district_rows = region_df[region_df['district'] == district]
        if not (district_rows['av_val_download_mbps'].mean() > 0):
            continue
        plot.line(
            x='date_only',
            y='av_val_download_mbps',
            source=ColumnDataSource(district_rows),
            line_width=2,
            legend_label=district.capitalize(),
            # Wrap around the 20-colour palette when there are more districts
            color=palette[idx % len(palette)],
        )

    # Start and end of the internet shutdown, each with its legend text
    shutdown_markers = (
        (datetime(2024, 7, 18), 'Internet Shutdown Initiation (July 18)'),
        (datetime(2024, 7, 23), 'Internet Service Restoration (July 23)'),
    )
    for moment, _ in shutdown_markers:
        plot.add_layout(Span(location=moment, dimension='height', line_color='black',
                             line_width=2, line_dash='dashed'))

    # Span annotations carry no legend entry, so draw zero-length dashed
    # lines whose only purpose is to label the markers in the legend.
    for moment, label in shutdown_markers:
        plot.line([moment, moment], [0, 0], line_color='black', line_width=2,
                  line_dash='dashed', legend_label=label)

    # Move the legend to the right of the plot area and style it
    plot.add_layout(plot.legend[0], 'right')
    legend = plot.legend
    legend.click_policy = "hide"
    legend.title = f'Districts in {region_title}'
    legend.title_text_font_style = "bold"
    legend.title_text_font_size = "12pt"
    legend.label_text_font_size = "10pt"
    legend.background_fill_alpha = 0.7
    legend.border_line_color = "gray"
    legend.border_line_alpha = 0.5

    # Hover read-out
    hover = HoverTool(
        tooltips=[
            ("Date", "@date_only{%F}"),
            ("Average Download Speed (Mbps)", "@av_val_download_mbps{0.00}"),
            ("Number of Tests", "@count"),
        ],
        formatters={'@date_only': 'datetime'},
        mode='vline',
    )
    plot.add_tools(hover)

    # Axis labels and font sizes
    plot.xaxis.axis_label = 'Date'
    plot.yaxis.axis_label = 'Download Speed (Mbps)'
    for axis in (plot.xaxis, plot.yaxis):
        axis.axis_label_text_font_size = '12pt'
        axis.major_label_text_font_size = '10pt'
    plot.title.text_font_size = '14pt'

    # Overall look
    plot.grid.grid_line_color = "gray"
    plot.grid.grid_line_alpha = 0.3
    plot.background_fill_color = "#f8f9fa"
    plot.border_fill_color = "white"

    return plot

# Assemble one tab per region, each wrapping that region's plot, then render inline
tabs = []
for tab_region in top_regions:
    region_rows = filtered_df[filtered_df['region'] == tab_region]
    region_panel = TabPanel(child=row(get_region_plot(region_rows, tab_region)),
                            title=tab_region.capitalize())
    tabs.append(region_panel)

output_notebook()
show(Tabs(tabs=tabs, sizing_mode="scale_both"))
Loading BokehJS ...

Percentage Change in Average Download Speeds vs baseline (after July 15)#

Hide code cell source

import pandas as pd
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import ColumnDataSource, HoverTool, Tabs, TabPanel, Legend, Span
from bokeh.layouts import column, row
from bokeh.palettes import Category20
from datetime import datetime

# Read the balanced network-performance extract, parsing 'date_only' as datetimes
df = pd.read_csv('../../data/Filtered_NetworkPerformance_balanced.csv', parse_dates=['date_only'])

# Preserve the unadjusted download speeds before the low-usage threshold is applied
df['original_download'] = df['av_val_download_mbps']

# Per district: average test count over days 1-15 of the month (the baseline),
# then zero the download speed on days whose count is below 90% of it.
district_baselines = {}
first_half_of_month = df['date_only'].dt.day <= 15
for district in df['district'].unique():
    in_district = df['district'] == district
    baseline = df.loc[in_district & first_half_of_month, 'count'].mean()
    district_baselines[district] = baseline
    low_usage = in_district & (df['count'] < 0.9 * baseline)
    df.loc[low_usage, 'av_val_download_mbps'] = 0

# For every region, remember the districts whose total test count is positive
region_district_map = {}
for region in df['region'].unique():
    district_totals = df.loc[df['region'] == region].groupby('district')['count'].sum()
    region_district_map[region] = district_totals.loc[district_totals.gt(0)].index.tolist()

# Keep the first eight regions (in order of first appearance in the file)
top_regions = df['region'].unique()[:8]
filtered_df = df.loc[df['region'].isin(top_regions)]

def calculate_download_percentage_change(district_data):
    """Return the post-July-15 rows with download speed expressed relative to baseline.

    The baseline is the mean of ``av_val_download_mbps`` over July days 1-15.
    Rows for July days after the 15th are returned as a new frame with an
    added ``pct_change`` column; the input frame is not modified.

    Args:
        district_data: Rows for one district; the columns ``date_only``
            (datetime) and ``av_val_download_mbps`` are read here.

    Returns:
        A copy of the July rows with day > 15, plus ``pct_change``. When the
        baseline is exactly 0 the change is reported as 0.0, matching
        ``export_network_test_data`` in this file, rather than inf/NaN from a
        division by zero. A missing baseline (no rows in days 1-15) still
        yields NaN.
    """
    dates = district_data['date_only']
    in_july = dates.dt.month == 7

    # Baseline: average download speed over the first 15 days of July
    baseline = district_data.loc[in_july & (dates.dt.day <= 15), 'av_val_download_mbps'].mean()

    # Explicit copy so the new column is written to an independent frame
    july_data = district_data[in_july & (dates.dt.day > 15)].copy()

    if baseline == 0:
        # A relative change against a zero baseline is undefined; report 0
        july_data['pct_change'] = 0.0
    else:
        july_data['pct_change'] = ((july_data['av_val_download_mbps'] - baseline) / baseline) * 100

    return july_data

def get_region_plot(region_df, region_name):
    """Build the chart of percentage change in download speed versus baseline.

    One line is drawn per district, using the ``pct_change`` values returned
    by ``calculate_download_percentage_change`` (July days after the 15th).

    Args:
        region_df: Rows belonging to a single region; the columns
            ``date_only``, ``av_val_download_mbps``, ``count`` and
            ``district`` are read here.
        region_name: Region key used to look up the module-level
            ``region_district_map`` and, capitalized, in the title and legend.

    Returns:
        The configured Bokeh figure.
    """
    p = figure(
        title=f"Percentage Change in Download Speed vs Baseline (After July 15) - {region_name.capitalize()}",
        x_axis_type='datetime',
        width=1000,
        height=600
    )

    # Get only valid districts for this region
    valid_districts = region_district_map[region_name]
    present_districts = region_df['district'].unique()
    districts = sorted(d for d in valid_districts if d in present_districts)
    palette = Category20[20]

    for i, district in enumerate(districts):
        district_data = region_df[region_df['district'] == district]
        if district_data['av_val_download_mbps'].mean() > 0:
            # Calculate percentage changes for this district
            plot_data = calculate_download_percentage_change(district_data)
            if not plot_data.empty:  # Only create line if we have data after July 15
                source = ColumnDataSource(plot_data)
                p.line(
                    x='date_only',
                    y='pct_change',
                    source=source,
                    line_width=2,
                    legend_label=f"{district.capitalize()}",
                    # Wrap around the 20-colour palette when there are more districts
                    color=palette[i % len(palette)]
                )

    # Add reference line at 0%, spanning only the plotted window. The plotted
    # data is restricted to July days after the 15th, so the same month filter
    # is applied here; filtering on the day alone would stretch the line over
    # days 16+ of any other month present in the data.
    all_dates = region_df['date_only']
    date_range = all_dates[(all_dates.dt.month == 7) & (all_dates.dt.day > 15)]
    if not date_range.empty:
        p.line(
            x=[date_range.min(), date_range.max()],
            y=[0, 0],
            line_width=2,
            color='red',
            line_dash='dashed',
            legend_label='Baseline'
        )

    # Add vertical lines for shutdown period
    july_18 = datetime(2024, 7, 18)
    july_23 = datetime(2024, 7, 23)

    vline_18 = Span(location=july_18, dimension='height', line_color='black',
                    line_width=2, line_dash='dashed')
    vline_23 = Span(location=july_23, dimension='height', line_color='black',
                    line_width=2, line_dash='dashed')

    p.add_layout(vline_18)
    p.add_layout(vline_23)

    # Add legend entries for vertical lines: Span annotations carry no legend
    # entry, so zero-length dashed lines are drawn purely to label them.
    p.line([july_18, july_18], [0, 0], line_color='black', line_width=2,
           line_dash='dashed', legend_label='Internet Shutdown Initiation (July 18)')
    p.line([july_23, july_23], [0, 0], line_color='black', line_width=2,
           line_dash='dashed', legend_label='Internet Service Restoration (July 23)')

    # Configure legend
    p.add_layout(p.legend[0], 'right')
    p.legend.click_policy = "hide"
    p.legend.title = f'Districts in {region_name.capitalize()}'
    p.legend.title_text_font_style = "bold"
    p.legend.title_text_font_size = "12pt"
    p.legend.label_text_font_size = "10pt"
    p.legend.background_fill_alpha = 0.7
    p.legend.border_line_color = "gray"
    p.legend.border_line_alpha = 0.5

    # Add hover tool
    p.add_tools(HoverTool(
        tooltips=[
            ("Date", "@date_only{%F}"),
            ("Percentage Change", "@pct_change{0.1}%"),
            ("Download Speed (Mbps)", "@av_val_download_mbps{0.00}"),
            ("Number of Tests", "@count{0,0}"),
            ("District", "@district"),
        ],
        formatters={'@date_only': 'datetime'},
        mode='vline'
    ))

    # Configure axes
    p.xaxis.axis_label = 'Date'
    p.yaxis.axis_label = 'Download Speed Change (%)'
    p.xaxis.axis_label_text_font_size = '12pt'
    p.yaxis.axis_label_text_font_size = '12pt'
    p.xaxis.major_label_text_font_size = '10pt'
    p.yaxis.major_label_text_font_size = '10pt'
    p.title.text_font_size = '14pt'

    # Style the plot
    p.grid.grid_line_color = "gray"
    p.grid.grid_line_alpha = 0.3
    p.background_fill_color = "#f8f9fa"
    p.border_fill_color = "white"

    return p

# Assemble one tab per region, each wrapping that region's plot, then render inline
tabs = []
for tab_region in top_regions:
    region_rows = filtered_df[filtered_df['region'] == tab_region]
    region_panel = TabPanel(child=row(get_region_plot(region_rows, tab_region)),
                            title=tab_region.capitalize())
    tabs.append(region_panel)

output_notebook()
show(Tabs(tabs=tabs, sizing_mode="scale_both"))
Loading BokehJS ...

Average Upload Speed Test#

Hide code cell source

import pandas as pd
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import ColumnDataSource, HoverTool, Tabs, TabPanel, Legend, Span
from bokeh.layouts import column, row
from bokeh.palettes import Category20
from datetime import datetime

# Read the balanced network-performance extract, parsing 'date_only' as datetimes
df = pd.read_csv('../../data/Filtered_NetworkPerformance_balanced.csv', parse_dates=['date_only'])

# Per district: average test count over days 1-15 of the month (the baseline),
# then zero the upload speed on days whose count is below 90% of it.
district_baselines = {}
first_half_of_month = df['date_only'].dt.day <= 15
for district in df['district'].unique():
    in_district = df['district'] == district
    baseline = df.loc[in_district & first_half_of_month, 'count'].mean()
    district_baselines[district] = baseline
    low_usage = in_district & (df['count'] < 0.9 * baseline)
    df.loc[low_usage, 'av_val_upload_mbps'] = 0

# For every region, remember the districts whose mean adjusted upload speed is positive
region_district_map = {}
for region in df['region'].unique():
    mean_speed = df.loc[df['region'] == region].groupby('district')['av_val_upload_mbps'].mean()
    region_district_map[region] = mean_speed.loc[mean_speed.gt(0)].index.tolist()

# Keep the first eight regions (in order of first appearance in the file)
top_regions = df['region'].unique()[:8]
filtered_df = df.loc[df['region'].isin(top_regions)]

def get_region_plot(region_df, region_name):
    """Build the average daily upload-speed chart for one region.

    Args:
        region_df: Rows belonging to a single region; the columns
            ``date_only``, ``av_val_upload_mbps``, ``count`` and
            ``district`` are read here.
        region_name: Region key used to look up the module-level
            ``region_district_map`` and, capitalized, in the title and legend.

    Returns:
        The configured Bokeh figure.
    """
    region_title = region_name.capitalize()
    plot = figure(
        title=f"Average Daily Upload Speed in {region_title} (Adjusted for Low Usage)",
        x_axis_type='datetime',
        width=1000,
        height=600,
    )

    # Districts registered as valid for the region that also occur in this slice
    present = region_df['district'].unique()
    districts = sorted(d for d in region_district_map[region_name] if d in present)
    palette = Category20[20]

    for idx, district in enumerate(districts):
        district_rows = region_df[region_df['district'] == district]
        if not (district_rows['av_val_upload_mbps'].mean() > 0):
            continue
        plot.line(
            x='date_only',
            y='av_val_upload_mbps',
            source=ColumnDataSource(district_rows),
            line_width=2,
            legend_label=district.capitalize(),
            # Wrap around the 20-colour palette when there are more districts
            color=palette[idx % len(palette)],
        )

    # Start and end of the internet shutdown, each with its legend text
    shutdown_markers = (
        (datetime(2024, 7, 18), 'Internet Shutdown Initiation (July 18)'),
        (datetime(2024, 7, 23), 'Internet Service Restoration (July 23)'),
    )
    for moment, _ in shutdown_markers:
        plot.add_layout(Span(location=moment, dimension='height', line_color='black',
                             line_width=2, line_dash='dashed'))

    # Span annotations carry no legend entry, so draw zero-length dashed
    # lines whose only purpose is to label the markers in the legend.
    for moment, label in shutdown_markers:
        plot.line([moment, moment], [0, 0], line_color='black', line_width=2,
                  line_dash='dashed', legend_label=label)

    # Move the legend to the right of the plot area and style it
    plot.add_layout(plot.legend[0], 'right')
    legend = plot.legend
    legend.click_policy = "hide"
    legend.title = f'Districts in {region_title}'
    legend.title_text_font_style = "bold"
    legend.title_text_font_size = "12pt"
    legend.label_text_font_size = "10pt"
    legend.background_fill_alpha = 0.7
    legend.border_line_color = "gray"
    legend.border_line_alpha = 0.5

    # Hover read-out
    hover = HoverTool(
        tooltips=[
            ("Date", "@date_only{%F}"),
            ("Average Upload Speed (Mbps)", "@av_val_upload_mbps{0.00}"),
            ("Number of Tests", "@count"),
        ],
        formatters={'@date_only': 'datetime'},
        mode='vline',
    )
    plot.add_tools(hover)

    # Axis labels and font sizes
    plot.xaxis.axis_label = 'Date'
    plot.yaxis.axis_label = 'Upload Speed (Mbps)'
    for axis in (plot.xaxis, plot.yaxis):
        axis.axis_label_text_font_size = '12pt'
        axis.major_label_text_font_size = '10pt'
    plot.title.text_font_size = '14pt'

    # Overall look
    plot.grid.grid_line_color = "gray"
    plot.grid.grid_line_alpha = 0.3
    plot.background_fill_color = "#f8f9fa"
    plot.border_fill_color = "white"

    return plot

# Assemble one tab per region, each wrapping that region's plot, then render inline
tabs = []
for tab_region in top_regions:
    region_rows = filtered_df[filtered_df['region'] == tab_region]
    region_panel = TabPanel(child=row(get_region_plot(region_rows, tab_region)),
                            title=tab_region.capitalize())
    tabs.append(region_panel)

output_notebook()
show(Tabs(tabs=tabs, sizing_mode="scale_both"))
Loading BokehJS ...

Percentage change in upload speed vs baseline (after July 15)#

Hide code cell source

import pandas as pd
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import ColumnDataSource, HoverTool, Tabs, TabPanel, Legend, Span
from bokeh.layouts import column, row
from bokeh.palettes import Category20
from datetime import datetime

# Read the balanced network-performance extract, parsing 'date_only' as datetimes
df = pd.read_csv('../../data/Filtered_NetworkPerformance_balanced.csv', parse_dates=['date_only'])

# Preserve the unadjusted upload speeds before the low-usage threshold is applied
df['original_upload'] = df['av_val_upload_mbps']

# Per district: average test count over days 1-15 of the month (the baseline),
# then zero the upload speed on days whose count is below 90% of it.
district_baselines = {}
first_half_of_month = df['date_only'].dt.day <= 15
for district in df['district'].unique():
    in_district = df['district'] == district
    baseline = df.loc[in_district & first_half_of_month, 'count'].mean()
    district_baselines[district] = baseline
    low_usage = in_district & (df['count'] < 0.9 * baseline)
    df.loc[low_usage, 'av_val_upload_mbps'] = 0

# For every region, remember the districts whose total test count is positive
region_district_map = {}
for region in df['region'].unique():
    district_totals = df.loc[df['region'] == region].groupby('district')['count'].sum()
    region_district_map[region] = district_totals.loc[district_totals.gt(0)].index.tolist()

# Keep the first eight regions (in order of first appearance in the file)
top_regions = df['region'].unique()[:8]
filtered_df = df.loc[df['region'].isin(top_regions)]

def calculate_upload_percentage_change(district_data):
    """Return the post-July-15 rows with upload speed expressed relative to baseline.

    The baseline is the mean of ``av_val_upload_mbps`` over July days 1-15.
    Rows for July days after the 15th are returned as a new frame with an
    added ``pct_change`` column; the input frame is not modified.

    Args:
        district_data: Rows for one district; the columns ``date_only``
            (datetime) and ``av_val_upload_mbps`` are read here.

    Returns:
        A copy of the July rows with day > 15, plus ``pct_change``. When the
        baseline is exactly 0 the change is reported as 0.0, matching
        ``export_network_test_data`` in this file, rather than inf/NaN from a
        division by zero. A missing baseline (no rows in days 1-15) still
        yields NaN.
    """
    dates = district_data['date_only']
    in_july = dates.dt.month == 7

    # Baseline: average upload speed over the first 15 days of July
    baseline = district_data.loc[in_july & (dates.dt.day <= 15), 'av_val_upload_mbps'].mean()

    # Explicit copy so the new column is written to an independent frame
    july_data = district_data[in_july & (dates.dt.day > 15)].copy()

    if baseline == 0:
        # A relative change against a zero baseline is undefined; report 0
        july_data['pct_change'] = 0.0
    else:
        july_data['pct_change'] = ((july_data['av_val_upload_mbps'] - baseline) / baseline) * 100

    return july_data

def get_region_plot(region_df, region_name):
    """Build the Bokeh figure of upload-speed change vs. baseline for a region.

    One line is drawn per district that is listed for ``region_name`` in the
    module-level ``region_district_map``, is present in ``region_df``, has a
    positive mean 'av_val_upload_mbps', and has rows returned by
    ``calculate_upload_percentage_change``. A dashed red 0% reference line and
    two dashed vertical markers (July 18 and July 23, 2024) are added on top.

    Args:
        region_df: DataFrame holding the rows of one region. The columns
            'district', 'date_only' and 'av_val_upload_mbps' are read here;
            the hover tooltips also reference 'count'.
        region_name: Key into ``region_district_map``; also used
            (capitalized) in the plot title and the legend title.

    Returns:
        The configured Bokeh figure.
    """
    p = figure(
        title=f"Percentage Change in Upload Speed vs Baseline (After July 15) - {region_name.capitalize()}",
        x_axis_type='datetime',
        width=1000,
        height=600
    )

    # Get only valid districts for this region; the membership set is built
    # once instead of re-computing unique() for every candidate district
    present = set(region_df['district'].unique())
    districts = sorted(d for d in region_district_map[region_name] if d in present)
    palette = Category20[20]

    district_renderers = []
    for i, district in enumerate(districts):
        district_data = region_df[region_df['district'] == district]
        # Skip districts whose mean upload speed is zero or NaN
        if not (district_data['av_val_upload_mbps'].mean() > 0):
            continue
        plot_data = calculate_upload_percentage_change(district_data)
        if plot_data.empty:  # Only create a line if we have data after July 15
            continue
        district_renderers.append(p.line(
            x='date_only',
            y='pct_change',
            source=ColumnDataSource(plot_data),
            line_width=2,
            legend_label=f"{district.capitalize()}",
            color=palette[i % len(palette)]  # wrap around past 20 districts
        ))

    # Add reference line at 0%, spanning the same July-after-the-15th window
    # that the district lines cover
    dates = region_df['date_only']
    date_range = dates[(dates.dt.month == 7) & (dates.dt.day > 15)]
    if not date_range.empty:
        p.line(
            x=[date_range.min(), date_range.max()],
            y=[0, 0],
            line_width=2,
            color='red',
            line_dash='dashed',
            legend_label='Baseline'
        )

    # Add vertical lines for the shutdown period. A Span cannot carry a legend
    # entry, so a zero-length proxy line provides one; linking the proxy's
    # visibility to the Span makes the legend's click-to-hide act on the marker.
    shutdown_markers = (
        (datetime(2024, 7, 18), 'Internet Shutdown Initiation (July 18)'),
        (datetime(2024, 7, 23), 'Internet Service Restoration (July 23)'),
    )
    for when, label in shutdown_markers:
        marker = Span(location=when, dimension='height', line_color='black',
                      line_width=2, line_dash='dashed')
        p.add_layout(marker)
        proxy = p.line([when, when], [0, 0], line_color='black', line_width=2,
                       line_dash='dashed', legend_label=label)
        proxy.js_link('visible', marker, 'visible')

    # Configure legend
    p.add_layout(p.legend[0], 'right')
    p.legend.click_policy = "hide"
    p.legend.title = f'Districts in {region_name.capitalize()}'
    p.legend.title_text_font_style = "bold"
    p.legend.title_text_font_size = "12pt"
    p.legend.label_text_font_size = "10pt"
    p.legend.background_fill_alpha = 0.7
    p.legend.border_line_color = "gray"
    p.legend.border_line_alpha = 0.5

    # Add hover tool, limited to the district lines: the baseline and proxy
    # lines have none of the referenced columns and would show "???"
    p.add_tools(HoverTool(
        renderers=district_renderers,
        tooltips=[
            ("Date", "@date_only{%F}"),
            ("Percentage Change", "@pct_change{0.1}%"),
            ("Upload Speed (Mbps)", "@av_val_upload_mbps{0.00}"),
            ("Number of Tests", "@count{0,0}"),
            ("District", "@district"),
        ],
        formatters={'@date_only': 'datetime'},
        mode='vline'
    ))

    # Configure axes
    p.xaxis.axis_label = 'Date'
    p.yaxis.axis_label = 'Upload Speed Change (%)'
    p.xaxis.axis_label_text_font_size = '12pt'
    p.yaxis.axis_label_text_font_size = '12pt'
    p.xaxis.major_label_text_font_size = '10pt'
    p.yaxis.major_label_text_font_size = '10pt'
    p.title.text_font_size = '14pt'

    # Style the plot
    p.grid.grid_line_color = "gray"
    p.grid.grid_line_alpha = 0.3
    p.background_fill_color = "#f8f9fa"
    p.border_fill_color = "white"

    return p

# Build one tab per selected region, each wrapping that region's plot in a row
tabs = []
for region in top_regions:
    region_rows = filtered_df[filtered_df['region'] == region]
    region_plot = get_region_plot(region_rows, region)
    tabs.append(TabPanel(child=row(region_plot), title=region.capitalize()))

# Route Bokeh output to the notebook, then display the tabbed layout
output_notebook()
tabbed_layout = Tabs(tabs=tabs, sizing_mode="scale_both")
show(tabbed_layout)
Loading BokehJS ...