Drought Analysis in Sri Lanka#

This notebook guides you through:

  1. Fetching administrative (ADM2) boundaries.

  2. Fetching timeseries climate data (the Standardized Precipitation Index, SPI).

  3. Identifying the number of extreme drought days (see the SPI classification note below).

  4. Plotting and mapping the results.

Open In Colab
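Before diving in, a quick note on interpretation: the Standardized Precipitation Index (SPI) expresses precipitation as standard deviations from the local long-term average, and values at or below -2 are commonly classified as extreme drought, which is the threshold used later in this notebook. The sketch below is illustrative only; the category labels follow the widely used McKee et al. classification and are not returned by the API.

# Illustrative only: common SPI categories (McKee et al.); the API returns raw SPI values
def classify_spi(spi: float) -> str:
    if spi <= -2.0:
        return "extreme drought"
    elif spi <= -1.5:
        return "severe drought"
    elif spi <= -1.0:
        return "moderate drought"
    elif spi < 1.0:
        return "near normal"
    elif spi < 1.5:
        return "moderately wet"
    elif spi < 2.0:
        return "very wet"
    else:
        return "extremely wet"

classify_spi(-2.3)  # 'extreme drought'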

# !pip install space2stats-client folium mapclassify matplotlib plotnine
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
from space2stats_client import Space2StatsClient
from shapely.geometry import shape
import json
from plotnine import (
    ggplot,
    aes,
    geom_map,
    coord_fixed,
    facet_wrap,
    scale_fill_distiller,
    element_rect,
    theme_void,
    theme,
)

Fetch ADM Boundaries#

# Set up the client and fetch ADM2 boundaries for Sri Lanka
client = Space2StatsClient()  # if running locally, use base_url='http://localhost:8000'
ISO3 = "LKA"  # Sri Lanka
ADM = "ADM2"
adm2_boundaries = client.fetch_admin_boundaries(ISO3, ADM)

print(f"Retrieved {len(adm2_boundaries)} ADM2 regions")
adm2_boundaries.plot(figsize=(3, 3))
plt.title("Sri Lanka ADM2 Regions")
Retrieved 25 ADM2 regions
[Figure: Sri Lanka ADM2 Regions]
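Before moving on, it is worth a quick sanity check of the returned boundaries, since column names and the CRS depend on the boundary source; the checks below only print metadata, including the NAM_2 district-name column that the rest of this notebook relies on.

# Optional sanity checks on the boundaries used for the spatial join
print(adm2_boundaries.crs)
print(adm2_boundaries.columns.tolist())
print(adm2_boundaries[["NAM_1", "NAM_2"]].head())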

Fetch SPI Timeseries Data#

# Define the fields and parameters
fields = ["spi"]
all_adm2_data = []

# Loop through each ADM2 region to fetch its data
for idx, adm2_feature in adm2_boundaries.iterrows():
    try:
        feature_gdf = gpd.GeoDataFrame([adm2_feature], geometry='geometry')

        result_df = client.get_timeseries(
            gdf=feature_gdf,
            spatial_join_method="centroid",
            fields=fields,
            start_date="2019-01-01",
            geometry="polygon",
        )

        if not result_df.empty:
            region_name = adm2_feature['NAM_2']
            result_df['region'] = region_name
            all_adm2_data.append(result_df)
            print(f"Retrieved data for {region_name}")
        else:
            print(f"No data found for {adm2_feature['NAM_2']}")
    except Exception as e:
        print(f"Error retrieving data for {adm2_feature['NAM_2']}: {e}")

# Combine all the dataframes
if all_adm2_data:
    all_adm2_data = pd.concat(all_adm2_data, ignore_index=True)
    print(f"Total records: {len(all_adm2_data)}")
else:
    print("No data was retrieved")
Fetching data for boundary 1 of 1...
Retrieved data for Kandy
Fetching data for boundary 1 of 1...
Retrieved data for Matale
Fetching data for boundary 1 of 1...
Retrieved data for Nuwara eliya
Fetching data for boundary 1 of 1...
Retrieved data for Ampara
Fetching data for boundary 1 of 1...
Retrieved data for Batticaloa
Fetching data for boundary 1 of 1...
Retrieved data for Trincomalee
Fetching data for boundary 1 of 1...
Retrieved data for Anuradhapura
Fetching data for boundary 1 of 1...
Retrieved data for Polonnaruwa
Fetching data for boundary 1 of 1...
Retrieved data for Kurunegala
Fetching data for boundary 1 of 1...
Retrieved data for Puttalam
Fetching data for boundary 1 of 1...
Retrieved data for Jaffna
Fetching data for boundary 1 of 1...
Retrieved data for Kilinochchi
Fetching data for boundary 1 of 1...
Retrieved data for Mannar
Fetching data for boundary 1 of 1...
Retrieved data for Mullaitivu
Fetching data for boundary 1 of 1...
Retrieved data for Vavuniya
Fetching data for boundary 1 of 1...
Retrieved data for Kegalle
Fetching data for boundary 1 of 1...
Retrieved data for Ratnapura
Fetching data for boundary 1 of 1...
Retrieved data for Galle
Fetching data for boundary 1 of 1...
Retrieved data for Hambantota
Fetching data for boundary 1 of 1...
Retrieved data for Matara
Fetching data for boundary 1 of 1...
Retrieved data for Badulla
Fetching data for boundary 1 of 1...
Retrieved data for Monaragala
Fetching data for boundary 1 of 1...
Retrieved data for Colombo
Fetching data for boundary 1 of 1...
Retrieved data for Gampaha
Fetching data for boundary 1 of 1...
Retrieved data for Kalutara
Total records: 121824
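The per-district requests can take a little while, so an optional convenience is to cache the combined dataframe locally and reload it on later runs instead of re-querying the API; the file name below is only a placeholder.

from pathlib import Path

cache_path = Path("lka_spi_timeseries.pkl")  # illustrative local cache file

# Save the combined dataframe so later runs can skip the API calls
if isinstance(all_adm2_data, pd.DataFrame) and not all_adm2_data.empty:
    all_adm2_data.to_pickle(cache_path)

# On a later run, load the cache instead of re-fetching:
# all_adm2_data = pd.read_pickle(cache_path)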

Map Monthly Hex Data (2024)#

# Work with a copy of the combined dataframe
df = all_adm2_data.copy()

# Convert date strings to datetime objects
if 'date' in df.columns and not pd.api.types.is_datetime64_any_dtype(df['date']):
    df['date'] = pd.to_datetime(df['date'])

df['year'] = df['date'].dt.year
df.head(2)
FID ISO_A3 ISO_A2 WB_A3 WB_REGION WB_STATUS NAM_0 NAM_1 ADM1CD_c GEOM_SRCE ... GlobalID Shape__Area Shape__Length hex_id date spi geometry area_id region year
0 8209 LKA LK LKA SAR Member State Sri Lanka Central LKA001 UN SALB ... 4636f0f5-8753-4e1c-8e75-a0bdf666a38c 0.157801 2.961513 866102607ffffff 2019-01-01 00:00:00+00:00 0.427178 {"type":"Polygon","coordinates":[[[80.58982915... 0 Kandy 2019
1 8209 LKA LK LKA SAR Member State Sri Lanka Central LKA001 UN SALB ... 4636f0f5-8753-4e1c-8e75-a0bdf666a38c 0.157801 2.961513 866102607ffffff 2019-02-01 00:00:00+00:00 -0.007487 {"type":"Polygon","coordinates":[[[80.58982915... 0 Kandy 2019

2 rows × 22 columns

# Filter year and convert geometry to shapely objects
df_filter = df.loc[df['year'] == 2024].copy()
if isinstance(df_filter["geometry"].iloc[0], str):
    df_filter["geometry"] = df_filter["geometry"].apply(json.loads)
df_filter["geometry"] = df_filter["geometry"].apply(shape)
gdf = gpd.GeoDataFrame(df_filter, geometry="geometry", crs="EPSG:4326")
gdf['ym'] = gdf['date'].dt.strftime('%Y-%m')
(
    ggplot(gdf)
    + geom_map(aes(fill="spi"), size=0)
    + scale_fill_distiller(type="div", palette="RdBu", name="SPI", limits=(-2, 2))
    + facet_wrap(
        "ym",
        ncol=4,
    )
    + coord_fixed(expand=False)
    + theme_void()
    + theme(
        figure_size=(8, 8),
        plot_margin=0.01,
        plot_background=element_rect(fill="white"),
        panel_spacing=0.025
    )
)
[Figure: monthly SPI by hexagon across Sri Lanka, faceted by month, 2024]
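If you want the faceted map as a standalone image, plotnine plot objects can be written to disk with their save() method; the sketch below rebuilds a minimal version of the plot above and uses an illustrative file name.

# Assign a minimal version of the plot above to a variable so it can be saved
p_hex = (
    ggplot(gdf)
    + geom_map(aes(fill="spi"), size=0)
    + scale_fill_distiller(type="div", palette="RdBu", name="SPI", limits=(-2, 2))
    + facet_wrap("ym", ncol=4)
    + theme_void()
)
p_hex.save("lka_spi_hex_2024.png", dpi=150)  # file name is illustrative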

Extract Extreme Drought Events per Year#

# Work with a copy of the combined dataframe
gdf = all_adm2_data.copy()

# Convert date strings to datetime objects
if 'date' in gdf.columns and not pd.api.types.is_datetime64_any_dtype(gdf['date']):
    gdf['date'] = pd.to_datetime(gdf['date'])

gdf['year'] = gdf['date'].dt.year

# Set an extreme drought threshold (SPI <= -2)
drought_threshold = -2

# Create a binary column indicating extreme drought days
gdf['extreme_drought'] = (gdf['spi'] <= drought_threshold).astype(int)

# Group by region and year, then count extreme drought days
yearly_drought = gdf.groupby(['region', 'year'])['extreme_drought'].sum().reset_index()

# Pivot the table to have years as columns
drought_pivot = yearly_drought.pivot(index='region', columns='year', values='extreme_drought')

# Convert the year column labels to strings
drought_pivot.columns = [f'{year}' for year in drought_pivot.columns]

# Add total drought days column
drought_pivot['Total Drought Days'] = drought_pivot.sum(axis=1)

# Sort by total drought days in descending order
drought_pivot = drought_pivot.sort_values('Total Drought Days', ascending=False)

# Reset index to make 'region' a regular column
result_table = drought_pivot.reset_index().rename(columns={'region': 'District'})

# Display the resulting table
display(result_table)
District 2019 2020 2021 2022 2023 2024 Total Drought Days
0 Nuwara eliya 0 0 0 0 0 78 78
1 Kandy 0 0 0 0 0 69 69
2 Badulla 0 0 0 0 0 54 54
3 Matale 0 0 0 0 0 45 45
4 Kurunegala 16 0 0 0 0 18 34
5 Jaffna 25 0 0 0 0 0 25
6 Monaragala 0 0 0 0 0 23 23
7 Ratnapura 0 0 0 0 0 22 22
8 Mullaitivu 0 14 0 0 0 6 20
9 Kegalle 9 0 0 0 0 10 19
10 Anuradhapura 0 0 0 0 0 16 16
11 Kilinochchi 10 1 0 0 0 0 11
12 Puttalam 9 0 0 0 0 0 9
13 Trincomalee 0 4 0 0 0 4 8
14 Gampaha 7 0 0 0 0 0 7
15 Matara 0 0 0 0 0 5 5
16 Polonnaruwa 0 0 0 0 0 4 4
17 Galle 0 0 0 0 0 2 2
18 Batticaloa 0 1 0 0 0 0 1
19 Mannar 1 0 0 0 0 0 1
20 Ampara 0 0 0 0 0 0 0
21 Kalutara 0 0 0 0 0 0 0
22 Hambantota 0 0 0 0 0 0 0
23 Colombo 0 0 0 0 0 0 0
24 Vavuniya 0 0 0 0 0 0 0
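One caveat with raw counts: districts that contain more hexagons accumulate more SPI records (hexagon-date combinations), so a large district can out-rank a smaller one experiencing the same drought intensity. An optional normalisation divides each district's count by its total number of records; the column names below (n_obs, n_extreme, share_extreme) are purely illustrative.

# Optional: share of records in extreme drought, adjusting for district size
n_obs = gdf.groupby('region').size().rename('n_obs')
n_extreme = gdf.groupby('region')['extreme_drought'].sum().rename('n_extreme')
drought_share = pd.concat([n_extreme, n_obs], axis=1)
drought_share['share_extreme'] = drought_share['n_extreme'] / drought_share['n_obs']
drought_share.sort_values('share_extreme', ascending=False).head()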

Plot Extreme Drought Events#

yearly_columns = [col for col in drought_pivot.columns if col != 'Total Drought Days']

# Plot
plt.figure(figsize=(12, 7))
for idx, row in result_table.iterrows():
    district = row['District']
    values = [row[col] for col in yearly_columns]
    plt.plot(yearly_columns, values, marker='o', linewidth=2, label=district)

plt.title('Extreme Drought Days per District (2019-2024)')
plt.xlabel('Year')
plt.ylabel('Number of Extreme Drought Days')
plt.grid(True, linestyle='--', alpha=0.7)
plt.legend()
plt.tight_layout()
plt.show()
[Figure: extreme drought days per district and year, 2019-2024]

Map Extreme Drought Events#

# Convert geometry field from GeoJSON to Shapely
if isinstance(gdf["geometry"].iloc[0], str):
    gdf["geometry"] = gdf["geometry"].apply(json.loads)
gdf["geometry"] = gdf["geometry"].apply(shape)

# (Optional) unique hexagon geometries, useful if you want to map results at the hex level
region_geo = gdf[['hex_id', 'region', 'geometry']].drop_duplicates(subset=['hex_id'])
region_geo_gdf = gpd.GeoDataFrame(region_geo, geometry="geometry", crs="EPSG:4326")

# Join the district-level drought counts onto the ADM2 boundaries (district names are in NAM_2)
drought_gdf = adm2_boundaries.merge(result_table, left_on='NAM_2', right_on='District')

# Prepare tooltip columns
yearly_columns = [col for col in drought_gdf.columns if col.isdigit() or col == 'Total Drought Days']
tooltip_columns = ['District'] + yearly_columns

# Create the map
m = drought_gdf.explore(
    column='Total Drought Days',
    tooltip=tooltip_columns,
    cmap="OrRd",
    legend=True,
    scheme="quantiles",
    legend_kwds=dict(colorbar=True, caption="Total Extreme Drought Days", interval=False),
    style_kwds=dict(weight=1, fillOpacity=0.8),
    name="Drought Risk by District"
)
# Add boundaries as a separate layer for the tooltip
drought_gdf.explore(
    m=m,
    style_kwds=dict(color="black", weight=0, opacity=0.5, fillOpacity=0),
    name="District Boundaries",
    tooltip=tooltip_columns
)
m
[Interactive map: total extreme drought days by ADM2 district]
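To share the interactive map outside the notebook, the folium map returned by explore() can be written to a standalone HTML file; the file name is only a placeholder.

# GeoDataFrame.explore() returns a folium.Map, which can be saved as HTML
m.save("lka_drought_map.html")  # file name is illustrative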

National Average#

df_average = df.dropna().groupby('date')['spi'].agg(['mean']).reset_index()
df_average.head()
date mean
0 2019-01-01 -0.370019
1 2019-02-01 -0.257167
2 2019-03-01 -0.383291
3 2019-04-01 -0.770942
4 2019-05-01 -1.237152
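As an optional extra, a centred rolling mean smooths month-to-month noise in the national series before plotting; the 3-month window below is an arbitrary, illustrative choice.

# Illustrative: 3-month centred rolling mean of the national SPI series
df_average['spi_smoothed'] = df_average['mean'].rolling(window=3, center=True).mean()
df_average[['date', 'mean', 'spi_smoothed']].head()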
from plotnine import (
    geom_bar,
    labs,
    theme_minimal,
    element_text,
    scale_y_continuous,
    scale_x_datetime
)
from mizani.breaks import date_breaks
from mizani.formatters import date_format
font = "Roboto"
p = (
    ggplot(df_average, aes(x="date", y="mean", fill="mean"))
    + geom_bar(alpha=0.8, stat="identity", color="black", width=20)
    + labs(
        x="",
        subtitle="Standardised Precipitation Index",
        title="Drought Index",
        y="",
        caption="Source: Space2Stats",
    )
    + theme_minimal()
    + theme(
        plot_background=element_rect(fill="white"),
        figure_size=(8, 6),
        text=element_text(family=font, size=11),
        plot_title=element_text(family=font, size=14, weight="bold"),
        legend_position="none",
    )
    + scale_fill_distiller(
        type="div", palette="RdYlBu", direction=1, limits=(-2, 2)
    )
    + scale_y_continuous(limits=(-2, 2))
    + scale_x_datetime(
        breaks=date_breaks(width="1 year"), labels=date_format("%Y")
    )
)
p
[Figure: Drought Index, national monthly average SPI, 2019-2024. Source: Space2Stats]