S2S Kenya - Zonal statistics#

Zonal statistics are run on the standardized H3 grid; the process is run on a country-by-country basis.

For the zonal statistics, each zonal statistic is run against the source dataset as a whole, then it is stratified by urban classification from the European Commission - GHS-SMOD. This creates an summary dataset that has the standard zonal stats columns (SUM, MEAN, MAX, MIN) as well as the same for urban areas (SUM_urban, MEAN_urban, MAX_urban, MIN_urban).

Hide code cell source
import sys, os, importlib, math, multiprocessing
import rasterio, geojson

import pandas as pd
import geopandas as gpd
import numpy as np

from h3 import h3
from tqdm import tqdm
from shapely.geometry import Polygon

sys.path.insert(0, "/home/wb411133/Code/gostrocks/src")
import GOSTRocks.rasterMisc as rMisc
import GOSTRocks.ntlMisc as ntl
import GOSTRocks.mapMisc as mapMisc
from GOSTRocks.misc import tPrint

sys.path.append("../src")
import h3_helper
import country_zonal

%load_ext autoreload
%autoreload 2
/home/wb411133/.conda/envs/ee/lib/python3.9/site-packages/geopandas/_compat.py:106: UserWarning: The Shapely GEOS version (3.9.1-CAPI-1.14.2) is incompatible with the GEOS version PyGEOS was compiled with (3.10.4-CAPI-1.16.2). Conversions between both will be slow.
  warnings.warn(
Hide code cell source
sel_iso3 = "KEN"
h3_level = 6

admin_bounds = "/home/public/Data/GLOBAL/ADMIN/ADMIN2/HighRes_20230328/shp/WB_GAD_ADM2.shp"
out_folder = f"/home/wb411133/projects/Space2Stats/{sel_iso3}"
if not os.path.exists(out_folder):
    os.makedirs(out_folder)
global_urban = "/home/public/Data/GLOBAL/GHSL/SMOD/GHS_SMOD_E2020_GLOBE_R2023A_54009_1000_V1_0.tif"
Hide code cell source
inA = gpd.read_file(admin_bounds)
selA = inA.loc[inA['ISO_A3'] == sel_iso3].copy()
selA['ID'] = selA.index #Create ID for indexing

inU = rasterio.open(global_urban)
Hide code cell source
# As a demo, we will begin with nighttime lights layers
ntl_layers = ntl.aws_search_ntl()
ntl_file = ntl_layers[-1]

Generate zonal statistics at h3 based on urbanization levels#

Hide code cell source
def get_per(x):
    try:
        return(x['SUM_urban']/x['SUM'] * 100)
    except:
        return(0)

Population#

Hide code cell source
global_pop_layer = "/home/public/Data/GLOBAL/GHSL/Pop/GHS_POP_E2020_GLOBE_R2023A_54009_100_V1_0.tif"
#popR = rasterio.open(global_pop_layer)
#popR.profile
zonalC = country_zonal.country_h3_zonal(sel_iso3, selA, "ID", h3_level, out_folder)
zonal_res_pop = zonalC.zonal_raster_urban(global_pop_layer, global_urban)
Generating h3 grid level 6: 100%|██████████| 365/365 [00:07<00:00, 50.52it/s]
(1, 11563, 7953)
zonal_res_pop
SUM MIN MAX MEAN shape_id SUM_urban MIN_urban MAX_urban MEAN_urban
0 68.974775 0.0 17.155411 0.017668 867a413afffffff 0.0 0.0 0.0 0.000000
1 3.635393 0.0 3.635393 0.000946 867a740a7ffffff 0.0 0.0 0.0 0.000000
2 23259.264508 0.0 54.015900 5.928948 867a4c2afffffff 3698.0 0.0 1.0 0.942646
3 11.093490 0.0 2.705729 0.002883 867a4a9a7ffffff 0.0 0.0 0.0 0.000000
4 204.227029 0.0 59.439167 0.052950 867a43a87ffffff 0.0 0.0 0.0 0.000000
... ... ... ... ... ... ... ... ... ...
15246 1175.665880 0.0 39.080967 0.298468 867a4c99fffffff 126.0 0.0 1.0 0.031988
15247 92.193008 0.0 58.079727 0.024770 867a5bb0fffffff 0.0 0.0 0.0 0.000000
15248 53.785473 0.0 14.338305 0.014278 867a5d697ffffff 0.0 0.0 0.0 0.000000
15249 70.968876 0.0 19.066265 0.019243 867a5bcafffffff 0.0 0.0 0.0 0.000000
15250 7622.588723 0.0 61.932106 1.891461 867a69627ffffff 355.0 0.0 1.0 0.088089

15251 rows × 9 columns

map_data_pop = zonal_res_pop.sort_values("SUM_urban").loc[:,['shape_id','SUM','SUM_urban']].copy()
map_data_pop = pd.merge(zonalC.h3_cells, map_data_pop, on='shape_id')
map_data_pop['per_urban'] = map_data_pop.apply(get_per, axis=1)
map_data_pop.sort_values('SUM', ascending=False)
shape_id index_right NAM_1 GAUL_2 NAM_2 geometry SUM SUM_urban per_urban
0 867a42747ffffff 36824 Marsabit 0 Laisamis POLYGON ((38.14369 1.55168, 38.15556 1.58313, ... 0.000000e+00 0.0 0.000000
2802 867a46c1fffffff 36689 Garissa 0 Balambala POLYGON ((39.22998 0.19729, 39.24169 0.22906, ... 0.000000e+00 0.0 0.000000
7501 867a512d7ffffff 36894 Marsabit 0 North Horr POLYGON ((38.44169 2.51242, 38.45345 2.54352, ... 0.000000e+00 0.0 0.000000
2806 866a58ae7ffffff 36955 Turkana 0 Turkana West POLYGON ((34.84366 4.46588, 34.85599 4.49660, ... 0.000000e+00 0.0 0.000000
7494 867a66b17ffffff 36734 Tana River 0 Garsen POLYGON ((39.17285 -2.98710, 39.18473 -2.95444... 0.000000e+00 0.0 0.000000
... ... ... ... ... ... ... ... ... ...
4746 867a6e417ffffff 36913 Nairobi 0 Roysambu POLYGON ((36.93423 -1.21823, 36.94647 -1.18585... 4.574501e+05 455681.0 99.613269
9348 867a6e407ffffff 36913 Nairobi 0 Roysambu POLYGON ((36.88711 -1.25555, 36.89936 -1.22315... 5.515042e+05 549574.0 99.650015
12245 867a6e55fffffff 36781 Nairobi 0 Kibra POLYGON ((36.79280 -1.33023, 36.80507 -1.29781... 5.933135e+05 591558.0 99.704123
6380 867a6e437ffffff 36772 Nairobi 0 Kasarani POLYGON ((36.94461 -1.27810, 36.95685 -1.24570... 7.745367e+05 772815.0 99.777711
9201 867a6e427ffffff 36839 Nairobi 0 Makadara POLYGON ((36.89748 -1.31544, 36.90974 -1.28303... 1.188181e+06 1186359.0 99.846667

15251 rows × 9 columns

map_plt = mapMisc.static_map_vector(map_data_pop, "SUM", thresh=[50, 1000, 5000, 10000, 50000, 100000, 500000, 1500000], figsize=(15, 15))
map_plt.title("Total population")
map_plt.show()
../_images/dd1a39a23cddc3dd950ac22aaa0665f858b92b5959f3dec0af350ab9b328f4c0.png
map_plt = mapMisc.static_map_vector(map_data_pop, "per_urban", thresh=[0, 20, 40, 60, 80, 100, 1000], figsize=(15,15))
map_plt.title("Percent population in urban areas")
map_plt.show()
../_images/5d99f7f20fc56179c9a961fb9550875560abd46841df2df4085e125922730625.png

Nighttime Lights#

Hide code cell source
zonalC = country_zonal.country_h3_zonal(sel_iso3, selA, "ID", h3_level, out_folder)
zonal_res = zonalC.zonal_raster_urban(ntl_file, global_urban)

map_data = zonal_res.sort_values("SUM_urban").loc[:,['shape_id','SUM','SUM_urban']].copy()
map_data = pd.merge(zonalC.h3_cells, map_data, on='shape_id')
map_data['per_urban'] = map_data.apply(get_per, axis=1)
map_data.sort_values('SUM', ascending=False).head()
shape_id index_right NAM_1 GAUL_2 NAM_2 geometry SUM SUM_urban per_urban
6351 866a58a67ffffff 36955 Turkana 0 Turkana West POLYGON ((34.92733 4.59296, 34.93965 4.62362, ... 36.120003 0.0 0.000000
8183 866a59b0fffffff 36953 Turkana 0 Turkana North POLYGON ((35.87276 4.59425, 35.88490 4.62484, ... 37.129997 0.0 0.000000
1634 866a59857ffffff 36953 Turkana 0 Turkana North POLYGON ((35.61105 4.59085, 35.62324 4.62146, ... 39.250000 0.0 0.000000
2795 866a58bafffffff 36955 Turkana 0 Turkana West POLYGON ((34.66464 4.58921, 34.67700 4.61989, ... 39.839996 0.0 0.000000
12170 867ae185fffffff 36844 Mandera 0 Mandera North POLYGON ((40.98292 3.56723, 40.99410 3.59766, ... 40.509998 0.0 0.000000
... ... ... ... ... ... ... ... ... ...
11618 867a6e42fffffff 36968 Nairobi 0 Westlands POLYGON ((36.83997 -1.29288, 36.85223 -1.26047... 4407.709961 4147.0 94.085138
12431 867b59b2fffffff 36711 Mombasa 0 Changamwe POLYGON ((39.63531 -4.06321, 39.64715 -4.03034... 4457.299805 3930.0 88.169972
579 867b59b27ffffff 36872 Mombasa 0 Mvita POLYGON ((39.69284 -4.08543, 39.70467 -4.05255... 4545.560547 4409.0 96.995738
9201 867a6e427ffffff 36839 Nairobi 0 Makadara POLYGON ((36.89748 -1.31544, 36.90974 -1.28303... 6022.190430 5929.0 98.452549
6380 867a6e437ffffff 36772 Nairobi 0 Kasarani POLYGON ((36.94461 -1.27810, 36.95685 -1.24570... 6690.269531 6595.0 98.575999

15251 rows × 9 columns

map_plt = mapMisc.static_map_vector(map_data, "SUM", thresh=[50,100,500,1000,7000], figsize=(15,15))
map_plt.title("Total Nighttime Brightness")
map_plt.show()
../_images/f99178e829c968fc4835eb4a86907108fc097040e55ab54f262cd68baaf949f4.png
map_plt = mapMisc.static_map_vector(map_data, "per_urban", figsize=(15,15))
map_plt.title("Percent nighttime lights in urban areas")
map_plt.show()
../_images/b42c311dfeaf612da09b9aee74889efdc5c94bfde705764ba87a94436b7c6ea6.png

Flood Depth#

Hide code cell source
flood_depth = '/home/public/Data/GLOBAL/FLOOD_SSBN/v2_2019/Kenya/fluvial_undefended/FU_1in1000.tif'
zonalC = country_zonal.country_h3_zonal(sel_iso3, selA, "ID", h3_level, out_folder)
zonal_res = zonalC.zonal_raster_urban(flood_depth, global_urban, minVal=0, maxVal=10)

map_data = zonal_res.sort_values("SUM_urban").loc[:,['shape_id','SUM','SUM_urban']].copy()
map_data = pd.merge(zonalC.h3_cells, map_data, on='shape_id')
map_data['per_urban'] = map_data.apply(get_per, axis=1)
map_data.sort_values('SUM', ascending=False).head()
shape_id index_right NAM_1 GAUL_2 NAM_2 geometry SUM SUM_urban per_urban
4829 866a59a77ffffff 36953 Turkana 0 Turkana North POLYGON ((35.99526 4.49305, 36.00739 4.52367, ... 13498.005859 0.0 0.000000
5171 866a59a57ffffff 36953 Turkana 0 Turkana North POLYGON ((36.00521 4.43604, 36.01734 4.46668, ... 9973.853516 0.0 0.000000
5318 866a59b57ffffff 36953 Turkana 0 Turkana North POLYGON ((35.92904 4.57214, 35.94117 4.60273, ... 9751.447266 0.0 0.000000
5993 867a5da77ffffff 36952 Turkana 0 Turkana East POLYGON ((36.21001 2.04327, 36.22222 2.07472, ... 9321.406250 0.0 0.000000
199 867b5b0cfffffff 36734 Tana River 0 Garsen POLYGON ((40.35870 -2.50227, 40.37030 -2.46985... 8719.301758 79.0 0.906036
map_plt = mapMisc.static_map_vector(map_data, "SUM", thresh=[0,100,500,1000,5000,15000], figsize=(15,15))
map_plt.title("Total flood depth")
map_plt.show()
../_images/36d227879b78a837c7f3c24202941647481aac37bcfe5a19262b01a25870bb1f.png
Hide code cell source
# Use flooding as mask for summing population
flood_r = rasterio.open(flood_depth)
flood_data = flood_r.read()
flood_data = ((flood_data > 0) * (flood_data < 100)).astype(int)
'''
flood_mask_file = os.path.join(out_folder, os.path.basename(flood_depth).replace(".tif", "_mask.tif"))
with rasterio.open(flood_mask_file, 'w', **flood_r.profile) as out_flood:
    out_flood.write(flood_data)
flood_mask = rasterio.open(flood_mask_file)
zonal_res = zonalC.zonal_raster_urban(global_pop_layer, flood_mask, minVal=0, urban_mask_val=[1])
'''
with rMisc.create_rasterio_inmemory(flood_r.profile, flood_data) as flood_mask:
    zonal_res_pop = zonalC.zonal_raster_urban(global_pop_layer, flood_mask, minVal=0, urban_mask_val=[1])

map_data_pop = zonal_res_pop.sort_values("SUM_urban").loc[:,['shape_id','SUM','SUM_urban']].copy()
map_data_pop = pd.merge(zonalC.h3_cells, map_data_pop, on='shape_id')
map_data_pop['per_urban'] = map_data_pop.apply(get_per, axis=1)
map_data_pop.sort_values('SUM', ascending=False).head()
shape_id index_right NAM_1 GAUL_2 NAM_2 geometry SUM SUM_urban per_urban
0 867a42747ffffff 36824 Marsabit 0 Laisamis POLYGON ((38.14369 1.55168, 38.15556 1.58313, ... -1 -1.0 100.0
10172 866a5914fffffff 36953 Turkana 0 Turkana North POLYGON ((35.70693 4.34056, 35.71911 4.37125, ... -1 -1.0 100.0
10160 867a7545fffffff 36729 Garissa 0 Fafi POLYGON ((40.09253 -1.34859, 40.10414 -1.31645... -1 -1.0 100.0
10161 867a74217ffffff 36747 Garissa 0 Ijara POLYGON ((40.70511 -1.53392, 40.71660 -1.50179... -1 -1.0 100.0
10162 867a61c77ffffff 36761 Kajiado 0 Kajiado South POLYGON ((37.40516 -2.69114, 37.41739 -2.65841... -1 -1.0 100.0
map_plt = mapMisc.static_map_vector(map_data_pop, "SUM", figsize=(15,15), thresh=[1,5000,10000,50000,100000,10000000],edgecolor="match")
map_plt.title("Population exposed to flood")
map_plt.show()
/home/wb411133/.conda/envs/ee/lib/python3.9/site-packages/contextily/tile.py:581: UserWarning: The inferred zoom level of 27 is not valid for the current tile provider (valid zooms: 0 - 20).
  warnings.warn(msg)
../_images/92183251566c45f3c4b31d3b476d56ba6bf6676f608505a38b18ed40eacbfd7d.png
map_plt = mapMisc.static_map_vector(map_data_pop, "per_urban", figsize=(15,15), thresh=[1,25,50,70,90,100],edgecolor="match")
map_plt.title("Percent of population exposed to flood")
map_plt.show()
../_images/44bcbb158e79ec2aa280e6f698722cb8a1838e82ed8bb04060f5fafa32ce3bb6.png

Join h3 zonal results to admin boundaries#

below we compare timing the administrative summaries with and without fractional intersections. While there is a difference between the two, the overall process is quite fast. As the is no computational limitation, we will rely on the fractional intersection as it is more accurate.

### These two cells are used to test the speed of joining with and without
# %timeit cur_res = country_zonal.connect_polygons_h3_stats(selA, zonal_res, h3_level, "ID", True)
20.6 s ± 62.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
# %timeit cur_res = country_zonal.connect_polygons_h3_stats(selA, zonal_res, h3_level, "ID", False)
16.3 s ± 62.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
cur_res = country_zonal.connect_polygons_h3_stats(selA, zonal_res_pop, h3_level, "ID", True)
map_admin = pd.merge(selA, cur_res, left_on="ID", right_on='id')
map_admin['per_urban'] = map_admin.apply(get_per, axis=1)
map_plt = mapMisc.static_map_vector(map_admin, "SUM", figsize=(15,15))
map_plt.title("Total Population")
map_plt.show()
../_images/39aba5e50e0384cbebfa52b8ebebcdb22416d1d6e45098587cf05ef752db2981.png
map_plt = mapMisc.static_map_vector(map_admin, "per_urban", figsize=(15,15))
map_plt.title("Percent uban population")
map_plt.show()
../_images/1fcb1aa0a9d81a8c89a63417f11a8f8336e3e3f7b513e80c9739957f58920eea.png

DEBUGGING#

global_pop_layer = "/home/public/Data/GLOBAL/GHSL/Pop/GHS_POP_E2020_GLOBE_R2023A_54009_100_V1_0.tif"
popR = rasterio.open(global_pop_layer)
#popR.profile
rMisc.clipRaster(popR, zonalC.h3_cells, os.path.join(out_folder, os.path.basename(global_pop_layer)))
[array([[[-200., -200., -200., ..., -200., -200., -200.],
         [-200., -200., -200., ..., -200., -200., -200.],
         [-200., -200., -200., ..., -200., -200., -200.],
         ...,
         [-200., -200., -200., ..., -200., -200., -200.],
         [-200., -200., -200., ..., -200., -200., -200.],
         [-200., -200., -200., ..., -200., -200., -200.]]]),
 {'driver': 'GTiff',
  'dtype': 'float64',
  'nodata': -200.0,
  'width': 7913,
  'height': 11511,
  'count': 1,
  'crs': CRS.from_wkt('PROJCS["World_Mollweide",GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0],UNIT["Degree",0.0174532925199433]],PROJECTION["Mollweide"],PARAMETER["central_meridian",0],PARAMETER["false_easting",0],PARAMETER["false_northing",0],UNIT["metre",1,AUTHORITY["EPSG","9001"]],AXIS["Easting",EAST],AXIS["Northing",NORTH]]'),
  'transform': Affine(100.0, 0.0, 3397700.0,
         0.0, -100.0, 574900.0)}]