Interpretability Test

Contents

Interpretability Test#

Imports#

import os
import pandas as pd
from iqual import tests

Load datasets#

data_dir         = "../../data"

### Enhanced qualitative data
enhanced_data = pd.read_csv(os.path.join(data_dir,"kfold_enh_pred_mean.csv"))

### Quantitative data
quant_df      = pd.read_csv(os.path.join(data_dir,"quant_data.csv"))

Variables#

annotation_vars = [
     "ability_high",
     "ability_low",
     "awareness_information_high",
     "awareness_information_low",
     "budget_high",
     "budget_low",
     "covid_impacts",
     "education_high",
     "education_low",
     "education_neutral",
     "education_religious",
     "entrepreneur",
     "job_secular",
     "marriage",
     "migration",
     "no_ambition",
     "public_assistance",
     "reliance_on_god",
     "religious",
     "secular",
     "vague_job",
     "vague_non_specific",
     "vocational_training",
     "worries_anxieties",
    ]

Variables to Groupby#

id_vars_human    = ['uid','data_round','refugee','annotated']
id_vars_enhanced = ['uid','data_round','refugee','annotated','bootstrap_run']

Human Annotated Dataframe (Merged with quantiative data)#

human_df = enhanced_data.loc[enhanced_data.annotated==1,[*id_vars_human,*human_annotation_vars]].copy()
human_df = human_df.rename(columns={c+"_act":c for c in annotation_vars}).drop_duplicates()
human_df = pd.merge(human_df,quant_df,on=['uid','data_round'],how='left')

human_df['sample_type'] = 'Human'

Enhanced Dataframe (Merged with quantiative data)#

enhanced_df = enhanced_data[[*id_vars_enhanced,*annotation_vars]].copy()
enhanced_df = pd.merge(enhanced_df,quant_df,on=['uid','data_round','refugee'],how='left')

enhanced_df['sample_type'] = 'Enhanced'

HH characteristic variables for `Interpretability`#

# Numerical regressors
numerical_vars = [
     'hh_head_sex',
     'eld_sex',
     'parent_reledu',
     'num_child',
     'hh_head_age',
     'parent_eduyears',
     'eld_age',
     'hh_asset_index',
     'hh_income',
     'int_trauma_exp',
]

Interpretability tests on Human Data#

interpreter = tests.Interpretability(human_df,
                                     annotation_vars=annotation_vars,
                                     numerical_regressors=numerical_vars,
                ).fit_all()

human_interp_df = interpreter.get_results()
human_interp_df['sample_type'] = 'Human'

human_interp_df.head(5)

	annotation	fstat	log_fstat	pval	sample_type
0	ability_high	4.092906	1.409255	1.705786e-05	Human
1	ability_low	2.220586	0.797771	1.520367e-02	Human
2	awareness_information_high	4.439502	1.490542	4.458398e-06	Human
3	awareness_information_low	1.594255	0.466406	1.039609e-01	Human
4	budget_high	5.604002	1.723481	4.496422e-08	Human

Interpretability tests on Enhanced Data (Looping over Bootstrap runs)#

enh_interp_dfs = []

bootstrap_runs = enhanced_df['bootstrap_run'].unique()

for b in bootstrap_runs:
    interpreter = tests.Interpretability(enhanced_df[enhanced_df['bootstrap_run']==b],
                                         annotation_vars=annotation_vars,
                                    #     categorical_regressors=categorical_vars,
                                         numerical_regressors=numerical_vars,
                    ).fit_all()
    
    interp_df = interpreter.get_results()
    interp_df['bootstrap_run'] = b
    
    enh_interp_dfs.append(interp_df)
    
enhanced_interp_df = pd.concat(enh_interp_dfs)    
enhanced_interp_df['sample_type'] = 'Enhanced'

interpret_data = pd.concat([human_interp_df,enhanced_interp_df],axis=0)
interpret_data = interpret_data.sort_values('annotation',ascending=False,ignore_index=True)
interpret_data.head(5)

	annotation	fstat	log_fstat	pval	sample_type	bootstrap_run
0	worries_anxieties	9.836116	2.286061	2.895519e-16	Enhanced	10.0
1	worries_anxieties	10.534363	2.354643	1.333396e-17	Enhanced	3.0
2	worries_anxieties	10.886772	2.387548	2.812771e-18	Enhanced	1.0
3	worries_anxieties	8.596269	2.151328	6.682446e-14	Enhanced	9.0
4	worries_anxieties	5.055125	1.620403	3.978705e-07	Human	NaN

Calculate Differences#

enhanced_avg_df = enhanced_interp_df.groupby('annotation')['log_fstat'].mean().reset_index()
enhanced_avg_df['sample_type'] = 'Enhanced'

enhanced_avg_dict =  dict(enhanced_avg_df[['annotation','log_fstat']].values)
human_avg_dict    =  dict(human_interp_df[['annotation','log_fstat']].values)

Visualize#

import matplotlib.pyplot as plt

fig, ax = plt.subplots(figsize=(10,10))

# Difference in F-statistic (Logarithmic) averages 
for annotation in annotation_vars:
    enh_value, human_value = enhanced_avg_dict[annotation], human_avg_dict[annotation]
    line_color = 'lightgreen' if enh_value >= human_value else 'red'
    ax.plot([enh_value, human_value], [annotation, annotation], color=line_color, linewidth=2)

# Enhanced Data Log F-statistic values for all bootstrap runs
enhanced_x = interpret_data.loc[interpret_data['sample_type'] == 'Enhanced', 'log_fstat'].tolist()
enhanced_y = interpret_data.loc[interpret_data['sample_type'] == 'Enhanced', 'annotation'].tolist()
ax.scatter(enhanced_x, enhanced_y, fc=(0,0,0, 0.1), marker='o', s=50, ec='k', lw=1, label='Enhanced')
    
# Human Data F-statistic (Logarithmic) values
human_x = interpret_data.loc[interpret_data['sample_type'] == 'Human', 'log_fstat'].tolist()
human_y = interpret_data.loc[interpret_data['sample_type'] == 'Human', 'annotation'].tolist()
ax.scatter(human_x, human_y, c='black', marker='x', s=50, label='Human',)

# Titles, Labels, Legends and formatting
ax.set_xlabel('log F statistic',fontsize=12)
ax.set_ylabel('Annotation', fontsize=12)
ax.legend(title='Sample',fontsize=12,loc='best')
ax.set_title('Figure 9: Interpretability test',fontsize=18, pad=25)
ax.grid(True, color='lightgray')
plt.show()

../../_images/eb5c134712c9918499a739e9ed4392ba476a6b88cf90756d58ce804b0d078a78.png