Efficiency - Test

Imports

import os
import numpy as np
import pandas as pd
from iqual import  tests

Load datasets

# Folder containing the input CSV files (relative path).
data_dir = "../../data"

# Enhanced bootstrap data: read the CSV into a frame, then preview its first rows.
bootstrap_path = os.path.join(data_dir, "enhanced_bootstrap_mean.csv")
bootstrap_df = pd.read_csv(bootstrap_path)
bootstrap_df.head()
uid data_round refugee_status bootstrap_run annotated religious_act secular_act no_ambition_act vague_job_act job_secular_act ... ability_high ability_low budget_high budget_low awareness_information_high awareness_information_low camp_regulations covid_impacts public_assistance worries_anxieties
0 C601001009R2 R2 refugee 1 1 0.00 0.166667 0.0 0.333333 0.000000 ... 0.0 0.000000 0.0 0.333333 0.0 0.000000 0.00 0.000 0.0 0.0
1 C601001009R3 R3 refugee 1 1 0.00 0.117647 0.0 0.058824 0.294118 ... 0.0 0.058824 0.0 0.000000 0.0 0.117647 0.00 0.000 0.0 0.0
2 C601002009R2 R2 refugee 1 1 0.00 0.000000 0.0 0.125000 0.000000 ... 0.0 0.125000 0.0 0.125000 0.0 0.000000 0.25 0.000 0.0 0.0
3 C601003005R3 R3 refugee 1 1 0.00 0.000000 0.0 0.000000 0.250000 ... 0.0 0.000000 0.0 0.000000 0.0 0.000000 0.00 0.125 0.0 0.0
4 C602004004R3 R3 refugee 1 1 0.05 0.000000 0.0 0.000000 0.200000 ... 0.1 0.000000 0.0 0.000000 0.0 0.000000 0.05 0.000 0.0 0.0

5 rows × 80 columns

Annotations

# Annotation names to evaluate, one per line; order is preserved because the
# list is handed to the efficiency test as-is further down.
annotation_vars = [
    "ability_high",
    "ability_low",
    "awareness_information_high",
    "awareness_information_low",
    "budget_high",
    "budget_low",
    "covid_impacts",
    "education_high",
    "education_low",
    "education_neutral",
    "education_religious",
    "entrepreneur",
    "job_secular",
    "marriage",
    "migration",
    "no_ambition",
    "public_assistance",
    "reliance_on_god",
    "religious",
    "secular",
    "vague_job",
    "vague_non_specific",
    "vocational_training",
    "worries_anxieties",
]

Separate Datasets

### Split the bootstrap data into the three frames used by the efficiency test

# Human frame: rows where `annotated` equals 1.
is_annotated = bootstrap_df["annotated"] == 1
human_df = bootstrap_df[is_annotated]

# Machine frame: rows where `annotated` equals 0.
machine_df = bootstrap_df[bootstrap_df["annotated"] == 0]

# Third frame: either out-of-sample rows or in-sample average predictions.
#
# Out-of-sample alternative (left disabled):
# test_df = bootstrap_df[bootstrap_df.split == 'test']
#
# In-sample averages: mean of the numeric columns of the annotated rows within
# each (uid, refugee_status, data_round) group, with the keys restored as columns.
group_keys = ['uid', 'refugee_status', 'data_round']
grouped_means = human_df.groupby(group_keys).mean(numeric_only=True)
med_df = grouped_means.reset_index()

Use tests.Efficiency from iQual to get Measurement Error Variances

# Render floats with four decimal places in displayed frames.
pd.options.display.float_format = '{:.4f}'.format

# Build the iQual efficiency test from the human, in-sample-average and machine
# frames plus the annotation list, then collect its full results table.
efficiency_test = tests.Efficiency(human_df, med_df, machine_df, annotation_vars)
efficiency_df = efficiency_test.get_results_all()

print("Measurement Error Variances")

# Keep only the variance / standard-error columns; the bare expression on the
# last line is what the notebook displays as the cell output.
variance_columns = ['annotation', 'sig2_h', 'sig2_y', 'sig2_eps', 'se_h', 'se_enh']
efficiency_df[variance_columns]
Measurement Error Variances
annotation sig2_h sig2_y sig2_eps se_h se_enh
0 ability_high 0.0064 0.0098 0.0036 0.0029 0.0021
1 ability_low 0.0057 0.0050 0.0038 0.0027 0.0018
2 awareness_information_high 0.0091 0.0096 0.0070 0.0034 0.0024
3 awareness_information_low 0.0010 0.0008 0.0010 0.0011 0.0008
4 budget_high 0.0046 0.0055 0.0025 0.0024 0.0017
5 budget_low 0.0156 0.0116 0.0060 0.0044 0.0026
6 covid_impacts 0.0037 0.0030 0.0009 0.0022 0.0013
7 education_high 0.0093 0.0090 0.0055 0.0034 0.0023
8 education_low 0.0027 0.0023 0.0014 0.0019 0.0012
9 education_neutral 0.0245 0.0267 0.0108 0.0056 0.0037
10 education_religious 0.0047 0.0049 0.0023 0.0024 0.0016
11 entrepreneur 0.0053 0.0075 0.0015 0.0026 0.0018
12 job_secular 0.0156 0.0175 0.0055 0.0045 0.0029
13 marriage 0.0133 0.0127 0.0016 0.0041 0.0024
14 migration 0.0042 0.0026 0.0007 0.0023 0.0012
15 no_ambition 0.0015 0.0010 0.0010 0.0014 0.0009
16 public_assistance 0.0019 0.0017 0.0009 0.0015 0.0010
17 reliance_on_god 0.0041 0.0043 0.0020 0.0023 0.0015
18 religious 0.0060 0.0073 0.0020 0.0027 0.0018
19 secular 0.0090 0.0084 0.0042 0.0034 0.0022
20 vague_job 0.0129 0.0136 0.0047 0.0040 0.0026
21 vague_non_specific 0.0062 0.0073 0.0049 0.0028 0.0021
22 vocational_training 0.0014 0.0010 0.0003 0.0013 0.0007
23 worries_anxieties 0.0061 0.0042 0.0033 0.0028 0.0017