Efficiency - Test

Contents

Efficiency - Test

Imports

import os
import numpy as np
import pandas as pd
from iqual import  tests

Load datasets

# Directory holding the CSV inputs, relative to where this notebook runs.
data_dir = "../../data"

# Enhanced bootstrap data: build the path first, then read it into a frame.
bootstrap_csv = os.path.join(data_dir, "enhanced_bootstrap_mean.csv")
bootstrap_df = pd.read_csv(bootstrap_csv)

# Preview the first rows (displayed as the cell output).
bootstrap_df.head()

	uid	data_round	refugee_status	bootstrap_run	annotated	religious_act	secular_act	no_ambition_act	vague_job_act	job_secular_act	...	ability_high	ability_low	budget_high	budget_low	awareness_information_high	awareness_information_low	camp_regulations	covid_impacts	public_assistance	worries_anxieties
0	C601001009R2	R2	refugee	1	1	0.00	0.166667	0.0	0.333333	0.000000	...	0.0	0.000000	0.0	0.333333	0.0	0.000000	0.00	0.000	0.0	0.0
1	C601001009R3	R3	refugee	1	1	0.00	0.117647	0.0	0.058824	0.294118	...	0.0	0.058824	0.0	0.000000	0.0	0.117647	0.00	0.000	0.0	0.0
2	C601002009R2	R2	refugee	1	1	0.00	0.000000	0.0	0.125000	0.000000	...	0.0	0.125000	0.0	0.125000	0.0	0.000000	0.25	0.000	0.0	0.0
3	C601003005R3	R3	refugee	1	1	0.00	0.000000	0.0	0.000000	0.250000	...	0.0	0.000000	0.0	0.000000	0.0	0.000000	0.00	0.125	0.0	0.0
4	C602004004R3	R3	refugee	1	1	0.05	0.000000	0.0	0.000000	0.200000	...	0.1	0.000000	0.0	0.000000	0.0	0.000000	0.05	0.000	0.0	0.0

5 rows × 80 columns

Annotations

# Names of the annotation variables handed to `tests.Efficiency` below.
# Kept in alphabetical order, one per line, so additions diff cleanly.
annotation_vars = [
    'ability_high',
    'ability_low',
    'awareness_information_high',
    'awareness_information_low',
    'budget_high',
    'budget_low',
    'covid_impacts',
    'education_high',
    'education_low',
    'education_neutral',
    'education_religious',
    'entrepreneur',
    'job_secular',
    'marriage',
    'migration',
    'no_ambition',
    'public_assistance',
    'reliance_on_god',
    'religious',
    'secular',
    'vague_job',
    'vague_non_specific',
    'vocational_training',
    'worries_anxieties',
]

Separate Datasets

### Split the bootstrap data into the frames used by the efficiency test

# Human DF: rows flagged annotated == 1
human_df = bootstrap_df[bootstrap_df.annotated == 1]

# Machine DF: rows flagged annotated == 0
machine_df = bootstrap_df[bootstrap_df.annotated == 0]

# Out-sample data / in-sample average predictions
#
# Out-sample alternative (disabled):
# test_df = bootstrap_df[bootstrap_df.split == 'test']
#
# In-sample average predictions: mean of every numeric column over the
# annotated rows, one row per (uid, refugee_status, data_round).
group_keys = ['uid', 'refugee_status', 'data_round']
annotated_groups = human_df.groupby(group_keys)
med_df = annotated_groups.mean(numeric_only=True).reset_index()

Use `tests.Efficiency` from `iQual` to get `Measurement Error Variances`

# Render floats with four decimal places in displayed frames.
pd.options.display.float_format = '{:.4f}'.format

# Build the efficiency test from the human, in-sample-average and machine
# frames, then collect the results for every annotation variable.
efficiency_test = tests.Efficiency(human_df, med_df, machine_df, annotation_vars)
efficiency_df = efficiency_test.get_results_all()

# Columns shown in the report; their statistical definitions come from
# iQual's `tests.Efficiency` (not visible here; see the library docs).
report_columns = ['annotation', 'sig2_h', 'sig2_y', 'sig2_eps', 'se_h', 'se_enh']

print("Measurement Error Variances")
efficiency_df[report_columns]

Measurement Error Variances

	annotation	sig2_h	sig2_y	sig2_eps	se_h	se_enh
0	ability_high	0.0064	0.0098	0.0036	0.0029	0.0021
1	ability_low	0.0057	0.0050	0.0038	0.0027	0.0018
2	awareness_information_high	0.0091	0.0096	0.0070	0.0034	0.0024
3	awareness_information_low	0.0010	0.0008	0.0010	0.0011	0.0008
4	budget_high	0.0046	0.0055	0.0025	0.0024	0.0017
5	budget_low	0.0156	0.0116	0.0060	0.0044	0.0026
6	covid_impacts	0.0037	0.0030	0.0009	0.0022	0.0013
7	education_high	0.0093	0.0090	0.0055	0.0034	0.0023
8	education_low	0.0027	0.0023	0.0014	0.0019	0.0012
9	education_neutral	0.0245	0.0267	0.0108	0.0056	0.0037
10	education_religious	0.0047	0.0049	0.0023	0.0024	0.0016
11	entrepreneur	0.0053	0.0075	0.0015	0.0026	0.0018
12	job_secular	0.0156	0.0175	0.0055	0.0045	0.0029
13	marriage	0.0133	0.0127	0.0016	0.0041	0.0024
14	migration	0.0042	0.0026	0.0007	0.0023	0.0012
15	no_ambition	0.0015	0.0010	0.0010	0.0014	0.0009
16	public_assistance	0.0019	0.0017	0.0009	0.0015	0.0010
17	reliance_on_god	0.0041	0.0043	0.0020	0.0023	0.0015
18	religious	0.0060	0.0073	0.0020	0.0027	0.0018
19	secular	0.0090	0.0084	0.0042	0.0034	0.0022
20	vague_job	0.0129	0.0136	0.0047	0.0040	0.0026
21	vague_non_specific	0.0062	0.0073	0.0049	0.0028	0.0021
22	vocational_training	0.0014	0.0010	0.0003	0.0013	0.0007
23	worries_anxieties	0.0061	0.0042	0.0033	0.0028	0.0017