Efficiency - Test
Contents
Efficiency - Test#
Imports#
import os
import numpy as np
import pandas as pd
from iqual import tests
Load datasets#
# Folder containing the input CSV files (relative path).
data_dir = "../../data"

# Enhanced bootstrap data: build the file path first, then read it.
bootstrap_csv = os.path.join(data_dir, "enhanced_bootstrap_mean.csv")
bootstrap_df = pd.read_csv(bootstrap_csv)

# Preview the first rows of the loaded frame.
bootstrap_df.head()
uid | data_round | refugee_status | bootstrap_run | annotated | religious_act | secular_act | no_ambition_act | vague_job_act | job_secular_act | ... | ability_high | ability_low | budget_high | budget_low | awareness_information_high | awareness_information_low | camp_regulations | covid_impacts | public_assistance | worries_anxieties | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | C601001009R2 | R2 | refugee | 1 | 1 | 0.00 | 0.166667 | 0.0 | 0.333333 | 0.000000 | ... | 0.0 | 0.000000 | 0.0 | 0.333333 | 0.0 | 0.000000 | 0.00 | 0.000 | 0.0 | 0.0 |
1 | C601001009R3 | R3 | refugee | 1 | 1 | 0.00 | 0.117647 | 0.0 | 0.058824 | 0.294118 | ... | 0.0 | 0.058824 | 0.0 | 0.000000 | 0.0 | 0.117647 | 0.00 | 0.000 | 0.0 | 0.0 |
2 | C601002009R2 | R2 | refugee | 1 | 1 | 0.00 | 0.000000 | 0.0 | 0.125000 | 0.000000 | ... | 0.0 | 0.125000 | 0.0 | 0.125000 | 0.0 | 0.000000 | 0.25 | 0.000 | 0.0 | 0.0 |
3 | C601003005R3 | R3 | refugee | 1 | 1 | 0.00 | 0.000000 | 0.0 | 0.000000 | 0.250000 | ... | 0.0 | 0.000000 | 0.0 | 0.000000 | 0.0 | 0.000000 | 0.00 | 0.125 | 0.0 | 0.0 |
4 | C602004004R3 | R3 | refugee | 1 | 1 | 0.05 | 0.000000 | 0.0 | 0.000000 | 0.200000 | ... | 0.1 | 0.000000 | 0.0 | 0.000000 | 0.0 | 0.000000 | 0.05 | 0.000 | 0.0 | 0.0 |
5 rows × 80 columns
Annotations#
# Annotation column names handed to tests.Efficiency further down.
# One name per line, kept in the same order as the original list.
annotation_vars = [
    "ability_high",
    "ability_low",
    "awareness_information_high",
    "awareness_information_low",
    "budget_high",
    "budget_low",
    "covid_impacts",
    "education_high",
    "education_low",
    "education_neutral",
    "education_religious",
    "entrepreneur",
    "job_secular",
    "marriage",
    "migration",
    "no_ambition",
    "public_assistance",
    "reliance_on_god",
    "religious",
    "secular",
    "vague_job",
    "vague_non_specific",
    "vocational_training",
    "worries_anxieties",
]
Separate Datasets#
# Build the three frames that the efficiency test consumes.
is_annotated = bootstrap_df.annotated == 1

# Human DF: rows with annotated == 1.
human_df = bootstrap_df[is_annotated]

# Machine DF: rows with annotated == 0.
machine_df = bootstrap_df[bootstrap_df.annotated == 0]

# Out-sample data is not built in this notebook; the disabled line was:
# test_df = bootstrap_df[bootstrap_df.split == 'test']

# In-sample average predictions: mean of the numeric columns of the
# annotated rows for each (uid, refugee_status, data_round) group.
group_keys = ['uid', 'refugee_status', 'data_round']
med_df = (
    bootstrap_df[is_annotated]
    .groupby(group_keys)
    .mean(numeric_only=True)
    .reset_index()
)
Use tests.Efficiency from iQual to get Measurement Error Variances#
# Render floats with four decimal places in displayed frames.
pd.options.display.float_format = '{:.4f}'.format

# Set up the iQual efficiency test from the human, in-sample-average and
# machine frames, then collect its results into one frame.
efficiency_test = tests.Efficiency(human_df, med_df, machine_df, annotation_vars)
efficiency_df = efficiency_test.get_results_all()

print("Measurement Error Variances")
# Keep this bare expression last so the notebook displays the selected columns.
report_columns = ['annotation', 'sig2_h', 'sig2_y', 'sig2_eps', 'se_h', 'se_enh']
efficiency_df[report_columns]
Measurement Error Variances
annotation | sig2_h | sig2_y | sig2_eps | se_h | se_enh | |
---|---|---|---|---|---|---|
0 | ability_high | 0.0064 | 0.0098 | 0.0036 | 0.0029 | 0.0021 |
1 | ability_low | 0.0057 | 0.0050 | 0.0038 | 0.0027 | 0.0018 |
2 | awareness_information_high | 0.0091 | 0.0096 | 0.0070 | 0.0034 | 0.0024 |
3 | awareness_information_low | 0.0010 | 0.0008 | 0.0010 | 0.0011 | 0.0008 |
4 | budget_high | 0.0046 | 0.0055 | 0.0025 | 0.0024 | 0.0017 |
5 | budget_low | 0.0156 | 0.0116 | 0.0060 | 0.0044 | 0.0026 |
6 | covid_impacts | 0.0037 | 0.0030 | 0.0009 | 0.0022 | 0.0013 |
7 | education_high | 0.0093 | 0.0090 | 0.0055 | 0.0034 | 0.0023 |
8 | education_low | 0.0027 | 0.0023 | 0.0014 | 0.0019 | 0.0012 |
9 | education_neutral | 0.0245 | 0.0267 | 0.0108 | 0.0056 | 0.0037 |
10 | education_religious | 0.0047 | 0.0049 | 0.0023 | 0.0024 | 0.0016 |
11 | entrepreneur | 0.0053 | 0.0075 | 0.0015 | 0.0026 | 0.0018 |
12 | job_secular | 0.0156 | 0.0175 | 0.0055 | 0.0045 | 0.0029 |
13 | marriage | 0.0133 | 0.0127 | 0.0016 | 0.0041 | 0.0024 |
14 | migration | 0.0042 | 0.0026 | 0.0007 | 0.0023 | 0.0012 |
15 | no_ambition | 0.0015 | 0.0010 | 0.0010 | 0.0014 | 0.0009 |
16 | public_assistance | 0.0019 | 0.0017 | 0.0009 | 0.0015 | 0.0010 |
17 | reliance_on_god | 0.0041 | 0.0043 | 0.0020 | 0.0023 | 0.0015 |
18 | religious | 0.0060 | 0.0073 | 0.0020 | 0.0027 | 0.0018 |
19 | secular | 0.0090 | 0.0084 | 0.0042 | 0.0034 | 0.0022 |
20 | vague_job | 0.0129 | 0.0136 | 0.0047 | 0.0040 | 0.0026 |
21 | vague_non_specific | 0.0062 | 0.0073 | 0.0049 | 0.0028 | 0.0021 |
22 | vocational_training | 0.0014 | 0.0010 | 0.0003 | 0.0013 | 0.0007 |
23 | worries_anxieties | 0.0061 | 0.0042 | 0.0033 | 0.0028 | 0.0017 |