{ "cells": [ { "cell_type": "markdown", "id": "2be1b7ab", "metadata": {}, "source": [ "# Efficiency - Test" ] }, { "attachments": {}, "cell_type": "markdown", "id": "2f03db12", "metadata": {}, "source": [ "### Imports" ] }, { "cell_type": "code", "execution_count": 1, "id": "135430fa", "metadata": {}, "outputs": [], "source": [ "import os\n", "import numpy as np\n", "import pandas as pd\n", "from iqual import tests" ] }, { "attachments": {}, "cell_type": "markdown", "id": "17eefba0", "metadata": {}, "source": [ "### Load datasets" ] }, { "cell_type": "code", "execution_count": 2, "id": "285f4255", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
uiddata_roundrefugee_statusbootstrap_runannotatedreligious_actsecular_actno_ambition_actvague_job_actjob_secular_act...ability_highability_lowbudget_highbudget_lowawareness_information_highawareness_information_lowcamp_regulationscovid_impactspublic_assistanceworries_anxieties
0C601001009R2R2refugee110.000.1666670.00.3333330.000000...0.00.0000000.00.3333330.00.0000000.000.0000.00.0
1C601001009R3R3refugee110.000.1176470.00.0588240.294118...0.00.0588240.00.0000000.00.1176470.000.0000.00.0
2C601002009R2R2refugee110.000.0000000.00.1250000.000000...0.00.1250000.00.1250000.00.0000000.250.0000.00.0
3C601003005R3R3refugee110.000.0000000.00.0000000.250000...0.00.0000000.00.0000000.00.0000000.000.1250.00.0
4C602004004R3R3refugee110.050.0000000.00.0000000.200000...0.10.0000000.00.0000000.00.0000000.050.0000.00.0
\n", "

5 rows × 80 columns

\n", "
" ], "text/plain": [ " uid data_round refugee_status bootstrap_run annotated \\\n", "0 C601001009R2 R2 refugee 1 1 \n", "1 C601001009R3 R3 refugee 1 1 \n", "2 C601002009R2 R2 refugee 1 1 \n", "3 C601003005R3 R3 refugee 1 1 \n", "4 C602004004R3 R3 refugee 1 1 \n", "\n", " religious_act secular_act no_ambition_act vague_job_act \\\n", "0 0.00 0.166667 0.0 0.333333 \n", "1 0.00 0.117647 0.0 0.058824 \n", "2 0.00 0.000000 0.0 0.125000 \n", "3 0.00 0.000000 0.0 0.000000 \n", "4 0.05 0.000000 0.0 0.000000 \n", "\n", " job_secular_act ... ability_high ability_low budget_high budget_low \\\n", "0 0.000000 ... 0.0 0.000000 0.0 0.333333 \n", "1 0.294118 ... 0.0 0.058824 0.0 0.000000 \n", "2 0.000000 ... 0.0 0.125000 0.0 0.125000 \n", "3 0.250000 ... 0.0 0.000000 0.0 0.000000 \n", "4 0.200000 ... 0.1 0.000000 0.0 0.000000 \n", "\n", " awareness_information_high awareness_information_low camp_regulations \\\n", "0 0.0 0.000000 0.00 \n", "1 0.0 0.117647 0.00 \n", "2 0.0 0.000000 0.25 \n", "3 0.0 0.000000 0.00 \n", "4 0.0 0.000000 0.05 \n", "\n", " covid_impacts public_assistance worries_anxieties \n", "0 0.000 0.0 0.0 \n", "1 0.000 0.0 0.0 \n", "2 0.000 0.0 0.0 \n", "3 0.125 0.0 0.0 \n", "4 0.000 0.0 0.0 \n", "\n", "[5 rows x 80 columns]" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data_dir = \"../../data\"\n", "\n", "### Enhanced bootstrap data\n", "bootstrap_df = pd.read_csv(os.path.join(data_dir,\"enhanced_bootstrap_mean.csv\"))\n", "bootstrap_df.head()" ] }, { "attachments": {}, "cell_type": "markdown", "id": "cb9fc0f1", "metadata": {}, "source": [ "### Annotations" ] }, { "cell_type": "code", "execution_count": 3, "id": "3ddf1950", "metadata": {}, "outputs": [], "source": [ "annotation_vars = [\n", " 'ability_high', 'ability_low', \n", " 'awareness_information_high', 'awareness_information_low', \n", " 'budget_high', 'budget_low', 'covid_impacts', \n", " 'education_high', 'education_low', 'education_neutral', 
'education_religious', \n", " 'entrepreneur', 'job_secular', 'marriage', 'migration', \n", " 'no_ambition', 'public_assistance', 'reliance_on_god', \n", " 'religious', 'secular', 'vague_job', 'vague_non_specific', \n", " 'vocational_training', 'worries_anxieties',\n", "]" ] }, { "attachments": {}, "cell_type": "markdown", "id": "7eac3f4d", "metadata": {}, "source": [ "### Separate Datasets" ] }, { "cell_type": "code", "execution_count": 4, "id": "67c55028", "metadata": {}, "outputs": [], "source": [ "### Create three datasets\n", "\n", "# > Human DF: rows flagged with annotated == 1\n", "human_df = bootstrap_df[bootstrap_df.annotated == 1]\n", "\n", "# > Machine DF: rows flagged with annotated == 0\n", "machine_df = bootstrap_df[bootstrap_df.annotated == 0]\n", "\n", "# > In-sample average predictions: numeric columns of the annotated rows,\n", "#   averaged within each (uid, refugee_status, data_round) group.\n", "#   An out-of-sample alternative (e.g. rows with split == 'test', if the data\n", "#   has such a column) is not used here.\n", "group_keys = ['uid', 'refugee_status', 'data_round']\n", "med_df = human_df.groupby(group_keys).mean(numeric_only=True).reset_index()" ] }, { "attachments": {}, "cell_type": "markdown", "id": "607a7d39", "metadata": {}, "source": [ "### Use `tests.Efficiency` from `iQual` to get `Measurement Error Variances`" ] }, { "cell_type": "code", "execution_count": 5, "id": "c44f7bb0", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Measurement Error Variances\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
annotationsig2_hsig2_ysig2_epsse_hse_enh
0ability_high0.00640.00980.00360.00290.0021
1ability_low0.00570.00500.00380.00270.0018
2awareness_information_high0.00910.00960.00700.00340.0024
3awareness_information_low0.00100.00080.00100.00110.0008
4budget_high0.00460.00550.00250.00240.0017
5budget_low0.01560.01160.00600.00440.0026
6covid_impacts0.00370.00300.00090.00220.0013
7education_high0.00930.00900.00550.00340.0023
8education_low0.00270.00230.00140.00190.0012
9education_neutral0.02450.02670.01080.00560.0037
10education_religious0.00470.00490.00230.00240.0016
11entrepreneur0.00530.00750.00150.00260.0018
12job_secular0.01560.01750.00550.00450.0029
13marriage0.01330.01270.00160.00410.0024
14migration0.00420.00260.00070.00230.0012
15no_ambition0.00150.00100.00100.00140.0009
16public_assistance0.00190.00170.00090.00150.0010
17reliance_on_god0.00410.00430.00200.00230.0015
18religious0.00600.00730.00200.00270.0018
19secular0.00900.00840.00420.00340.0022
20vague_job0.01290.01360.00470.00400.0026
21vague_non_specific0.00620.00730.00490.00280.0021
22vocational_training0.00140.00100.00030.00130.0007
23worries_anxieties0.00610.00420.00330.00280.0017
\n", "
" ], "text/plain": [ " annotation sig2_h sig2_y sig2_eps se_h se_enh\n", "0 ability_high 0.0064 0.0098 0.0036 0.0029 0.0021\n", "1 ability_low 0.0057 0.0050 0.0038 0.0027 0.0018\n", "2 awareness_information_high 0.0091 0.0096 0.0070 0.0034 0.0024\n", "3 awareness_information_low 0.0010 0.0008 0.0010 0.0011 0.0008\n", "4 budget_high 0.0046 0.0055 0.0025 0.0024 0.0017\n", "5 budget_low 0.0156 0.0116 0.0060 0.0044 0.0026\n", "6 covid_impacts 0.0037 0.0030 0.0009 0.0022 0.0013\n", "7 education_high 0.0093 0.0090 0.0055 0.0034 0.0023\n", "8 education_low 0.0027 0.0023 0.0014 0.0019 0.0012\n", "9 education_neutral 0.0245 0.0267 0.0108 0.0056 0.0037\n", "10 education_religious 0.0047 0.0049 0.0023 0.0024 0.0016\n", "11 entrepreneur 0.0053 0.0075 0.0015 0.0026 0.0018\n", "12 job_secular 0.0156 0.0175 0.0055 0.0045 0.0029\n", "13 marriage 0.0133 0.0127 0.0016 0.0041 0.0024\n", "14 migration 0.0042 0.0026 0.0007 0.0023 0.0012\n", "15 no_ambition 0.0015 0.0010 0.0010 0.0014 0.0009\n", "16 public_assistance 0.0019 0.0017 0.0009 0.0015 0.0010\n", "17 reliance_on_god 0.0041 0.0043 0.0020 0.0023 0.0015\n", "18 religious 0.0060 0.0073 0.0020 0.0027 0.0018\n", "19 secular 0.0090 0.0084 0.0042 0.0034 0.0022\n", "20 vague_job 0.0129 0.0136 0.0047 0.0040 0.0026\n", "21 vague_non_specific 0.0062 0.0073 0.0049 0.0028 0.0021\n", "22 vocational_training 0.0014 0.0010 0.0003 0.0013 0.0007\n", "23 worries_anxieties 0.0061 0.0042 0.0033 0.0028 0.0017" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.options.display.float_format = '{:.4f}'.format\n", "\n", "efficiency_df = tests.Efficiency(human_df,med_df,machine_df,annotation_vars).get_results_all()\n", "print(\"Measurement Error Variances\")\n", "efficiency_df[['annotation','sig2_h','sig2_y','sig2_eps','se_h','se_enh']]" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": 
"ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.13" } }, "nbformat": 4, "nbformat_minor": 5 }