{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "(content:post_process_collection)=\n", "# Post-processing the CSV file created by pySocialWatcher\n", "\n", "So far, we have:\n", "1. [Created a development account and generated a token for our collection](getting_your_token)\n", "2. [Learned how to run a data collection](content:basic_example)\n", "3. [Learned how to customize our collection and save the results to disk](content:json_creation)\n", "\n", "We now assume that a file named ``output_psw_top5_cities.csv`` has been created on disk by running the [previous notebook](content:json_creation).\n", "\n", "Now we will learn how to use pySocialWatcher tools to post-process the collected data and create a human-readable file, which can then be used [to plot some maps](content:plotting_maps)." ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "ExecuteTime": { "end_time": "2021-02-24T11:35:05.370203Z", "start_time": "2021-02-24T11:35:05.364538Z" } }, "outputs": [], "source": [ "import pandas as pd\n", "from pysocialwatcher import post_process" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "ExecuteTime": { "end_time": "2021-02-24T11:35:07.398072Z", "start_time": "2021-02-24T11:35:07.372890Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " | Unnamed: 0 | \n", "name | \n", "interests | \n", "ages_ranges | \n", "genders | \n", "behavior | \n", "scholarities | \n", "languages | \n", "family_statuses | \n", "relationship_statuses | \n", "... | \n", "household_composition | \n", "all_fields | \n", "targeting | \n", "response | \n", "dau_audience | \n", "mau_audience | \n", "access_device | \n", "timestamp | \n", "publisher_platforms | \n", "mock_response | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "0 | \n", "test | \n", "NaN | \n", "{'min': 18} | \n", "0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "(('ages_ranges', {'min': 18}), ('genders', 0),... | \n", "{'geo_locations': {'cities': [{'key': 2880782,... | \n", "b'{\"data\":[{\"daily_outcomes_curve\":[{\"spend\":0... | \n", "0 | \n", "1000 | \n", "{'name': '2G', 'or': [6017253486583]} | \n", "1614166082 | \n", "[\"facebook\"] | \n", "False | \n", "
1 | \n", "1 | \n", "test | \n", "NaN | \n", "{'min': 18} | \n", "0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "(('ages_ranges', {'min': 18}), ('genders', 0),... | \n", "{'geo_locations': {'cities': [{'key': 2490299,... | \n", "b'{\"data\":[{\"daily_outcomes_curve\":[{\"spend\":0... | \n", "0 | \n", "1000 | \n", "{'name': '2G', 'or': [6017253486583]} | \n", "1614166082 | \n", "[\"facebook\"] | \n", "False | \n", "
2 | \n", "2 | \n", "test | \n", "NaN | \n", "{'min': 18} | \n", "0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "(('ages_ranges', {'min': 18}), ('genders', 0),... | \n", "{'geo_locations': {'cities': [{'key': 2673660,... | \n", "b'{\"data\":[{\"daily_outcomes_curve\":[{\"spend\":0... | \n", "463 | \n", "1700 | \n", "{'name': '2G', 'or': [6017253486583]} | \n", "1614166082 | \n", "[\"facebook\"] | \n", "False | \n", "
3 | \n", "3 | \n", "test | \n", "NaN | \n", "{'min': 18} | \n", "0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "(('ages_ranges', {'min': 18}), ('genders', 0),... | \n", "{'geo_locations': {'cities': [{'key': 1035921,... | \n", "b'{\"data\":[{\"daily_outcomes_curve\":[{\"spend\":0... | \n", "5055 | \n", "14000 | \n", "{'name': '2G', 'or': [6017253486583]} | \n", "1614166082 | \n", "[\"facebook\"] | \n", "False | \n", "
4 | \n", "4 | \n", "test | \n", "NaN | \n", "{'min': 18} | \n", "0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "(('ages_ranges', {'min': 18}), ('genders', 0),... | \n", "{'geo_locations': {'cities': [{'key': 269969, ... | \n", "b'{\"data\":[{\"daily_outcomes_curve\":[{\"spend\":0... | \n", "777 | \n", "2000 | \n", "{'name': '2G', 'or': [6017253486583]} | \n", "1614166082 | \n", "[\"facebook\"] | \n", "False | \n", "
5 rows × 21 columns
\n", "\n", " | LocationType | \n", "FullLocation | \n", "Gender | \n", "Ages | \n", "Education | \n", "
---|---|---|---|---|---|
0 | \n", "city | \n", "Minato-ku, Tokyo, JP | \n", "both | \n", "18- | \n", "AllDegrees | \n", "
1 | \n", "city | \n", "New York, New York, US | \n", "both | \n", "18- | \n", "AllDegrees | \n", "
2 | \n", "city | \n", "Mexico City, Distrito Federal, MX | \n", "both | \n", "18- | \n", "AllDegrees | \n", "
3 | \n", "city | \n", "Mumbai, Maharashtra, IN | \n", "both | \n", "18- | \n", "AllDegrees | \n", "
combo | \n", "Key | \n", "both_18-40_2G | \n", "both_18-40_3G | \n", "both_18-40_4G | \n", "both_18-40_AllDevices | \n", "both_18-40_Wifi | \n", "both_18-_2G | \n", "both_18-_3G | \n", "both_18-_4G | \n", "both_18-_AllDevices | \n", "... | \n", "male_41-54_2G | \n", "male_41-54_3G | \n", "male_41-54_4G | \n", "male_41-54_AllDevices | \n", "male_41-54_Wifi | \n", "male_55-_2G | \n", "male_55-_3G | \n", "male_55-_4G | \n", "male_55-_AllDevices | \n", "male_55-_Wifi | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "269969 | \n", "1000 | \n", "45000 | \n", "510000 | \n", "5800000 | \n", "3700000 | \n", "2000 | \n", "88000 | \n", "870000 | \n", "9800000 | \n", "... | \n", "1000 | \n", "12000 | \n", "120000 | \n", "1000000 | \n", "610000 | \n", "1000 | \n", "9300 | \n", "68000 | \n", "600000 | \n", "370000 | \n", "
1 | \n", "1035921 | \n", "11000 | \n", "46000 | \n", "5300000 | \n", "9000000 | \n", "1700000 | \n", "14000 | \n", "58000 | \n", "6500000 | \n", "11000000 | \n", "... | \n", "1800 | \n", "5700 | \n", "640000 | \n", "1100000 | \n", "290000 | \n", "1000 | \n", "2700 | \n", "210000 | \n", "450000 | \n", "170000 | \n", "
2 | \n", "2490299 | \n", "1000 | \n", "4900 | \n", "520000 | \n", "3300000 | \n", "1600000 | \n", "1000 | \n", "11000 | \n", "1100000 | \n", "5900000 | \n", "... | \n", "1000 | \n", "1700 | \n", "150000 | \n", "670000 | \n", "300000 | \n", "1000 | \n", "2000 | \n", "130000 | \n", "540000 | \n", "270000 | \n", "
3 | \n", "2673660 | \n", "1200 | \n", "160000 | \n", "1000000 | \n", "7600000 | \n", "4800000 | \n", "1700 | \n", "240000 | \n", "1400000 | \n", "11000000 | \n", "... | \n", "1000 | \n", "28000 | \n", "180000 | \n", "1100000 | \n", "710000 | \n", "1000 | \n", "17000 | \n", "77000 | \n", "590000 | \n", "410000 | \n", "
4 | \n", "2880782 | \n", "1000 | \n", "1000 | \n", "8400 | \n", "64000 | \n", "34000 | \n", "1000 | \n", "1000 | \n", "15000 | \n", "120000 | \n", "... | \n", "1000 | \n", "1000 | \n", "2900 | \n", "23000 | \n", "12000 | \n", "1000 | \n", "1000 | \n", "1500 | \n", "11000 | \n", "5600 | \n", "
5 rows × 61 columns
\n", "\n", " | Key | \n", "Name | \n", "Region | \n", "FullLocation | \n", "both_18-40_2G | \n", "both_18-40_3G | \n", "both_18-40_4G | \n", "both_18-40_AllDevices | \n", "both_18-40_Wifi | \n", "both_18-_2G | \n", "... | \n", "male_41-54_2G | \n", "male_41-54_3G | \n", "male_41-54_4G | \n", "male_41-54_AllDevices | \n", "male_41-54_Wifi | \n", "male_55-_2G | \n", "male_55-_3G | \n", "male_55-_4G | \n", "male_55-_AllDevices | \n", "male_55-_Wifi | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "2880782 | \n", "Minato-ku | \n", "Tokyo | \n", "Minato-ku, Tokyo, JP | \n", "1000 | \n", "1000 | \n", "8400 | \n", "64000 | \n", "34000 | \n", "1000 | \n", "... | \n", "1000 | \n", "1000 | \n", "2900 | \n", "23000 | \n", "12000 | \n", "1000 | \n", "1000 | \n", "1500 | \n", "11000 | \n", "5600 | \n", "
1 | \n", "2490299 | \n", "New York | \n", "New York | \n", "New York, New York, US | \n", "1000 | \n", "4900 | \n", "520000 | \n", "3300000 | \n", "1600000 | \n", "1000 | \n", "... | \n", "1000 | \n", "1700 | \n", "150000 | \n", "670000 | \n", "300000 | \n", "1000 | \n", "2000 | \n", "130000 | \n", "540000 | \n", "270000 | \n", "
2 | \n", "2673660 | \n", "Mexico City | \n", "Distrito Federal | \n", "Mexico City, Distrito Federal, MX | \n", "1200 | \n", "160000 | \n", "1000000 | \n", "7600000 | \n", "4800000 | \n", "1700 | \n", "... | \n", "1000 | \n", "28000 | \n", "180000 | \n", "1100000 | \n", "710000 | \n", "1000 | \n", "17000 | \n", "77000 | \n", "590000 | \n", "410000 | \n", "
3 | \n", "1035921 | \n", "Mumbai | \n", "Maharashtra | \n", "Mumbai, Maharashtra, IN | \n", "11000 | \n", "46000 | \n", "5300000 | \n", "9000000 | \n", "1700000 | \n", "14000 | \n", "... | \n", "1800 | \n", "5700 | \n", "640000 | \n", "1100000 | \n", "290000 | \n", "1000 | \n", "2700 | \n", "210000 | \n", "450000 | \n", "170000 | \n", "
4 | \n", "269969 | \n", "São Paulo | \n", "São Paulo (state) | \n", "São Paulo, São Paulo (state), BR | \n", "1000 | \n", "45000 | \n", "510000 | \n", "5800000 | \n", "3700000 | \n", "2000 | \n", "... | \n", "1000 | \n", "12000 | \n", "120000 | \n", "1000000 | \n", "610000 | \n", "1000 | \n", "9300 | \n", "68000 | \n", "600000 | \n", "370000 | \n", "
5 rows × 64 columns
\n", "