{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "name": "BaseDataset_4_EDA.ipynb", "provenance": [], "collapsed_sections": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" } }, "cells": [ { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "V7ZkGLD6tDB5", "outputId": "a35a60e3-a34c-43bb-c1b3-e1c3a7ed9992" }, "source": [ "# you 1st need to right click on the shared folder and select 'Add to my Drive'\n", "from google.colab import drive\n", "drive.mount('/content/drive/')" ], "execution_count": 27, "outputs": [ { "output_type": "stream", "text": [ "Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount(\"/content/drive/\", force_remount=True).\n" ], "name": "stdout" } ] }, { "cell_type": "code", "metadata": { "id": "GHtg26JQtlz4" }, "source": [ "#paths to Datasets\n", "ng_state_path='/content/drive/MyDrive/omdena/omdena-nigeria/dataset/nga_adm_osgof_20190417/nga_admbnda_adm1_osgof_20190417.shp' \n", "zonal_stats_path='/content/drive/MyDrive/omdena/omdena-nigeria/dataset/nga_lga_zonal_statistics_2016.csv'\n", "pop_den_path='/content/drive/MyDrive/omdena/omdena-nigeria/dataset/Nigeria_Population_Density_by_State_as_at_2016.csv'\n", "dhs_survey_path='/content/drive/MyDrive/omdena/omdena-nigeria/dataset/dhs_survey/NGPR7ADT/NGPR7AFL.DTA'\n", "dhs_survey_dict_path='/content/drive/MyDrive/omdena/omdena-nigeria/dataset/dhs_survey/NGPR7ADT/NGPR7AFL.DO'\n", "lga_path='/content/drive/MyDrive/omdena/omdena-nigeria/dataset/nigeria-lgas/new_lga_nigeria_2003.shp'\n", "dhs_gps_path='/content/drive/MyDrive/omdena/omdena-nigeria/dataset/NG_2018_DHS_07172021_114_165261_gps/NGGE7BFL/NGGE7BFL.shp'\n", "fb_sett_img_path='/content/drive/MyDrive/omdena/omdena-nigeria/dataset/hrsl_nga_v1_fb_settlement/hrsl_nga_settlement.tif'\n", "fb_pop_img_path='/content/drive/MyDrive/omdena/omdena-nigeria/dataset/hrsl_nga_v1_fb_settlement/hrsl_nga_pop.tif'\n", "per_house_no_elect='/content/drive/MyDrive/omdena/omdena-nigeria/dataset/DHSstatcompiler_Ng_%_households_with_no_electricity_2018.xlsx'\n", "per_pop__no_elect='/content/drive/MyDrive/omdena/omdena-nigeria/dataset/DHS_STATcompilerExport_%_Pop_without_electricity_2018.xlsx'" ], "execution_count": 28, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "c2q080MZxnBL" }, "source": [ "\n", "\n", "!pip install -q geopandas\n", "import os\n", "import glob\n", "\n", "import numpy as np\n", "import pandas as pd\n", "from functools import reduce\n" ], "execution_count": 29, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "y7arX9cVx927" }, "source": [ "\n", "import geopandas as gpd\n", "import matplotlib.pyplot as plt\n", "%matplotlib inline" ], "execution_count": 30, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "pfPGjWGuyAM2" }, "source": [ "#Helpers\n", "def get_dhs_dict(dhs_dict_file):\n", " dhs_dict = dict()\n", " with open(dhs_dict_file, 'r', errors='replace') as file:\n", " line = file.readline()\n", " while line:\n", " line = file.readline()\n", " if 'label variable' in line:\n", " code = line.split()[2]\n", " colname = ' '.join([x.strip('\"') for x in line.split()[3:]])\n", " dhs_dict[code] = colname\n", " return dhs_dict\n", "\n", "def cleaning_txt(x):\n", " return x.str.replace(\"Region : ..\".strip(), \"\").apply(lambda a:str(a).lower())\n", "\n", "#function to change codes values to actual string values as described in the metadata\n", "def dhs_preprocessing(df,df_survey_dict_path,prep_list):\n", " df_metadata=get_dhs_dict(dhs_survey_dict_path)\n", " df.columns = df.columns.map(df_metadata)\n", " #i=range(0,len(x))\n", " df=df[[prep_list[0]]]\n", " for i in range(1,len(x)):\n", " df.replace(prep_list[i],inplace=True)\n", " return df\n", "\n", " #I manually extract the important columns needed for the analysis\n", " #and converted to list\n", "important_cols=list(\n", "{'hhid': 'Case Identification',\n", "'hv000': 'Country code and phase',\n", "'hv002': 'Household number',\n", "'hv004': 'Ultimate area unit',\n", " 'hv023': 'Stratification used in sample design',\n", " 'hv024': 'Region',\n", " 'hv025': 'Type of place of residence',\n", " 'hv040': 'Cluster altitude in meters',\n", "'hv206': 'Has electricity',\n", " 'hv208': 'Has television',\n", " 'hv209': 'Has refrigerator',\n", "'sh121m': 'Has electric iron',\n", "'hv235': 'Location of source for water',\n", "'hv243a': 'Has mobile telephone',\n", " 'hv243e': 'Has a computer',\n", " 'shstate': 'State',\n", " 'sh121l': 'Has air conditioner',\n", "'hv009': 'Number of household members'}.values())\n", "\n", "\n", "#convert state codes to state strings\n", "state_dict={10:\"Sokoto\",\n", " 20:\"Zamfara\",\n", " 30:\"Katsina\",\n", " 40:\"Jigawa\",\n", " 50:\"Yobe\",\n", " 60:\"Borno\",\n", " 70:\"Adamawa\",\n", " 80:\"Gombe\",\n", " 90:\"Bauchi\",\n", " 100:\"Kano\",\n", " 110:\"Kaduna\",\n", " 120:\"Kebbi\",\n", " 130:\"Niger\",\n", " 140:\"FCT Abuja\",\n", " 150:\"Nasarawa\",\n", " 160:\"Plateau\",\n", " 170:\"Taraba\",\n", " 180:\"Benue\",\n", " 190:\"Kogi\",\n", " 200:\"Kwara\",\n", " 210:\"Oyo\",\n", " 220:\"Osun\",\n", " 230:\"Ekiti\",\n", " 240:\"Ondo\",\n", " 250:\"Edo\",\n", " 260:\"Anambra\",\n", " 270:\"Enugu\",\n", " 280:\"Ebonyi\",\n", " 290:\"Cross River\",\n", " 300:\"Akwa Ibom\",\n", " 310:\"Abia\",\n", " 320:\"Imo\",\n", " 330:\"Rivers\",\n", " 340:\"Bayelsa\",\n", " 350:\"Delta\",\n", " 360:\"Lagos\",\n", " 370:\"Ogun\"}\n", "\n", "#convert strata codes to strata strings\n", "stratum_dict={1:\"NC Benue Urban\",\n", " 2:\"NC Benue Rural\",\n", " 3:\"NC FCT Abuja Urban\",\n", " 4:\"NC FCT Abuja Rural\",\n", " 5:\"NC Kogi Urban\",\n", " 6:\"NC Kogi Rural\",\n", " 7:\"NC Kwara Urban\",\n", " 8:\"NC Kwara Rural\",\n", " 9:\"NC Nasarawa Urban\",\n", " 10:\"NC Nasarawa Rural\",\n", " 11:\"NC Niger Urban\",\n", " 12:\"NC Niger Rural\",\n", " 13:\"NC Plateau Urban\",\n", " 14:\"NC Plateau Rural\",\n", " 15:\"NE Adamawa Urban\",\n", " 16:\"NE Adamawa Rural\",\n", " 17:\"NE Bauchi Urban\",\n", " 18:\"NE Bauchi Rural\",\n", " 19:\"NE Borno Urban\",\n", " 20:\"NE Borno Rural\",\n", " 21:\"NE Gombe Urban\",\n", " 22:\"NE Gombe Rural\",\n", " 23:\"NE Taraba Urban\",\n", " 24:\"NE Taraba Rural\",\n", " 25:\"NE Yobe Urban\",\n", " 26:\"NE Yobe Rural\",\n", " 27:\"NW Jigawa Urban\",\n", " 28:\"NW Jigawa Rural\",\n", " 29:\"NW Kaduna Urban\",\n", " 30:\"NW Kaduna Rural\",\n", " 31:\"NW Kano Urban\",\n", " 32:\"NW Kano Rural\",\n", " 33:\"NW Katsina Urban\",\n", " 34:\"NW Katsina Rural\",\n", " 35:\"NW Kebbi Urban\",\n", " 36:\"NW Kebbi Rural\",\n", " 37:\"NW Sokoto Urban\",\n", " 38:\"NW Sokoto Rural\",\n", " 39:\"NW Zamfara Urban\",\n", " 40:\"NW Zamfara Rural\",\n", " 41:\"SE Abia Urban\",\n", " 42:\"SE Abia Rural\",\n", " 43:\"SE Anambra Urban\",\n", " 44:\"SE Anambra Rural\",\n", " 45:\"SE Ebonyi Urban\",\n", " 46:\"SE Ebonyi Rural\",\n", " 47:\"SE Enugu Urban\",\n", " 48:\"SE Enugu Rural\",\n", " 49:\"SE Imo Urban\",\n", " 50:\"SE Imo Rural\",\n", " 51:\"SS Akwa Ibom Urban\",\n", " 52:\"SS Akwa Ibom Rural\",\n", " 53:\"SS Bayelsa Urban\",\n", " 54:\"SS Bayelsa Rural\",\n", " 55:\"SS Cross River Urban\",\n", " 56:\"SS Cross River Rural\",\n", " 57:\"SS Delta Urban\",\n", " 58:\"SS Delta Rural\",\n", " 59:\"SS Edo Urban\",\n", " 60:\"SS Edo Rural\",\n", " 61:\"SS Rivers Urban\",\n", " 62:\"SS Rivers Rural\",\n", " 63:\"SW Ekiti Urban\",\n", " 64:\"SW Ekiti Rural\",\n", " 65:\"SW Lagos Urban\",\n", " 66:\"SW Lagos Rural\",\n", " 67:\"SW Ogun Urban\",\n", " 68:\"SW Ogun Rural\",\n", " 69:\"SW Ondo Urban\",\n", " 70:\"SW Ondo Rural\",\n", " 71:\"SW Osun Urban\",\n", " 72:\"SW Osun Rural\",\n", " 73:\"SW Oyo Urban\",\n", " 74:\"SW Oyo Rural\"}\n", "\n", "#convert region codes to region strings\n", "region_dict={1:\"North Central\",\n", " 2:\"North East\",\n", " 3:\"North West\",\n", " 4:\"South East\",\n", " 5:\"South South\",\n", " 6:\"South West\"}\n", "#place codes to place strings\n", "place_type_dict={1:\"Urban\",2:\"Rural\"}\n", "\n", "#prep_list=[important_cols,{\"State\": state_dict},{'Stratification used in sample design':stratum_dict},{'Region':region_dict},{'Type of place of residence':place_type_dict}]" ], "execution_count": 31, "outputs": [] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "Nogf7_dHyAVc", "outputId": "e6e5c22f-5e13-4ddc-a525-f005bbfa66e6" }, "source": [ "#Loading DHS stata files\n", "dhs_survey_data =pd.read_stata(dhs_survey_path,convert_categoricals=False)\n", "print('Data Dimensions: {}'.format(dhs_survey_data.shape))" ], "execution_count": 32, "outputs": [ { "output_type": "stream", "text": [ "Data Dimensions: (188010, 357)\n" ], "name": "stdout" } ] }, { "cell_type": "code", "metadata": { "id": "XGLwR5jlBSge" }, "source": [ "#preprocessing Dhs_data \n", "#look into this later\n", "#dhs_analysis_data=dhs_preprocessing(dhs_survey_data,dhs_survey_dict_path,prep_list)\n" ], "execution_count": 33, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "Iouza0NFI5XU" }, "source": [ "#changing the names of columns\n", "dhs_survey_dict=get_dhs_dict(dhs_survey_dict_path)\n", "dhs_survey_data.columns = dhs_survey_data.columns.map(dhs_survey_dict)" ], "execution_count": 34, "outputs": [] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 403 }, "id": "wamnM104I84U", "outputId": "e4ee0bef-9923-4512-aaf5-54cd974c637f" }, "source": [ "dhs_survey_data.head(5)" ], "execution_count": 35, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Case IdentificationLine numberCountry code and phaseCluster numberHousehold numberRespondent's line number (answering Household questionnaire)Ultimate area unitHousehold sample weight (6 decimals)Month of interviewYear of interviewDate of interview (CMC)Date of interview Century Day Code (CDC)Number of household membersNumber of eligible women in householdNumber of eligible men in householdNumber of de jure membersNumber of de facto membersNumber of children 5 and under (de jure)Result of household interviewDay of interviewNumber of visitsInterviewer identificationNA - Keyer identificationEver-married samplePrimary sampling unitSample strata for sampling errorsStratification used in sample designRegionType of place of residenceNA - Place of residenceHousehold selected for male interviewHousehold weight for male subsample (6 decimals)Field supervisorNA - Field editorNA - Office editorNumber of eligible children for height and weightCluster altitude in metersTotal adults measuredHousehold selected for hemoglobinHousehold selected for Domestic Violence module...NA - Line number of person who slept in this netObtained net from campaign, antenatal or immunization visitPlace where net was obtainedShown NetReason not sleeping inside the netIndex to Household ScheduleType of Mosquito Bed Net(s) person slept under last nightNet Designation Number (HMLIDX) for 1st net person slept under last nightNet Designation Number (HMLIDX) for 2nd net person slept under last nightNet Designation Number (HMLIDX) for 3rd net person slept under last nightCorrected age from Individual fileAge in months (for children)Flag for age from Individual filePregnancy status from Individual filePerson slept under an ever-treated netPerson slept under an LLIN netLine number of parent/caretaker (for malaria testing)Read consent statement for malariaFinal result of malaria from blood smear testNA - Presence of species: falciparum (Pf)NA - Presence of species: malariae (Pm)NA - Presence of species: ovale (Po)NA - Presence of species: vivax (Pv)NA - Presence of species: CSNA - Presence of species: CSNA - Presence of species: CSResult of malaria measurementBar code for blood smear sampleResult of malaria rapid testFieldworker measurer code for malariaIndex to household scheduleWear glasses or contact lensesHave difficulty seeingWear a hearing aidHave difficulty hearingHave difficulty communicating using usual languageHave difficulty remembering or concentratingHave difficulty walking or climbing stepsHave difficulty washing all over or dressingHighest degree of difficulty for any of the impairments
01 11NG71111136835492018142543345510551121702NaN011111NaN00701NaNNaN0100000...NaN2.0NaN1.0NaN112NaNNaN82NaN0NaN00NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN11.02.00.01.01.02.02.02.02.0
11 12NG71111136835492018142543345510551121702NaN011111NaN00701NaNNaN0100000...NaN2.0NaN1.0NaN211NaNNaN40NaN10.000NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN20.01.00.01.01.01.01.01.01.0
21 13NG71111136835492018142543345510551121702NaN011111NaN00701NaNNaN0100000...NaNNaNNaNNaNNaN300NaNNaN18NaN0NaN00NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN30.01.00.01.01.01.01.01.01.0
31 14NG71111136835492018142543345510551121702NaN011111NaN00701NaNNaN0100000...NaNNaNNaNNaNNaN400NaNNaN11NaN0NaN00NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN40.01.00.01.01.01.01.01.01.0
41 15NG71111136835492018142543345510551121702NaN011111NaN00701NaNNaN0100000...NaN2.0NaN1.0NaN511NaNNaN1NaN0NaN00NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN5NaNNaNNaNNaNNaNNaNNaNNaNNaN
\n", "

5 rows × 357 columns

\n", "
" ], "text/plain": [ " Case Identification ... Highest degree of difficulty for any of the impairments\n", "0 1 1 ... 2.0 \n", "1 1 1 ... 1.0 \n", "2 1 1 ... 1.0 \n", "3 1 1 ... 1.0 \n", "4 1 1 ... NaN \n", "\n", "[5 rows x 357 columns]" ] }, "metadata": { "tags": [] }, "execution_count": 35 } ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "rkLOoJNyJNIm", "outputId": "bc992180-669b-4c50-c5b5-e938432a3ad0" }, "source": [ "#creating a subset from the main dataset\n", "dhs_analysis_data=dhs_survey_data[important_cols]\n", "# Remap the values of the dataframe\n", "dhs_analysis_data.replace({\"State\": state_dict},inplace=True)\n", "dhs_analysis_data.replace({'Stratification used in sample design': stratum_dict},inplace=True)\n", "dhs_analysis_data.replace({'Region':region_dict},inplace=True)\n", "dhs_analysis_data.replace({'Type of place of residence':place_type_dict},inplace=True)\n", "#changing some values to regular values\n", "dhs_analysis_data.loc[dhs_analysis_data.State == 'FCT Abuja', 'State'] = 'Abuja'\n", "dhs_analysis_data.loc[dhs_analysis_data.State == 'Nasarawa', 'State'] = 'Nassarawa'\n" ], "execution_count": 36, "outputs": [ { "output_type": "stream", "text": [ "/usr/local/lib/python3.7/dist-packages/pandas/core/indexing.py:1743: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " isetter(ilocs[0], value)\n", "/usr/local/lib/python3.7/dist-packages/pandas/core/indexing.py:1763: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " isetter(loc, value)\n" ], "name": "stderr" } ] }, { "cell_type": "code", "metadata": { "id": "qRRrcQqINtme" }, "source": [ "# Define the aggregation procedure outside of the groupby operation\n", "\n", "'''\n", "households_total_per_state is same as htps\n", "percent_sample_households_with_no_electricity is same as pshwne per state\n", "percent_sample_households_with_television is same as pshwt\n", "percent_sample_households_with_refegerator is same as pshwr\n", "percent_sample_households_with_iron is same as pshwi\n", "percent_sample_households_with_phone is same as pshwp\n", "percent_sample_households_with_computer is same as pshwc\n", "percent_sample_households_with_air_conditioner is same as pshwa\n", "Sample_pop: \n", "\n", "'''\n", "\n", "\n", "dhs_state_data=dhs_analysis_data.groupby('State').agg(\n", " \n", " htps=('Has electricity','sum'),\n", " pshwe=('Has electricity' , 'mean'),\n", " pshwt= ('Has television', 'mean'),\n", " pshwr= ('Has refrigerator', 'mean'),\n", " pshwi= ('Has electric iron', 'mean'),\n", " pshwp= ('Has mobile telephone', 'mean'),\n", " pshwc= ('Has a computer', 'mean'),\n", " pshwac= ('Has air conditioner', 'mean'),\n", " Sample_pop=('Number of household members', 'sum')\n", ")\n" ], "execution_count": 37, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "FTmWhI40bxFm" }, "source": [ "# overwrite index (state names) to match nigeria-states\n", "dhs_state_data.reset_index(inplace=True)\n", "dhs_state_data.State =dhs_state_data.State.astype(str)\n", "dhs_state_data.set_index('State', drop=True, inplace=True)\n", "dhs_state_data.index =dhs_state_data.index.str.lower()\n" ], "execution_count": 38, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "8XdO-WIlyARf" }, "source": [ "#loading of excel files\n", "statcompiler_house_no_elect = pd.read_excel(per_house_no_elect,\n", " sheet_name=\"Indicator Data\",\n", " skiprows=[i for i in range(1,7)])\n", "\n", "statcompiler_pop_no_elect = pd.read_excel(per_pop__no_elect,\n", " sheet_name=\"Indicator Data\",\n", " skiprows=[i for i in range(1,8)])" ], "execution_count": 39, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "AyPzkUIG2GzD" }, "source": [ "\"\"\"\n", "Characteristics is same as STATE as describe in the dataset dictioanry\n", "sites:\n", "This statcompiler_pop_no_elect conotes percentage of population without electricity\n", "\"\"\"\n", "statcompiler_pop_no_elect['Characteristic']=cleaning_txt(statcompiler_pop_no_elect['Characteristic'])\n", "statcompiler_pop_no_elect=statcompiler_pop_no_elect[['Characteristic','Population with no electricity']]\n", "statcompiler_pop_no_elect.rename(columns={'Characteristic':'STATE','Population with no electricity':'%pop_with_no_elect'}, inplace = True)\n", "#statcompiler_pop_no_elect.loc[statcompiler_pop_no_elect.STATE == 'fct abuja', 'STATE'] = 'abuja'\n", "#statcompiler_pop_no_elect.loc[statcompiler_pop_no_elect.STATE == 'Nasarawa', 'STATE'] = 'nassarawa'\n", "#statcompiler_pop_no_elect.STATE.value_counts()\n", "statcompiler_pop_no_elect.reset_index(inplace=True)\n", "statcompiler_pop_no_elect.STATE =statcompiler_pop_no_elect.STATE.astype(str)\n", "statcompiler_pop_no_elect.set_index('STATE', drop=True, inplace=True)\n", "statcompiler_pop_no_elect.index =statcompiler_pop_no_elect.index.str.lower()\n", "statcompiler_pop_no_elect.rename(index={'fct abuja':'abuja','nasarawa':'nassarawa'},inplace=True)\n", "\n", "\n", "\n", "\n" ], "execution_count": 40, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "c9uGkpMm-sXs" }, "source": [ "statcompiler_house_no_elect['Characteristic']=cleaning_txt(statcompiler_house_no_elect['Characteristic'])" ], "execution_count": 41, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "-ecH-syp-poG" }, "source": [ "\"\"\"\n", "Characteristics is same as \"STATE\" as describe in the dataset dictioanry from Dhs statcompiller sites\n", "sites:\n", "This statcompiler_house_no_elect conotes percentage of households without electricity\n", "\"\"\"\n", "statcompiler_house_no_elect=statcompiler_house_no_elect[['Characteristic','Households with no electricity']]\n", "statcompiler_house_no_elect.rename(columns={'Characteristic':'STATE','Households with no electricity':'%_with_no_elect'}, inplace = True)\n", "#statcompiler_house_no_elect.loc[statcompiler_house_no_elect.STATE =='fct abuja', 'STATE'] = 'abuja'\n", "#statcompiler_house_no_elect.loc[statcompiler_house_no_elect.STATE == 'nasarawa', 'STATE'] = 'nassarawa'\n", "#statcompiler_house_no_elect.STATE.value_counts()\n", "statcompiler_house_no_elect.reset_index(inplace=True)\n", "statcompiler_house_no_elect.STATE =statcompiler_house_no_elect.STATE.astype(str)\n", "statcompiler_house_no_elect.set_index('STATE', drop=True, inplace=True)\n", "statcompiler_house_no_elect.index =statcompiler_house_no_elect.index.str.lower()\n", "statcompiler_house_no_elect.rename(index={'fct abuja':'abuja','nasarawa':'nassarawa'},inplace=True)\n", "\n", "\n" ], "execution_count": 42, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "Iq8kwT0o-I1g" }, "source": [ "" ] }, { "cell_type": "code", "metadata": { "id": "Gnf-JSrahZd0" }, "source": [ "# zonal statistics dataset from night time dataset\n", "\n", "zonal_stats_data=pd.read_csv(zonal_stats_path,usecols=['mean','STATE'])\n", "#zonal_stats_data.STATE.value_counts()\n", "#zonal_stats_data.info()\n", "#zonal_stats_data.columns=['Mean_Avg_rad','State']\n", "#this sums up all avg_rad for all LGA per states\n", "zonal_stats_data=zonal_stats_data.groupby('STATE').sum()\n", "zonal_stats_data.reset_index(inplace=True)\n", "zonal_stats_data.STATE =zonal_stats_data.STATE.astype(str)\n", "zonal_stats_data.set_index('STATE', drop=True, inplace=True)\n", "zonal_stats_data.index =zonal_stats_data.index.str.lower()\n" ], "execution_count": 43, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "Y8QRNT6hlAJ-" }, "source": [ "pop_den_data=pd.read_csv(pop_den_path,usecols=['NAME_1','nga_pop__1'])\n", "pop_den_data.NAME_1 =pop_den_data.NAME_1.astype(str)\n", "pop_den_data.columns=['STATE','POPULATION']\n", "pop_den_data.reset_index(inplace=True)\n", "pop_den_data.set_index('STATE', drop=True, inplace=True)\n", "pop_den_data.index =pop_den_data.index.str.lower()\n", "pop_den_data.rename(index={'federal capital territory':'abuja',},inplace=True)\n", "\n" ], "execution_count": 44, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "JRS_8GgO0Orf" }, "source": [ "#loading the nigeria lga dataset\n", "ng = gpd.read_file(ng_state_path)\n", "#dhs_gps_data = gpd.read_file(dhs_gps_path)\n", "ng_data=ng[['ADM1_REF','Shape_Leng','Shape_Area','geometry']].rename(columns={'ADM1_REF': 'STATE', 'Shape_Leng': 'Shape_Leng','Shape_Area':'Shape_Area','geometry':'Geometry'},inplace=False)\n", "ng_data.reset_index(inplace=True)\n", "ng_data.set_index('STATE', drop=True, inplace=True)\n", "ng_data.index =ng_data.index.str.lower()\n", "ng_data.rename(index={'federal capital territory':'abuja','nasarawa':'nassarawa'},inplace=True)\n", "\n" ], "execution_count": 45, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "5EbuozBg7FKe" }, "source": [ "#solution 1\n", "dfs1=[ng_data,pop_den_data,zonal_stats_data,statcompiler_house_no_elect,statcompiler_pop_no_elect,dhs_state_data]\n", "combined1=dfs1[0].join(dfs1[1:])\n", "nan_value = 0\n", "dfs2=[ng_data,pop_den_data,zonal_stats_data,statcompiler_house_no_elect,statcompiler_pop_no_elect,dhs_state_data]\n", "combined2 = reduce(lambda df_left,df_right: pd.merge(df_left, df_right, \n", " left_index=True, right_index=True, \n", " how='outer'), dfs2)\n" ], "execution_count": 46, "outputs": [] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 1000 }, "id": "RCkepmEuD_yb", "outputId": "aabac7e6-8be9-446c-cca0-d6b4fee99b4f" }, "source": [ "combined1" ], "execution_count": 47, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
index_xShape_LengShape_AreaGeometryindex_yPOPULATIONmeanindex_x%_with_no_electindex_y%pop_with_no_electhtpspshwepshwtpshwrpshwipshwppshwcpshwacSample_pop
STATE
abia04.6951350.396543POLYGON ((7.38681 6.03667, 7.38729 6.03605, 7....036447149.8935362010.5208.93677.00.8911780.7712070.4115370.5719830.9566170.0787690.04653422232.0
abuja143.4984120.607222POLYGON ((7.67239 9.41128, 7.71959 9.34635, 7....1429966704.913411019.2022.73440.00.7271190.7038680.4066790.5060240.9289790.1642360.10019033251.0
adamawa111.5254433.113007POLYGON ((13.62129 10.94823, 13.62592 10.94822...141456840.740757755.2757.31737.00.3653000.3268140.1007360.1312300.7846480.0527870.02776033423.0
akwa ibom25.2638300.549476MULTIPOLYGON (((8.34482 4.61140, 8.34496 4.609...2535360941.6082433126.33125.32893.00.7176880.6005950.2763580.3984120.8886130.0672290.01290020921.0
anambra33.5959600.392661POLYGON ((6.93254 6.71090, 6.93167 6.69870, 6....3542533412.4640562118.52116.14127.00.8369500.7503550.4128980.4670450.9823570.0616510.01237126223.0
bauchi413.9520054.011018POLYGON ((10.75125 12.46148, 10.75615 12.39191...463863880.052741865.6863.62514.00.3507740.2053860.0740900.1176220.7834520.0396260.01562761599.0
bayelsa55.0467080.776768POLYGON ((6.55283 5.37988, 6.56198 5.37925, 6....5222896521.4218583252.93251.21676.00.4421000.5539440.2796100.4215250.9279870.0477450.03139021605.0
benue69.4080802.578363POLYGON ((8.52442 8.15727, 8.54240 8.14873, 8....655689460.775961156.1158.71943.00.3936390.4365880.1298620.1999590.8895870.0488250.00607827990.0
borno713.7143645.987849POLYGON ((13.35885 13.71261, 13.36207 13.71153...756690540.980581957.4952.92089.00.3542480.2996440.1536370.2207900.8521280.0608780.03255943997.0
cross river88.7797961.711218MULTIPOLYGON (((8.56068 4.79847, 8.55944 4.798...837804192.5918863346.53346.61620.00.5128210.6046220.2560940.3282680.8138650.0623620.03323814401.0
delta97.3725261.394082POLYGON ((6.66892 6.50123, 6.67111 6.48484, 6....9550280646.9080853428.03427.12394.00.6477270.7648810.3541670.3384740.9526520.0435610.02299818168.0
ebonyi104.4893550.518050POLYGON ((8.24431 6.78560, 8.24976 6.77560, 8....1028196750.5556772274.32274.71393.00.2614000.4422970.0791890.0932630.9253140.0206420.01125931781.0
edo117.8894251.595809POLYGON ((6.13357 7.55208, 6.13835 7.55045, 6....1141389946.4688213529.93527.42350.00.7088990.7182500.3858220.3825040.9309200.0446460.03197617491.0
ekiti123.3956330.471634POLYGON ((5.50593 8.01869, 5.50872 8.01764, 5....1231740061.1761962537.62540.52361.00.6972830.7288840.2120500.5150620.9512700.0750150.05552315676.0
enugu134.3198930.624323POLYGON ((7.54536 7.00125, 7.54930 6.99101, 7....1342992817.5234502336.62335.72565.00.6257620.6708950.2629910.3742380.9446210.0941690.02171321611.0
gombe155.9552861.438747POLYGON ((11.07149 11.31249, 11.07479 11.31080...1531604940.3799621057.11054.72966.00.4437460.2835130.1234290.1552960.8064030.0350090.02633261166.0
imo162.8780880.414097POLYGON ((7.26575 5.93081, 7.26607 5.92765, 7....16528328814.6390382443.22445.52281.00.4832630.7131360.3605930.3595340.9580510.0828390.03156830418.0
jigawa1711.7714971.928873POLYGON ((10.23002 13.01060, 10.23084 13.01060...1756905162.9684961375.51372.21994.00.2851830.1590390.0723680.1227120.8335240.0436210.00414859928.0
kaduna1812.6136783.645639POLYGON ((8.16953 11.48727, 8.16955 11.47969, ...18800720516.3706121436.01436.34215.00.6160480.4372990.2171880.3279740.9009060.0885710.03303151184.0
kano198.2441951.685607POLYGON ((8.30528 12.58803, 8.31002 12.58233, ...191270677884.9499071547.91544.65440.00.5615190.3442400.1663910.2910820.8716970.1048720.04779185930.0
katsina209.1056371.991947POLYGON ((7.81802 13.33042, 7.82082 13.32941, ...2076455753.8424501656.61653.53623.00.4882090.2567040.1138660.2645200.8361410.0154970.00309966657.0
kebbi2114.9689813.035227POLYGON ((4.25355 13.26045, 4.26682 13.25108, ...2143114540.8520371767.21766.02182.00.3632430.2092560.0784090.1375060.8102210.0218080.01631445869.0
kogi2210.4763832.368882POLYGON ((6.51270 8.39867, 6.52740 8.37569, 6....2243272460.700438240.2241.31902.00.5295100.5846330.2380290.3076280.9042320.0592980.00807320254.0
kwara2312.0469632.766244POLYGON ((3.73068 10.06829, 3.73651 10.06829, ...2331295823.371945320.7324.13080.00.7375480.5756700.2825670.3680560.8781130.0548370.01173424156.0
lagos244.2877700.300166POLYGON ((3.32387 6.67388, 3.32879 6.67337, 3....2412102238159.433944261.1261.15721.00.9885950.9260410.5553830.7584240.9866940.1458440.15344728423.0
nassarawa259.5371882.197927POLYGON ((7.99731 9.31436, 8.00084 9.30643, 8....2524593210.820678440.9440.72500.00.5111430.4839500.2287880.2447350.9088120.0572480.00838334813.0
niger2618.9215595.930956POLYGON ((4.92994 11.35723, 4.93396 11.35402, ...2653946314.928378545.9547.83118.00.5262450.3755270.1861600.2830380.9404220.0325740.03341844723.0
ogun279.6497741.364021POLYGON ((2.84613 7.92988, 2.85077 7.92932, 2....2750483518.197456277.8277.53126.00.9151050.8100120.3796840.4634070.9672130.1030440.03190914628.0
ondo287.7511091.233546POLYGON ((5.92604 7.73182, 5.92513 7.72812, 5....2845417791.0810322844.72845.31886.00.4967080.6894920.1848830.3763500.9481170.0497760.00711119203.0
osun294.6601400.704549POLYGON ((4.94975 8.05548, 4.95674 8.05355, 4....2945966939.0359112922.12921.32815.00.7954220.5945180.1870580.3746820.8875390.0440800.01271515693.0
oyo308.6245862.264918POLYGON ((4.21046 8.97003, 4.25339 8.96697, 4....30763612218.7253323023.33023.83007.00.7240550.6689140.2699250.4599080.9441370.0712740.03515518953.0
plateau318.5755272.180996POLYGON ((8.79339 10.38465, 8.79925 10.38298, ...3140880382.139574653.1658.91776.00.3684650.3282160.0570540.0562240.9056020.0197100.00850630240.0
rivers325.8145450.830196POLYGON ((6.64358 5.70171, 6.64836 5.69990, 6....32708141276.1467083618.13618.23350.00.7838090.7508190.4272340.6527840.9597570.0999060.07580721524.0
sokoto3310.5031592.679547POLYGON ((5.51464 13.89442, 5.52912 13.89230, ...3348796415.0672131868.41865.52192.00.3963830.1931280.1083180.1835440.8699820.0217000.01320142264.0
taraba3413.6962554.802474POLYGON ((10.83822 9.61847, 10.84855 9.61720, ...3429841740.4751971177.01179.31182.00.2064630.2876860.0824450.1276860.8546720.0289960.01676944335.0
yobe3511.0969373.726688POLYGON ((11.22181 13.37400, 11.25518 13.37400...3631972960.1547211271.71268.81817.00.2686680.1249450.0677210.0665390.7850070.0178910.00916860049.0
zamfara369.4661442.782855POLYGON ((6.84870 13.08240, 6.87310 13.05311, ...3743769110.2493721966.71963.12010.00.3395840.1677650.0814330.1280620.7381310.0334520.00591351147.0
\n", "
" ], "text/plain": [ " index_x Shape_Leng Shape_Area ... pshwc pshwac Sample_pop\n", "STATE ... \n", "abia 0 4.695135 0.396543 ... 0.078769 0.046534 22232.0\n", "abuja 14 3.498412 0.607222 ... 0.164236 0.100190 33251.0\n", "adamawa 1 11.525443 3.113007 ... 0.052787 0.027760 33423.0\n", "akwa ibom 2 5.263830 0.549476 ... 0.067229 0.012900 20921.0\n", "anambra 3 3.595960 0.392661 ... 0.061651 0.012371 26223.0\n", "bauchi 4 13.952005 4.011018 ... 0.039626 0.015627 61599.0\n", "bayelsa 5 5.046708 0.776768 ... 0.047745 0.031390 21605.0\n", "benue 6 9.408080 2.578363 ... 0.048825 0.006078 27990.0\n", "borno 7 13.714364 5.987849 ... 0.060878 0.032559 43997.0\n", "cross river 8 8.779796 1.711218 ... 0.062362 0.033238 14401.0\n", "delta 9 7.372526 1.394082 ... 0.043561 0.022998 18168.0\n", "ebonyi 10 4.489355 0.518050 ... 0.020642 0.011259 31781.0\n", "edo 11 7.889425 1.595809 ... 0.044646 0.031976 17491.0\n", "ekiti 12 3.395633 0.471634 ... 0.075015 0.055523 15676.0\n", "enugu 13 4.319893 0.624323 ... 0.094169 0.021713 21611.0\n", "gombe 15 5.955286 1.438747 ... 0.035009 0.026332 61166.0\n", "imo 16 2.878088 0.414097 ... 0.082839 0.031568 30418.0\n", "jigawa 17 11.771497 1.928873 ... 0.043621 0.004148 59928.0\n", "kaduna 18 12.613678 3.645639 ... 0.088571 0.033031 51184.0\n", "kano 19 8.244195 1.685607 ... 0.104872 0.047791 85930.0\n", "katsina 20 9.105637 1.991947 ... 0.015497 0.003099 66657.0\n", "kebbi 21 14.968981 3.035227 ... 0.021808 0.016314 45869.0\n", "kogi 22 10.476383 2.368882 ... 0.059298 0.008073 20254.0\n", "kwara 23 12.046963 2.766244 ... 0.054837 0.011734 24156.0\n", "lagos 24 4.287770 0.300166 ... 0.145844 0.153447 28423.0\n", "nassarawa 25 9.537188 2.197927 ... 0.057248 0.008383 34813.0\n", "niger 26 18.921559 5.930956 ... 0.032574 0.033418 44723.0\n", "ogun 27 9.649774 1.364021 ... 0.103044 0.031909 14628.0\n", "ondo 28 7.751109 1.233546 ... 0.049776 0.007111 19203.0\n", "osun 29 4.660140 0.704549 ... 0.044080 0.012715 15693.0\n", "oyo 30 8.624586 2.264918 ... 0.071274 0.035155 18953.0\n", "plateau 31 8.575527 2.180996 ... 0.019710 0.008506 30240.0\n", "rivers 32 5.814545 0.830196 ... 0.099906 0.075807 21524.0\n", "sokoto 33 10.503159 2.679547 ... 0.021700 0.013201 42264.0\n", "taraba 34 13.696255 4.802474 ... 0.028996 0.016769 44335.0\n", "yobe 35 11.096937 3.726688 ... 0.017891 0.009168 60049.0\n", "zamfara 36 9.466144 2.782855 ... 0.033452 0.005913 51147.0\n", "\n", "[37 rows x 20 columns]" ] }, "metadata": { "tags": [] }, "execution_count": 47 } ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 492 }, "id": "ZWLLMcRYLgZb", "outputId": "68279d96-9bee-40d9-a8b4-41c6caae1f45" }, "source": [ "combined2.head()" ], "execution_count": 48, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
index_xShape_LengShape_AreaGeometryindex_yPOPULATIONmeanindex_x%_with_no_electindex_y%pop_with_no_electhtpspshwepshwtpshwrpshwipshwppshwcpshwacSample_pop
abia0.04.6951350.396543POLYGON ((7.38681 6.03667, 7.38729 6.03605, 7....0.03644714.09.89353620.010.520.08.93677.00.8911780.7712070.4115370.5719830.9566170.0787690.04653422232.0
abuja14.03.4984120.607222POLYGON ((7.67239 9.41128, 7.71959 9.34635, 7....14.02996670.04.9134110.019.20.022.73440.00.7271190.7038680.4066790.5060240.9289790.1642360.10019033251.0
adamawa1.011.5254433.113007POLYGON ((13.62129 10.94823, 13.62592 10.94822...1.04145684.00.7407577.055.27.057.31737.00.3653000.3268140.1007360.1312300.7846480.0527870.02776033423.0
akwa ibom2.05.2638300.549476MULTIPOLYGON (((8.34482 4.61140, 8.34496 4.609...2.05353609.041.60824331.026.331.025.32893.00.7176880.6005950.2763580.3984120.8886130.0672290.01290020921.0
anambra3.03.5959600.392661POLYGON ((6.93254 6.71090, 6.93167 6.69870, 6....3.05425334.012.46405621.018.521.016.14127.00.8369500.7503550.4128980.4670450.9823570.0616510.01237126223.0
\n", "
" ], "text/plain": [ " index_x Shape_Leng Shape_Area ... pshwc pshwac Sample_pop\n", "abia 0.0 4.695135 0.396543 ... 0.078769 0.046534 22232.0\n", "abuja 14.0 3.498412 0.607222 ... 0.164236 0.100190 33251.0\n", "adamawa 1.0 11.525443 3.113007 ... 0.052787 0.027760 33423.0\n", "akwa ibom 2.0 5.263830 0.549476 ... 0.067229 0.012900 20921.0\n", "anambra 3.0 3.595960 0.392661 ... 0.061651 0.012371 26223.0\n", "\n", "[5 rows x 20 columns]" ] }, "metadata": { "tags": [] }, "execution_count": 48 } ] }, { "cell_type": "code", "metadata": { "id": "USoQqKYpQXzw" }, "source": [ "\"\"\"Column description \n", "'Shape_Leng': Length of state polygon from map\n", "'Shape_Area': Area coverage of state from shape file\n", "'Geometry': points of state polygon coverage\n", "'POPULATION' : Population of state\n", "'mean' : avg_rad per state from(night time zonal statistics)\n", "'%_with_no_elect' : % of house sample households without electricity\n", "'%pop_with_no_elect' : % of pop without electricity\n", "households_total_per_state is same as htps\n", "percent_sample_households_with_no_electricity : is same as pshwne per state\n", "percent_sample_households_with_television : is same as pshwt\n", "percent_sample_households_with_refegerator : is same as pshwr\n", "percent_sample_households_with_iron : is same as pshwi\n", "percent_sample_households_with_phone : is same as pshwp\n", "percent_sample_households_with_computer : is same as pshwc\n", "percent_sample_households_with_air_conditioner : is same as pshwa\n", "'Sample_pop':\n", "\"\"\"\n", "combined1=combined1[['Shape_Leng', 'Shape_Area', 'Geometry',\n", " 'POPULATION', 'mean', '%_with_no_elect',\n", " '%pop_with_no_elect', 'htps', 'pshwe', 'pshwt', 'pshwr', 'pshwi',\n", " 'pshwp', 'pshwc', 'pshwac', 'Sample_pop']]" ], "execution_count": 49, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "da6ROiHpVc5a" }, "source": [ "combined1.to_csv('state_electricity.csv', index=False)" ], "execution_count": 50, "outputs": [] } ] }