{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd, copy"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"#UNDATA dbd 1975-2013\n",
"\n",
"#http://data.un.org/Data.aspx?d=UNHCR&f=indID%3AType-Ref\n",
"#doanload data in batches, query limited to 50.000\n",
"df1=pd.read_csv('1985.csv') #years 1975-1984\n",
"df2=pd.read_csv('1995.csv') #years 1985-1994\n",
"df3=pd.read_csv('2005.csv') #years 1995-2004\n",
"df4=pd.read_csv('2015.csv') #years 2005-2013"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#UNHCR db 2001-2014\n",
"\n",
"#http://popstats.unhcr.org/en/overview#_ga=1.40654370.1278371767.1434418671 - select time series\n",
"df=pd.read_csv('all_data.csv',skiprows=3)"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [],
"source": [
"#UNHCR db 1951-2014\n",
"\n",
"#http://popstats.unhcr.org/en/overview#_ga=1.40654370.1278371767.1434418671 - person of concern\n",
"df=pd.read_csv('all_data2.csv',skiprows=3)"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"
\n",
" \n",
" \n",
" | \n",
" Year | \n",
" Country / territory of asylum/residence | \n",
" Origin / Returned from | \n",
" Refugees | \n",
" Asylum-seekers (pending cases) | \n",
" Returned refugees | \n",
" Internally displaced persons (IDPs) | \n",
" Returned IDPs | \n",
" Stateless persons | \n",
" Others of concern | \n",
" Total Population | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1951 | \n",
" NaN | \n",
" Various | \n",
" 8800 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 8800 | \n",
"
\n",
" \n",
" 1 | \n",
" 1951 | \n",
" NaN | \n",
" Various | \n",
" 120000 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 120000 | \n",
"
\n",
" \n",
" 2 | \n",
" 1951 | \n",
" Australia | \n",
" Various | \n",
" 180000 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 180000 | \n",
"
\n",
" \n",
" 3 | \n",
" 1951 | \n",
" Austria | \n",
" Various | \n",
" 282000 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 282000 | \n",
"
\n",
" \n",
" 4 | \n",
" 1951 | \n",
" Belgium | \n",
" Various | \n",
" 55000 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 55000 | \n",
"
\n",
" \n",
" 5 | \n",
" 1951 | \n",
" Canada | \n",
" Various | \n",
" 168511 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 168511 | \n",
"
\n",
" \n",
" 6 | \n",
" 1951 | \n",
" Denmark | \n",
" Various | \n",
" 2000 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 2000 | \n",
"
\n",
" \n",
" 7 | \n",
" 1951 | \n",
" France | \n",
" Various | \n",
" 290000 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 290000 | \n",
"
\n",
" \n",
" 8 | \n",
" 1951 | \n",
" Germany | \n",
" Various | \n",
" 265000 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 265000 | \n",
"
\n",
" \n",
" 9 | \n",
" 1951 | \n",
" Greece | \n",
" Various | \n",
" 18000 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 18000 | \n",
"
\n",
" \n",
" 10 | \n",
" 1951 | \n",
" Hong Kong SAR, China | \n",
" Various | \n",
" 30000 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 30000 | \n",
"
\n",
" \n",
" 11 | \n",
" 1951 | \n",
" Italy | \n",
" Various | \n",
" 26500 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 26500 | \n",
"
\n",
" \n",
" 12 | \n",
" 1951 | \n",
" Luxembourg | \n",
" Various | \n",
" 1800 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 1800 | \n",
"
\n",
" \n",
" 13 | \n",
" 1951 | \n",
" Morocco | \n",
" Various | \n",
" 3000 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 3000 | \n",
"
\n",
" \n",
" 14 | \n",
" 1951 | \n",
" Netherlands | \n",
" Various | \n",
" 14200 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 14200 | \n",
"
\n",
" \n",
" 15 | \n",
" 1951 | \n",
" Norway | \n",
" Various | \n",
" 2500 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 2500 | \n",
"
\n",
" \n",
" 16 | \n",
" 1951 | \n",
" Spain | \n",
" Various | \n",
" 2000 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 2000 | \n",
"
\n",
" \n",
" 17 | \n",
" 1951 | \n",
" Sweden | \n",
" Various | \n",
" 44000 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 44000 | \n",
"
\n",
" \n",
" 18 | \n",
" 1951 | \n",
" Switzerland | \n",
" Various | \n",
" 10000 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 10000 | \n",
"
\n",
" \n",
" 19 | \n",
" 1951 | \n",
" Tunisia | \n",
" Various | \n",
" 2000 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 2000 | \n",
"
\n",
" \n",
" 20 | \n",
" 1951 | \n",
" Turkey | \n",
" Various | \n",
" 2700 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 2700 | \n",
"
\n",
" \n",
" 21 | \n",
" 1951 | \n",
" United Kingdom | \n",
" Various | \n",
" 208000 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 208000 | \n",
"
\n",
" \n",
" 22 | \n",
" 1951 | \n",
" United States | \n",
" Various | \n",
" 350000 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 350000 | \n",
"
\n",
" \n",
" 23 | \n",
" 1951 | \n",
" Various | \n",
" Various | \n",
" 30000 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 30000 | \n",
"
\n",
" \n",
" 24 | \n",
" 1952 | \n",
" NaN | \n",
" Various | \n",
" 14000 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 14000 | \n",
"
\n",
" \n",
" 25 | \n",
" 1952 | \n",
" NaN | \n",
" Various | \n",
" 120000 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 120000 | \n",
"
\n",
" \n",
" 26 | \n",
" 1952 | \n",
" Austria | \n",
" Various | \n",
" 238200 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 238200 | \n",
"
\n",
" \n",
" 27 | \n",
" 1952 | \n",
" Belgium | \n",
" Various | \n",
" 53500 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 53500 | \n",
"
\n",
" \n",
" 28 | \n",
" 1952 | \n",
" Canada | \n",
" Various | \n",
" 154828 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 154828 | \n",
"
\n",
" \n",
" 29 | \n",
" 1952 | \n",
" Denmark | \n",
" Various | \n",
" 1800 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 1800 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 88272 | \n",
" 2014 | \n",
" Hungary | \n",
" Zimbabwe | \n",
" 4 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 4 | \n",
"
\n",
" \n",
" 88273 | \n",
" 2014 | \n",
" Ireland | \n",
" Zimbabwe | \n",
" 157 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 157 | \n",
"
\n",
" \n",
" 88274 | \n",
" 2014 | \n",
" Israel | \n",
" Zimbabwe | \n",
" 1 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 1 | \n",
"
\n",
" \n",
" 88275 | \n",
" 2014 | \n",
" Italy | \n",
" Zimbabwe | \n",
" 27 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 27 | \n",
"
\n",
" \n",
" 88276 | \n",
" 2014 | \n",
" Kenya | \n",
" Zimbabwe | \n",
" 4 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 4 | \n",
"
\n",
" \n",
" 88277 | \n",
" 2014 | \n",
" Lesotho | \n",
" Zimbabwe | \n",
" 7 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 7 | \n",
"
\n",
" \n",
" 88278 | \n",
" 2014 | \n",
" Lithuania | \n",
" Zimbabwe | \n",
" 1 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 1 | \n",
"
\n",
" \n",
" 88279 | \n",
" 2014 | \n",
" Malaysia | \n",
" Zimbabwe | \n",
" 2 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 2 | \n",
"
\n",
" \n",
" 88280 | \n",
" 2014 | \n",
" Namibia | \n",
" Zimbabwe | \n",
" 32 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 32 | \n",
"
\n",
" \n",
" 88281 | \n",
" 2014 | \n",
" Netherlands | \n",
" Zimbabwe | \n",
" 45 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 45 | \n",
"
\n",
" \n",
" 88282 | \n",
" 2014 | \n",
" New Zealand | \n",
" Zimbabwe | \n",
" 47 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 47 | \n",
"
\n",
" \n",
" 88283 | \n",
" 2014 | \n",
" Norway | \n",
" Zimbabwe | \n",
" 23 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 23 | \n",
"
\n",
" \n",
" 88284 | \n",
" 2014 | \n",
" Panama | \n",
" Zimbabwe | \n",
" 2 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 2 | \n",
"
\n",
" \n",
" 88285 | \n",
" 2014 | \n",
" Poland | \n",
" Zimbabwe | \n",
" 1 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 1 | \n",
"
\n",
" \n",
" 88286 | \n",
" 2014 | \n",
" Portugal | \n",
" Zimbabwe | \n",
" 1 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 1 | \n",
"
\n",
" \n",
" 88287 | \n",
" 2014 | \n",
" Rep. of Moldova | \n",
" Zimbabwe | \n",
" 1 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 1 | \n",
"
\n",
" \n",
" 88288 | \n",
" 2014 | \n",
" Romania | \n",
" Zimbabwe | \n",
" 5 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 5 | \n",
"
\n",
" \n",
" 88289 | \n",
" 2014 | \n",
" Slovenia | \n",
" Zimbabwe | \n",
" 2 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 2 | \n",
"
\n",
" \n",
" 88290 | \n",
" 2014 | \n",
" South Africa | \n",
" Zimbabwe | \n",
" 6217 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 6217 | \n",
"
\n",
" \n",
" 88291 | \n",
" 2014 | \n",
" Spain | \n",
" Zimbabwe | \n",
" 2 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 2 | \n",
"
\n",
" \n",
" 88292 | \n",
" 2014 | \n",
" Sudan | \n",
" Zimbabwe | \n",
" 1 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 1 | \n",
"
\n",
" \n",
" 88293 | \n",
" 2014 | \n",
" Swaziland | \n",
" Zimbabwe | \n",
" 7 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 7 | \n",
"
\n",
" \n",
" 88294 | \n",
" 2014 | \n",
" Sweden | \n",
" Zimbabwe | \n",
" 41 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 41 | \n",
"
\n",
" \n",
" 88295 | \n",
" 2014 | \n",
" Switzerland | \n",
" Zimbabwe | \n",
" 14 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 14 | \n",
"
\n",
" \n",
" 88296 | \n",
" 2014 | \n",
" Thailand | \n",
" Zimbabwe | \n",
" 1 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 1 | \n",
"
\n",
" \n",
" 88297 | \n",
" 2014 | \n",
" Ukraine | \n",
" Zimbabwe | \n",
" 1 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 1 | \n",
"
\n",
" \n",
" 88298 | \n",
" 2014 | \n",
" United Kingdom | \n",
" Zimbabwe | \n",
" 9467 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 9467 | \n",
"
\n",
" \n",
" 88299 | \n",
" 2014 | \n",
" United States | \n",
" Zimbabwe | \n",
" 1492 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 1492 | \n",
"
\n",
" \n",
" 88300 | \n",
" 2014 | \n",
" Uruguay | \n",
" Zimbabwe | \n",
" 1 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 1 | \n",
"
\n",
" \n",
" 88301 | \n",
" 2014 | \n",
" Zambia | \n",
" Zimbabwe | \n",
" 6 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 6 | \n",
"
\n",
" \n",
"
\n",
"
88302 rows × 11 columns
\n",
"
"
],
"text/plain": [
" Year Country / territory of asylum/residence Origin / Returned from \\\n",
"0 1951 NaN Various \n",
"1 1951 NaN Various \n",
"2 1951 Australia Various \n",
"3 1951 Austria Various \n",
"4 1951 Belgium Various \n",
"5 1951 Canada Various \n",
"6 1951 Denmark Various \n",
"7 1951 France Various \n",
"8 1951 Germany Various \n",
"9 1951 Greece Various \n",
"10 1951 Hong Kong SAR, China Various \n",
"11 1951 Italy Various \n",
"12 1951 Luxembourg Various \n",
"13 1951 Morocco Various \n",
"14 1951 Netherlands Various \n",
"15 1951 Norway Various \n",
"16 1951 Spain Various \n",
"17 1951 Sweden Various \n",
"18 1951 Switzerland Various \n",
"19 1951 Tunisia Various \n",
"20 1951 Turkey Various \n",
"21 1951 United Kingdom Various \n",
"22 1951 United States Various \n",
"23 1951 Various Various \n",
"24 1952 NaN Various \n",
"25 1952 NaN Various \n",
"26 1952 Austria Various \n",
"27 1952 Belgium Various \n",
"28 1952 Canada Various \n",
"29 1952 Denmark Various \n",
"... ... ... ... \n",
"88272 2014 Hungary Zimbabwe \n",
"88273 2014 Ireland Zimbabwe \n",
"88274 2014 Israel Zimbabwe \n",
"88275 2014 Italy Zimbabwe \n",
"88276 2014 Kenya Zimbabwe \n",
"88277 2014 Lesotho Zimbabwe \n",
"88278 2014 Lithuania Zimbabwe \n",
"88279 2014 Malaysia Zimbabwe \n",
"88280 2014 Namibia Zimbabwe \n",
"88281 2014 Netherlands Zimbabwe \n",
"88282 2014 New Zealand Zimbabwe \n",
"88283 2014 Norway Zimbabwe \n",
"88284 2014 Panama Zimbabwe \n",
"88285 2014 Poland Zimbabwe \n",
"88286 2014 Portugal Zimbabwe \n",
"88287 2014 Rep. of Moldova Zimbabwe \n",
"88288 2014 Romania Zimbabwe \n",
"88289 2014 Slovenia Zimbabwe \n",
"88290 2014 South Africa Zimbabwe \n",
"88291 2014 Spain Zimbabwe \n",
"88292 2014 Sudan Zimbabwe \n",
"88293 2014 Swaziland Zimbabwe \n",
"88294 2014 Sweden Zimbabwe \n",
"88295 2014 Switzerland Zimbabwe \n",
"88296 2014 Thailand Zimbabwe \n",
"88297 2014 Ukraine Zimbabwe \n",
"88298 2014 United Kingdom Zimbabwe \n",
"88299 2014 United States Zimbabwe \n",
"88300 2014 Uruguay Zimbabwe \n",
"88301 2014 Zambia Zimbabwe \n",
"\n",
" Refugees Asylum-seekers (pending cases) Returned refugees \\\n",
"0 8800 NaN NaN \n",
"1 120000 NaN NaN \n",
"2 180000 NaN NaN \n",
"3 282000 NaN NaN \n",
"4 55000 NaN NaN \n",
"5 168511 NaN NaN \n",
"6 2000 NaN NaN \n",
"7 290000 NaN NaN \n",
"8 265000 NaN NaN \n",
"9 18000 NaN NaN \n",
"10 30000 NaN NaN \n",
"11 26500 NaN NaN \n",
"12 1800 NaN NaN \n",
"13 3000 NaN NaN \n",
"14 14200 NaN NaN \n",
"15 2500 NaN NaN \n",
"16 2000 NaN NaN \n",
"17 44000 NaN NaN \n",
"18 10000 NaN NaN \n",
"19 2000 NaN NaN \n",
"20 2700 NaN NaN \n",
"21 208000 NaN NaN \n",
"22 350000 NaN NaN \n",
"23 30000 NaN NaN \n",
"24 14000 NaN NaN \n",
"25 120000 NaN NaN \n",
"26 238200 NaN NaN \n",
"27 53500 NaN NaN \n",
"28 154828 NaN NaN \n",
"29 1800 NaN NaN \n",
"... ... ... ... \n",
"88272 4 NaN NaN \n",
"88273 157 NaN NaN \n",
"88274 1 NaN NaN \n",
"88275 27 NaN NaN \n",
"88276 4 NaN NaN \n",
"88277 7 NaN NaN \n",
"88278 1 NaN NaN \n",
"88279 2 NaN NaN \n",
"88280 32 NaN NaN \n",
"88281 45 NaN NaN \n",
"88282 47 NaN NaN \n",
"88283 23 NaN NaN \n",
"88284 2 NaN NaN \n",
"88285 1 NaN NaN \n",
"88286 1 NaN NaN \n",
"88287 1 NaN NaN \n",
"88288 5 NaN NaN \n",
"88289 2 NaN NaN \n",
"88290 6217 NaN NaN \n",
"88291 2 NaN NaN \n",
"88292 1 NaN NaN \n",
"88293 7 NaN NaN \n",
"88294 41 NaN NaN \n",
"88295 14 NaN NaN \n",
"88296 1 NaN NaN \n",
"88297 1 NaN NaN \n",
"88298 9467 NaN NaN \n",
"88299 1492 NaN NaN \n",
"88300 1 NaN NaN \n",
"88301 6 NaN NaN \n",
"\n",
" Internally displaced persons (IDPs) Returned IDPs Stateless persons \\\n",
"0 NaN NaN NaN \n",
"1 NaN NaN NaN \n",
"2 NaN NaN NaN \n",
"3 NaN NaN NaN \n",
"4 NaN NaN NaN \n",
"5 NaN NaN NaN \n",
"6 NaN NaN NaN \n",
"7 NaN NaN NaN \n",
"8 NaN NaN NaN \n",
"9 NaN NaN NaN \n",
"10 NaN NaN NaN \n",
"11 NaN NaN NaN \n",
"12 NaN NaN NaN \n",
"13 NaN NaN NaN \n",
"14 NaN NaN NaN \n",
"15 NaN NaN NaN \n",
"16 NaN NaN NaN \n",
"17 NaN NaN NaN \n",
"18 NaN NaN NaN \n",
"19 NaN NaN NaN \n",
"20 NaN NaN NaN \n",
"21 NaN NaN NaN \n",
"22 NaN NaN NaN \n",
"23 NaN NaN NaN \n",
"24 NaN NaN NaN \n",
"25 NaN NaN NaN \n",
"26 NaN NaN NaN \n",
"27 NaN NaN NaN \n",
"28 NaN NaN NaN \n",
"29 NaN NaN NaN \n",
"... ... ... ... \n",
"88272 NaN NaN NaN \n",
"88273 NaN NaN NaN \n",
"88274 NaN NaN NaN \n",
"88275 NaN NaN NaN \n",
"88276 NaN NaN NaN \n",
"88277 NaN NaN NaN \n",
"88278 NaN NaN NaN \n",
"88279 NaN NaN NaN \n",
"88280 NaN NaN NaN \n",
"88281 NaN NaN NaN \n",
"88282 NaN NaN NaN \n",
"88283 NaN NaN NaN \n",
"88284 NaN NaN NaN \n",
"88285 NaN NaN NaN \n",
"88286 NaN NaN NaN \n",
"88287 NaN NaN NaN \n",
"88288 NaN NaN NaN \n",
"88289 NaN NaN NaN \n",
"88290 NaN NaN NaN \n",
"88291 NaN NaN NaN \n",
"88292 NaN NaN NaN \n",
"88293 NaN NaN NaN \n",
"88294 NaN NaN NaN \n",
"88295 NaN NaN NaN \n",
"88296 NaN NaN NaN \n",
"88297 NaN NaN NaN \n",
"88298 NaN NaN NaN \n",
"88299 NaN NaN NaN \n",
"88300 NaN NaN NaN \n",
"88301 NaN NaN NaN \n",
"\n",
" Others of concern Total Population \n",
"0 NaN 8800 \n",
"1 NaN 120000 \n",
"2 NaN 180000 \n",
"3 NaN 282000 \n",
"4 NaN 55000 \n",
"5 NaN 168511 \n",
"6 NaN 2000 \n",
"7 NaN 290000 \n",
"8 NaN 265000 \n",
"9 NaN 18000 \n",
"10 NaN 30000 \n",
"11 NaN 26500 \n",
"12 NaN 1800 \n",
"13 NaN 3000 \n",
"14 NaN 14200 \n",
"15 NaN 2500 \n",
"16 NaN 2000 \n",
"17 NaN 44000 \n",
"18 NaN 10000 \n",
"19 NaN 2000 \n",
"20 NaN 2700 \n",
"21 NaN 208000 \n",
"22 NaN 350000 \n",
"23 NaN 30000 \n",
"24 NaN 14000 \n",
"25 NaN 120000 \n",
"26 NaN 238200 \n",
"27 NaN 53500 \n",
"28 NaN 154828 \n",
"29 NaN 1800 \n",
"... ... ... \n",
"88272 NaN 4 \n",
"88273 NaN 157 \n",
"88274 NaN 1 \n",
"88275 NaN 27 \n",
"88276 NaN 4 \n",
"88277 NaN 7 \n",
"88278 NaN 1 \n",
"88279 NaN 2 \n",
"88280 NaN 32 \n",
"88281 NaN 45 \n",
"88282 NaN 47 \n",
"88283 NaN 23 \n",
"88284 NaN 2 \n",
"88285 NaN 1 \n",
"88286 NaN 1 \n",
"88287 NaN 1 \n",
"88288 NaN 5 \n",
"88289 NaN 2 \n",
"88290 NaN 6217 \n",
"88291 NaN 2 \n",
"88292 NaN 1 \n",
"88293 NaN 7 \n",
"88294 NaN 41 \n",
"88295 NaN 14 \n",
"88296 NaN 1 \n",
"88297 NaN 1 \n",
"88298 NaN 9467 \n",
"88299 NaN 1492 \n",
"88300 NaN 1 \n",
"88301 NaN 6 \n",
"\n",
"[88302 rows x 11 columns]"
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" year | \n",
" target | \n",
" source | \n",
" value | \n",
"
\n",
" \n",
" \n",
" \n",
" 2 | \n",
" 1951 | \n",
" Australia | \n",
" Various | \n",
" 180000 | \n",
"
\n",
" \n",
" 3 | \n",
" 1951 | \n",
" Austria | \n",
" Various | \n",
" 282000 | \n",
"
\n",
" \n",
" 4 | \n",
" 1951 | \n",
" Belgium | \n",
" Various | \n",
" 55000 | \n",
"
\n",
" \n",
" 5 | \n",
" 1951 | \n",
" Canada | \n",
" Various | \n",
" 168511 | \n",
"
\n",
" \n",
" 6 | \n",
" 1951 | \n",
" Denmark | \n",
" Various | \n",
" 2000 | \n",
"
\n",
" \n",
" 7 | \n",
" 1951 | \n",
" France | \n",
" Various | \n",
" 290000 | \n",
"
\n",
" \n",
" 8 | \n",
" 1951 | \n",
" Germany | \n",
" Various | \n",
" 265000 | \n",
"
\n",
" \n",
" 9 | \n",
" 1951 | \n",
" Greece | \n",
" Various | \n",
" 18000 | \n",
"
\n",
" \n",
" 10 | \n",
" 1951 | \n",
" Hong Kong SAR, China | \n",
" Various | \n",
" 30000 | \n",
"
\n",
" \n",
" 11 | \n",
" 1951 | \n",
" Italy | \n",
" Various | \n",
" 26500 | \n",
"
\n",
" \n",
" 12 | \n",
" 1951 | \n",
" Luxembourg | \n",
" Various | \n",
" 1800 | \n",
"
\n",
" \n",
" 13 | \n",
" 1951 | \n",
" Morocco | \n",
" Various | \n",
" 3000 | \n",
"
\n",
" \n",
" 14 | \n",
" 1951 | \n",
" Netherlands | \n",
" Various | \n",
" 14200 | \n",
"
\n",
" \n",
" 15 | \n",
" 1951 | \n",
" Norway | \n",
" Various | \n",
" 2500 | \n",
"
\n",
" \n",
" 16 | \n",
" 1951 | \n",
" Spain | \n",
" Various | \n",
" 2000 | \n",
"
\n",
" \n",
" 17 | \n",
" 1951 | \n",
" Sweden | \n",
" Various | \n",
" 44000 | \n",
"
\n",
" \n",
" 18 | \n",
" 1951 | \n",
" Switzerland | \n",
" Various | \n",
" 10000 | \n",
"
\n",
" \n",
" 19 | \n",
" 1951 | \n",
" Tunisia | \n",
" Various | \n",
" 2000 | \n",
"
\n",
" \n",
" 20 | \n",
" 1951 | \n",
" Turkey | \n",
" Various | \n",
" 2700 | \n",
"
\n",
" \n",
" 21 | \n",
" 1951 | \n",
" United Kingdom | \n",
" Various | \n",
" 208000 | \n",
"
\n",
" \n",
" 22 | \n",
" 1951 | \n",
" United States | \n",
" Various | \n",
" 350000 | \n",
"
\n",
" \n",
" 23 | \n",
" 1951 | \n",
" Various | \n",
" Various | \n",
" 30000 | \n",
"
\n",
" \n",
" 26 | \n",
" 1952 | \n",
" Austria | \n",
" Various | \n",
" 238200 | \n",
"
\n",
" \n",
" 27 | \n",
" 1952 | \n",
" Belgium | \n",
" Various | \n",
" 53500 | \n",
"
\n",
" \n",
" 28 | \n",
" 1952 | \n",
" Canada | \n",
" Various | \n",
" 154828 | \n",
"
\n",
" \n",
" 29 | \n",
" 1952 | \n",
" Denmark | \n",
" Various | \n",
" 1800 | \n",
"
\n",
" \n",
" 30 | \n",
" 1952 | \n",
" France | \n",
" Various | \n",
" 280000 | \n",
"
\n",
" \n",
" 31 | \n",
" 1952 | \n",
" Germany | \n",
" Various | \n",
" 240000 | \n",
"
\n",
" \n",
" 32 | \n",
" 1952 | \n",
" Greece | \n",
" Various | \n",
" 19000 | \n",
"
\n",
" \n",
" 33 | \n",
" 1952 | \n",
" Italy | \n",
" Various | \n",
" 25500 | \n",
"
\n",
" \n",
" 34 | \n",
" 1952 | \n",
" Luxembourg | \n",
" Various | \n",
" 1800 | \n",
"
\n",
" \n",
" 35 | \n",
" 1952 | \n",
" Morocco | \n",
" Various | \n",
" 2900 | \n",
"
\n",
" \n",
" 36 | \n",
" 1952 | \n",
" Netherlands | \n",
" Various | \n",
" 14100 | \n",
"
\n",
" \n",
" 37 | \n",
" 1952 | \n",
" Norway | \n",
" Various | \n",
" 2500 | \n",
"
\n",
" \n",
" 38 | \n",
" 1952 | \n",
" Sweden | \n",
" Various | \n",
" 42000 | \n",
"
\n",
" \n",
" 39 | \n",
" 1952 | \n",
" Switzerland | \n",
" Various | \n",
" 9800 | \n",
"
\n",
" \n",
" 40 | \n",
" 1952 | \n",
" Turkey | \n",
" Various | \n",
" 2000 | \n",
"
\n",
" \n",
" 41 | \n",
" 1952 | \n",
" United Kingdom | \n",
" Various | \n",
" 200000 | \n",
"
\n",
" \n",
" 42 | \n",
" 1952 | \n",
" United States | \n",
" Various | \n",
" 500000 | \n",
"
\n",
" \n",
" 43 | \n",
" 1952 | \n",
" Various | \n",
" Various | \n",
" 31000 | \n",
"
\n",
" \n",
" 46 | \n",
" 1953 | \n",
" Austria | \n",
" Various | \n",
" 215200 | \n",
"
\n",
" \n",
" 47 | \n",
" 1953 | \n",
" Belgium | \n",
" Various | \n",
" 53000 | \n",
"
\n",
" \n",
" 48 | \n",
" 1953 | \n",
" Canada | \n",
" Various | \n",
" 107004 | \n",
"
\n",
" \n",
" 49 | \n",
" 1953 | \n",
" Denmark | \n",
" Various | \n",
" 1600 | \n",
"
\n",
" \n",
" 50 | \n",
" 1953 | \n",
" France | \n",
" Various | \n",
" 270000 | \n",
"
\n",
" \n",
" 51 | \n",
" 1953 | \n",
" Germany | \n",
" Various | \n",
" 228000 | \n",
"
\n",
" \n",
" 52 | \n",
" 1953 | \n",
" Greece | \n",
" Various | \n",
" 18500 | \n",
"
\n",
" \n",
" 53 | \n",
" 1953 | \n",
" Italy | \n",
" Various | \n",
" 24500 | \n",
"
\n",
" \n",
" 54 | \n",
" 1953 | \n",
" Luxembourg | \n",
" Various | \n",
" 1800 | \n",
"
\n",
" \n",
" 55 | \n",
" 1953 | \n",
" Morocco | \n",
" Various | \n",
" 2800 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" year target source value\n",
"2 1951 Australia Various 180000\n",
"3 1951 Austria Various 282000\n",
"4 1951 Belgium Various 55000\n",
"5 1951 Canada Various 168511\n",
"6 1951 Denmark Various 2000\n",
"7 1951 France Various 290000\n",
"8 1951 Germany Various 265000\n",
"9 1951 Greece Various 18000\n",
"10 1951 Hong Kong SAR, China Various 30000\n",
"11 1951 Italy Various 26500\n",
"12 1951 Luxembourg Various 1800\n",
"13 1951 Morocco Various 3000\n",
"14 1951 Netherlands Various 14200\n",
"15 1951 Norway Various 2500\n",
"16 1951 Spain Various 2000\n",
"17 1951 Sweden Various 44000\n",
"18 1951 Switzerland Various 10000\n",
"19 1951 Tunisia Various 2000\n",
"20 1951 Turkey Various 2700\n",
"21 1951 United Kingdom Various 208000\n",
"22 1951 United States Various 350000\n",
"23 1951 Various Various 30000\n",
"26 1952 Austria Various 238200\n",
"27 1952 Belgium Various 53500\n",
"28 1952 Canada Various 154828\n",
"29 1952 Denmark Various 1800\n",
"30 1952 France Various 280000\n",
"31 1952 Germany Various 240000\n",
"32 1952 Greece Various 19000\n",
"33 1952 Italy Various 25500\n",
"34 1952 Luxembourg Various 1800\n",
"35 1952 Morocco Various 2900\n",
"36 1952 Netherlands Various 14100\n",
"37 1952 Norway Various 2500\n",
"38 1952 Sweden Various 42000\n",
"39 1952 Switzerland Various 9800\n",
"40 1952 Turkey Various 2000\n",
"41 1952 United Kingdom Various 200000\n",
"42 1952 United States Various 500000\n",
"43 1952 Various Various 31000\n",
"46 1953 Austria Various 215200\n",
"47 1953 Belgium Various 53000\n",
"48 1953 Canada Various 107004\n",
"49 1953 Denmark Various 1600\n",
"50 1953 France Various 270000\n",
"51 1953 Germany Various 228000\n",
"52 1953 Greece Various 18500\n",
"53 1953 Italy Various 24500\n",
"54 1953 Luxembourg Various 1800\n",
"55 1953 Morocco Various 2800"
]
},
"execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.columns=['year','target','source','a','b','c','d','e','f','g','value']\n",
"df=df.drop(['a','b','c','d','e','f','g',],axis=1).dropna()#.set_index(['year','target','source'])[:80440]\n",
"df.head(50)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" value | \n",
"
\n",
" \n",
" source | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" Afghanistan | \n",
" 1517903 | \n",
"
\n",
" \n",
" Algeria | \n",
" 17 | \n",
"
\n",
" \n",
" Ethiopia | \n",
" 4 | \n",
"
\n",
" \n",
" Ghana | \n",
" 1 | \n",
"
\n",
" \n",
" Iraq | \n",
" 43 | \n",
"
\n",
" \n",
" Islamic Rep. of Iran | \n",
" 56 | \n",
"
\n",
" \n",
" Myanmar | \n",
" 23 | \n",
"
\n",
" \n",
" Pakistan | \n",
" 1451729 | \n",
"
\n",
" \n",
" Russian Federation | \n",
" 7 | \n",
"
\n",
" \n",
" Rwanda | \n",
" 1 | \n",
"
\n",
" \n",
" Somalia | \n",
" 412 | \n",
"
\n",
" \n",
" State of Palestine | \n",
" 16 | \n",
"
\n",
" \n",
" Sudan | \n",
" 2 | \n",
"
\n",
" \n",
" Syrian Arab Rep. | \n",
" 16 | \n",
"
\n",
" \n",
" Turkey | \n",
" 7 | \n",
"
\n",
" \n",
" Uganda | \n",
" 1 | \n",
"
\n",
" \n",
" Uzbekistan | \n",
" 7 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" value\n",
"source \n",
"Afghanistan 1517903\n",
"Algeria 17\n",
"Ethiopia 4\n",
"Ghana 1\n",
"Iraq 43\n",
"Islamic Rep. of Iran 56\n",
"Myanmar 23\n",
"Pakistan 1451729\n",
"Russian Federation 7\n",
"Rwanda 1\n",
"Somalia 412\n",
"State of Palestine 16\n",
"Sudan 2\n",
"Syrian Arab Rep. 16\n",
"Turkey 7\n",
"Uganda 1\n",
"Uzbekistan 7"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.set_index(['year','target','source']).loc[2014].loc['Pakistan']"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"palestinian data, from world bank"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"def interpolate(d,years,gfit=2,depth=1,polyorder=1,override=True):\n",
" #depth * length of interpolation substrings will be taken to the left and right\n",
" #for example for {1971:5,1972:6,1973:7,1974:5} interpolating it over 1969-1990\n",
" #for the section 1960-1970 (2 elements) the values from 1972,1973,1974 (3 elements) will be taken with depth 1.5\n",
" #for the section 1974-1990 (15 elements) all values (4 elements) will be taken to extrapolate\n",
" if (gfit>2): \n",
" print 'interpolate takes only 1 (polynomial) or 2 (exponential) as 3rd argument [default=2]'\n",
" return\n",
" mydict={}\n",
" missing_points=[[]]\n",
" for year in years:\n",
" if year not in d.keys():\n",
" missing_points[-1].append(year)\n",
" else:\n",
" missing_points.append([])\n",
" for m in missing_points:\n",
" if m:\n",
" fit=gfit\n",
" if ((m[-1]np.sort(d.keys())[-1])): #check if it is ends of the interval, then extrapolate mean only\n",
" if not override: fit=0\n",
" \n",
" if fit==0: #take average\n",
" y = {k: d[k] for k in set(d.keys()).intersection(range(max(min(years),min(m)-int(3)),min(max(years),max(m)+int(3))+1))}\n",
" for i in range(len(m)):\n",
" mydict[m[i]]=np.mean(y.values())\n",
" elif fit==1:\n",
" #intersector\n",
" y = {k: d[k] for k in set(d.keys()).intersection(range(max(min(years),min(m)-int(depth*len(m))),min(max(years),max(m)+int(depth*len(m)))+1))}\n",
" #print y\n",
" w = np.polyfit(y.keys(),y.values(),polyorder) # obtaining regression parameters\n",
" if (polyorder==1):\n",
" intersector=w[0]*np.array(m)+w[1]\n",
" else:\n",
" intersector=w[0]*np.array(m)*np.array(m)+w[1]*np.array(m)+w[2]\n",
" for i in range(len(m)):\n",
" mydict[m[i]]=max(0,intersector[i])\n",
" else:\n",
" #intersector\n",
" y = {k: d[k] for k in set(d.keys()).intersection(range(max(min(years),min(m)-int(depth*len(m))),min(max(years),max(m)+int(depth*len(m)))+1))}\n",
" #print y\n",
" w = np.polyfit(y.keys(),np.log(y.values()),1) # obtaining log regression parameters (exp fitting)\n",
" intersector=np.exp(w[1])*np.exp(w[0]*np.array(m))\n",
" for i in range(len(m)):\n",
" mydict[m[i]]=max(0,intersector[i])\n",
" \n",
" #return interpolated points\n",
" return mydict"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"#http://data.worldbank.org/indicator/SM.POP.REFG/countries?display=default\n",
"dz=pd.read_csv('pal.csv',skiprows=2)\n",
"dz.columns=['country','cc','a','b']+range(1960,2016)\n",
"dz=dz.set_index('country').drop(['cc','a','b']+range(1960,1975)+range(2014,2016),axis=1)"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"ccc={'Syrian Arab Republic':'Syria','West Bank and Gaza':'Palestine','Jordan':'Jordan','Lebanon':'Lebanon'}\n",
"ccr={'Syrian Arab Republic':'Syrian Arab Rep.','West Bank and Gaza':'State of Palestine','Jordan':'Jordan','Lebanon':'Lebanon'}"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"dx=pd.concat([pd.DataFrame(dz.loc['Jordan']).T,pd.DataFrame(dz.loc['Lebanon']).T\\\n",
" ,pd.DataFrame(dz.loc['Syrian Arab Republic']).T,pd.DataFrame(dz.loc['West Bank and Gaza']).T])"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Anaconda\\lib\\site-packages\\numpy\\lib\\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned\n",
" warnings.warn(msg, RankWarning)\n",
"C:\\Anaconda\\lib\\site-packages\\numpy\\lib\\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned\n",
" warnings.warn(msg, RankWarning)\n",
"C:\\Anaconda\\lib\\site-packages\\numpy\\lib\\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned\n",
" warnings.warn(msg, RankWarning)\n",
"C:\\Anaconda\\lib\\site-packages\\numpy\\lib\\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned\n",
" warnings.warn(msg, RankWarning)\n"
]
}
],
"source": [
"#reset df, without setting index to run this parts\n",
"palref=['Jordan','Lebanon','Syrian Arab Republic','West Bank and Gaza']\n",
"dc=pd.DataFrame(columns=['year','target','source','value'])\n",
" \n",
"for k in range(4):\n",
" hp={}\n",
" for i in dx.loc[palref[k]].iteritems():\n",
" if ~np.isnan(i[1]):\n",
" hp[i[0]]=i[1]\n",
" hp.update(interpolate(hp,range(1951,2015)))\n",
" for y in hp:\n",
" val=hp[y]-df[((df['target']==ccr[palref[k]])&(df['year']==y))].sum()[3]\n",
" dc.loc[y+k*1000]=[y,ccc[palref[k]],'Palestine',val]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"append plaestine data to main dataframe"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [],
"source": [
"df=pd.concat([df,dc])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"save data"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [],
"source": [
"def cc(country):\n",
" if country in cc2:\n",
" return cc2[country]\n",
" else: return country\n",
"cc2={\n",
"'Bolivia (Plurinational State of)':'Bolivia',\n",
"'Micronesia (Federated States of)':'Micronesia',\n",
"'Serbia (and Kosovo: S/RES/1244 (1999))':'Serbia & Kosovo',\n",
"'The former Yugoslav Rep. of Macedonia':'FYROM',\n",
"'Venezuela (Bolivarian Republic of)':'Venezuela',\n",
"'Dem. Rep. of the Congo':'DRC',\n",
"'Central African Rep.':'CAR',\n",
"\"Dem. People's Rep. of Korea\":\"North Korea\",\n",
"'Islamic Rep. of Iran':'Iran',\n",
"\"Lao People's Dem. Rep.\":'Lao PDR',\n",
"'Papua New Guinea':'PNG',\n",
"'Syrian Arab Rep.':'Syria',\n",
"'United Rep. of Tanzania':'Tanzania',\n",
"'United Arab Emirates':'UAE',\n",
"'Antigua and Barbuda':'Antig. & Barb',\n",
"'Bosnia and Herzegovina':'Bosnia & Herz.',\n",
"'British Virgin Islands':'UK Virgin',\n",
"'Brunei Darussalam':'Brunei',\n",
"'Hong Kong SAR, China':'Hong Kong',\n",
"'Macao SAR, China':'Macao',\n",
"'Russian Federation':'Russia',\n",
"'Saint Kitts and Nevis':'St. Kitts & Nev.',\n",
"'Saint Vincent and the Grenadines':'St. Vinc. & Gren.',\n",
"'Sint Maarten (Dutch part)':'St. Maarten',\n",
"'State of Palestine':'Palestine',\n",
"'Trinidad and Tobago':'Trinid. & Tob.',\n",
"'Turks and Caicos Islands':'Turks & Caicos'\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [],
"source": [
"data={}\n",
"countries=set()\n",
"for i in df.T.iteritems():\n",
" year=int(i[1][0])\n",
" target=cc(i[1][1])\n",
" source=cc(i[1][2])\n",
" if target==\"Various\":\n",
" target=\"Other\"\n",
" if source==\"Various\":\n",
" source=\"Other\"\n",
" value=i[1][3]\n",
" countries.add(target)\n",
" countries.add(source)\n",
" if year not in data:data[year]={}\n",
" if ((source in data[year]) and (target in data[year][source])):\n",
" data[year][source][target][1]=value\n",
" else:\n",
" if target not in data[year]:data[year][target]={}\n",
" if source not in data[year][target]:data[year][target][source]=[0,0]\n",
" data[year][target][source][0]=value "
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1951 0\n",
"1952 0\n",
"1953 0\n",
"1954 0\n",
"1955 0\n",
"1956 0\n",
"1957 0\n",
"1958 0\n",
"1959 0\n",
"1960 0\n",
"1961 0\n",
"1962 0\n",
"1963 0\n",
"1964 0\n",
"1965 0\n",
"1966 0\n",
"1967 0\n",
"1968 0\n",
"1969 0\n",
"1970 0\n",
"1971 0\n",
"1972 0\n",
"1973 0\n",
"1974 0\n",
"1975 0\n",
"1976 0\n",
"1977 0\n",
"1978 0\n",
"1979 0\n",
"1980 0\n",
"1981 0\n",
"1982 0\n",
"1983 0\n",
"1984 0\n",
"1985 0\n",
"1986 0\n",
"1987 0\n",
"1988 0\n",
"1989 0\n",
"1990 0\n",
"1991 0\n",
"1992 0\n",
"1993 0\n",
"1994 0\n",
"1995 0\n",
"1996 0\n",
"1997 0\n",
"1998 0\n",
"1999 0\n",
"2000 0\n",
"2001 0\n",
"2002 0\n",
"2003 0\n",
"2004 0\n",
"2005 0\n",
"2006 0\n",
"2007 0\n",
"2008 0\n",
"2009 0\n",
"2010 0\n",
"2011 0\n",
"2012 0\n",
"2013 0\n",
"2014 0\n"
]
}
],
"source": [
"dk=pd.DataFrame(columns=['importer1','importer2','year','flow1','flow2'])\n",
"c=0\n",
"for year in data:\n",
" for target in data[year]:\n",
" for source in data[year][target]:\n",
" dk.loc[c]=[target,source,year,data[year][target][source][0],data[year][target][source][1]]\n",
" c+=1\n",
" print year,'0'"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [],
"source": [
"dk=dk.set_index(['year','importer1','importer2'])\n",
"dk.to_csv('datab.csv')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"other direction flow"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"data={}\n",
"countries=set()\n",
"for i in df.T.iteritems():\n",
" year=int(i[1][2])\n",
" target=cc(i[1][0])\n",
" source=cc(i[1][1])\n",
" if target==\"Various\":\n",
" target=\"Other\"\n",
" if source==\"Various\":\n",
" source=\"Other\"\n",
" value=i[1][3]\n",
" countries.add(target)\n",
" countries.add(source)\n",
" if year not in data:data[year]={}\n",
" if ((source in data[year]) and (target in data[year][source])):\n",
" data[year][source][target][1]=value\n",
" else:\n",
" if target not in data[year]:data[year][target]={}\n",
" if source not in data[year][target]:data[year][target][source]=[0,0]\n",
" data[year][target][source][0]=value \n",
"dk=pd.DataFrame(columns=['importer2','importer1','year','flow1','flow2'])\n",
"c=0\n",
"for year in data:\n",
" for target in data[year]:\n",
" for source in data[year][target]:\n",
" dk.loc[c]=[target,source,year,data[year][target][source][0],data[year][target][source][1]]\n",
" c+=1\n",
" print year,'0'\n",
"dk=dk.set_index(['year','importer2','importer1'])\n",
"dk.to_csv('data.csv')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"extra experimentations"
]
},
{
"cell_type": "code",
"execution_count": 225,
"metadata": {},
"outputs": [],
"source": [
"data2={}\n",
"countries2=set()\n",
"for i in df.T.iteritems():\n",
" year=int(i[1][2])\n",
" target=cc(i[1][0])\n",
" source=cc(i[1][1])\n",
" if target==\"Various\":\n",
" target=\"Other\"\n",
" if source==\"Various\":\n",
" source=\"Other\"\n",
" value=i[1][3]\n",
" countries2.add(target)\n",
" countries2.add(source)\n",
" if value<10000:\n",
" target=\"Other\"\n",
" if year not in data2:data2[year]={}\n",
" if ((source in data2[year]) and (target in data2[year][source])):\n",
" data2[year][source][target][1]+=value\n",
" else:\n",
" target=cc(i[1][0])\n",
" source=cc(i[1][1])\n",
" if target==\"Various\":\n",
" target=\"Other\"\n",
" if source==\"Various\":\n",
" source=\"Other\"\n",
" if value<10000:\n",
" source=\"Other\"\n",
" if target not in data2[year]:data2[year][target]={}\n",
" if source not in data2[year][target]:data2[year][target][source]=[0,0]\n",
" \n",
" data2[year][target][source][0]+=value "
]
},
{
"cell_type": "code",
"execution_count": 226,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1975 1\n",
"1976 1\n",
"1977 1\n",
"1978 1\n",
"1979 1\n",
"1980 1\n",
"1981 1\n",
"1982 1\n",
"1983 1\n",
"1984 1\n",
"1985 1\n",
"1986 1\n",
"1987 1\n",
"1988 1\n",
"1989 1\n",
"1990 1\n",
"1991 1\n",
"1992 1\n",
"1993 1\n",
"1994 1\n",
"1995 1\n",
"1996 1\n",
"1997 1\n",
"1998 1\n",
"1999 1\n",
"2000 1\n",
"2001 1\n",
"2002 1\n",
"2003 1\n",
"2004 1\n",
"2005 1\n",
"2006 1\n",
"2007 1\n",
"2008 1\n",
"2009 1\n",
"2010 1\n",
"2011 1\n",
"2012 1\n",
"2013 1\n"
]
}
],
"source": [
"dk2=pd.DataFrame(columns=['importer1','importer2','year','flow1','flow2'])\n",
"c=0\n",
"for year in data2:\n",
" for target in data2[year]:\n",
" for source in data2[year][target]:\n",
" dk2.loc[c]=[target,source,year,data2[year][target][source][0],data2[year][target][source][1]]\n",
" c+=1\n",
" print year,'1'"
]
},
{
"cell_type": "code",
"execution_count": 227,
"metadata": {},
"outputs": [],
"source": [
"dk2=dk2.set_index(['year','importer1','importer2'])\n",
"dk2.to_csv('data2.csv')"
]
},
{
"cell_type": "code",
"execution_count": 262,
"metadata": {},
"outputs": [],
"source": [
"data3={}\n",
"for i in df.T.iteritems():\n",
" year=int(i[1][2])\n",
" target=cc(i[1][0])\n",
" source=cc(i[1][1])\n",
" if target==\"Various\":\n",
" target=\"Other\"\n",
" if source==\"Various\":\n",
" source=\"Other\"\n",
" value=i[1][3]\n",
" if value<0:\n",
" target=\"Other\"\n",
" if year not in data3:data3[year]={}\n",
" if ((source in data3[year]) and (target in data3[year][source])):\n",
" data3[year][source][target][1]+=value\n",
" else:\n",
" target=cc(i[1][0])\n",
" source=cc(i[1][1])\n",
" if target==\"Various\":\n",
" target=\"Other\"\n",
" if source==\"Various\":\n",
" source=\"Other\"\n",
" if value<0:\n",
" source=\"Other\"\n",
" if target not in data3[year]:data3[year][target]={}\n",
" if source not in data3[year][target]:data3[year][target][source]=[0,0]\n",
" \n",
" data3[year][target][source][0]+=value "
]
},
{
"cell_type": "code",
"execution_count": 327,
"metadata": {},
"outputs": [],
"source": [
"#for th in {1000,5000,10000,50000,100000}:\n",
"for th in {10000,100000}: \n",
" dk3=pd.DataFrame(columns=['importer1','importer2','year','flow1','flow2'])\n",
" c=0 \n",
" for year in data3:\n",
" p=0\n",
" dk3.loc[c]=['Other','Other',year,0,0]\n",
" r=copy.deepcopy(c)\n",
" c+=1\n",
" for target in data3[year]:\n",
" for source in data3[year][target]:\n",
" if ((source!='Other') and (target!='Other')):\n",
" if ((data3[year][target][source][0]>th)or(data3[year][target][source][1]>th)):\n",
" dk3.loc[c]=[target,source,year,data3[year][target][source][0],data3[year][target][source][1]]\n",
" else: p+=data3[year][target][source][0]+data3[year][target][source][1]\n",
" else: dk3.loc[r]['flow1']+=data3[year][target][source][0]+data3[year][target][source][1]\n",
" c+=1\n",
" dk3[(dk3['year']==year)&(dk3['importer2']=='Other')&(dk3['importer1']=='Other')]['flow1']+=p\n",
" dk3=dk3.set_index(['year','importer1','importer2'])\n",
" dk3.to_csv(repr(th)+'data3.csv')\n",
" dk3b=dk3.copy()\n",
" dk3b.index.names=[u'year', u'importer2', u'importer1']\n",
" dk3b.to_csv(repr(th)+'data3b.csv')"
]
},
{
"cell_type": "code",
"execution_count": 326,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" | \n",
" flow1 | \n",
" flow2 | \n",
"
\n",
" \n",
" importer1 | \n",
" importer2 | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" Other | \n",
" Other | \n",
" 2472611 | \n",
" 0 | \n",
"
\n",
" \n",
" Ethiopia | \n",
" Sudan | \n",
" 10600 | \n",
" 105000 | \n",
"
\n",
" \n",
" Tanzania | \n",
" Burundi | \n",
" 126600 | \n",
" 0 | \n",
"
\n",
" \n",
" Rwanda | \n",
" 23600 | \n",
" 0 | \n",
"
\n",
" \n",
" DRC | \n",
" Burundi | \n",
" 18220 | \n",
" 0 | \n",
"
\n",
" \n",
" Rwanda | \n",
" 24500 | \n",
" 0 | \n",
"
\n",
" \n",
" Cameroon | \n",
" Equatorial Guinea | \n",
" 30000 | \n",
" 0 | \n",
"
\n",
" \n",
" Algeria | \n",
" Western Sahara | \n",
" 35000 | \n",
" 0 | \n",
"
\n",
" \n",
" Zambia | \n",
" Angola | \n",
" 27730 | \n",
" 0 | \n",
"
\n",
" \n",
" Senegal | \n",
" Guinea-Bissau | \n",
" 37000 | \n",
" 0 | \n",
"
\n",
" \n",
" Thailand | \n",
" Lao PDR | \n",
" 62720 | \n",
" 0 | \n",
"
\n",
" \n",
" Cambodia | \n",
" 17090 | \n",
" 0 | \n",
"
\n",
" \n",
" Gabon | \n",
" Equatorial Guinea | \n",
" 60000 | \n",
" 0 | \n",
"
\n",
" \n",
" Angola | \n",
" DRC | \n",
" 5000 | \n",
" 471340 | \n",
"
\n",
" \n",
" Côte d'Ivoire | \n",
" Guinea | \n",
" 500000 | \n",
" 0 | \n",
"
\n",
" \n",
" Mozambique | \n",
" Zimbabwe | \n",
" 30000 | \n",
" 0 | \n",
"
\n",
" \n",
" Uganda | \n",
" DRC | \n",
" 34230 | \n",
" 0 | \n",
"
\n",
" \n",
" Rwanda | \n",
" 78480 | \n",
" 0 | \n",
"
\n",
" \n",
" Burundi | \n",
" Rwanda | \n",
" 49500 | \n",
" 7500 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" flow1 flow2\n",
"importer1 importer2 \n",
"Other Other 2472611 0\n",
"Ethiopia Sudan 10600 105000\n",
"Tanzania Burundi 126600 0\n",
" Rwanda 23600 0\n",
"DRC Burundi 18220 0\n",
" Rwanda 24500 0\n",
"Cameroon Equatorial Guinea 30000 0\n",
"Algeria Western Sahara 35000 0\n",
"Zambia Angola 27730 0\n",
"Senegal Guinea-Bissau 37000 0\n",
"Thailand Lao PDR 62720 0\n",
" Cambodia 17090 0\n",
"Gabon Equatorial Guinea 60000 0\n",
"Angola DRC 5000 471340\n",
"Côte d'Ivoire Guinea 500000 0\n",
"Mozambique Zimbabwe 30000 0\n",
"Uganda DRC 34230 0\n",
" Rwanda 78480 0\n",
"Burundi Rwanda 49500 7500"
]
},
"execution_count": 326,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dk3.loc[1976]"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.3"
}
},
"nbformat": 4,
"nbformat_minor": 1
}