{ "metadata": { "name": "", "signature": "sha256:2562bb6a1b236e1119dc44a04cd870f39d8d6c63b5365119d464e867af5ee9f1" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "code", "collapsed": false, "input": [ "import pandas as pd, copy" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 77 }, { "cell_type": "code", "collapsed": false, "input": [ "#UNDATA dbd 1975-2013\n", "\n", "#http://data.un.org/Data.aspx?d=UNHCR&f=indID%3AType-Ref\n", "#download data in batches, query limited to 50.000\n", "df1=pd.read_csv('1985.csv') #years 1975-1984\n", "df2=pd.read_csv('1995.csv') #years 1985-1994\n", "df3=pd.read_csv('2005.csv') #years 1995-2004\n", "df4=pd.read_csv('2015.csv') #years 2005-2013" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 2 }, { "cell_type": "code", "collapsed": false, "input": [ "#UNHCR db 2001-2014\n", "\n", "#http://popstats.unhcr.org/en/overview#_ga=1.40654370.1278371767.1434418671 - select time series\n", "df=pd.read_csv('all_data.csv',skiprows=3)" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "#UNHCR db 1951-2014\n", "\n", "#http://popstats.unhcr.org/en/overview#_ga=1.40654370.1278371767.1434418671 - person of concern\n", "df=pd.read_csv('all_data2.csv',skiprows=3)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 78 }, { "cell_type": "code", "collapsed": false, "input": [ "df" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
YearCountry / territory of asylum/residenceOrigin / Returned fromRefugeesAsylum-seekers (pending cases)Returned refugeesInternally displaced persons (IDPs)Returned IDPsStateless personsOthers of concernTotal Population
0 1951 NaN Various 8800NaNNaNNaNNaNNaNNaN 8800
1 1951 NaN Various 120000NaNNaNNaNNaNNaNNaN 120000
2 1951 Australia Various 180000NaNNaNNaNNaNNaNNaN 180000
3 1951 Austria Various 282000NaNNaNNaNNaNNaNNaN 282000
4 1951 Belgium Various 55000NaNNaNNaNNaNNaNNaN 55000
5 1951 Canada Various 168511NaNNaNNaNNaNNaNNaN 168511
6 1951 Denmark Various 2000NaNNaNNaNNaNNaNNaN 2000
7 1951 France Various 290000NaNNaNNaNNaNNaNNaN 290000
8 1951 Germany Various 265000NaNNaNNaNNaNNaNNaN 265000
9 1951 Greece Various 18000NaNNaNNaNNaNNaNNaN 18000
10 1951 Hong Kong SAR, China Various 30000NaNNaNNaNNaNNaNNaN 30000
11 1951 Italy Various 26500NaNNaNNaNNaNNaNNaN 26500
12 1951 Luxembourg Various 1800NaNNaNNaNNaNNaNNaN 1800
13 1951 Morocco Various 3000NaNNaNNaNNaNNaNNaN 3000
14 1951 Netherlands Various 14200NaNNaNNaNNaNNaNNaN 14200
15 1951 Norway Various 2500NaNNaNNaNNaNNaNNaN 2500
16 1951 Spain Various 2000NaNNaNNaNNaNNaNNaN 2000
17 1951 Sweden Various 44000NaNNaNNaNNaNNaNNaN 44000
18 1951 Switzerland Various 10000NaNNaNNaNNaNNaNNaN 10000
19 1951 Tunisia Various 2000NaNNaNNaNNaNNaNNaN 2000
20 1951 Turkey Various 2700NaNNaNNaNNaNNaNNaN 2700
21 1951 United Kingdom Various 208000NaNNaNNaNNaNNaNNaN 208000
22 1951 United States Various 350000NaNNaNNaNNaNNaNNaN 350000
23 1951 Various Various 30000NaNNaNNaNNaNNaNNaN 30000
24 1952 NaN Various 14000NaNNaNNaNNaNNaNNaN 14000
25 1952 NaN Various 120000NaNNaNNaNNaNNaNNaN 120000
26 1952 Austria Various 238200NaNNaNNaNNaNNaNNaN 238200
27 1952 Belgium Various 53500NaNNaNNaNNaNNaNNaN 53500
28 1952 Canada Various 154828NaNNaNNaNNaNNaNNaN 154828
29 1952 Denmark Various 1800NaNNaNNaNNaNNaNNaN 1800
....................................
88272 2014 Hungary Zimbabwe 4NaNNaNNaNNaNNaNNaN 4
88273 2014 Ireland Zimbabwe 157NaNNaNNaNNaNNaNNaN 157
88274 2014 Israel Zimbabwe 1NaNNaNNaNNaNNaNNaN 1
88275 2014 Italy Zimbabwe 27NaNNaNNaNNaNNaNNaN 27
88276 2014 Kenya Zimbabwe 4NaNNaNNaNNaNNaNNaN 4
88277 2014 Lesotho Zimbabwe 7NaNNaNNaNNaNNaNNaN 7
88278 2014 Lithuania Zimbabwe 1NaNNaNNaNNaNNaNNaN 1
88279 2014 Malaysia Zimbabwe 2NaNNaNNaNNaNNaNNaN 2
88280 2014 Namibia Zimbabwe 32NaNNaNNaNNaNNaNNaN 32
88281 2014 Netherlands Zimbabwe 45NaNNaNNaNNaNNaNNaN 45
88282 2014 New Zealand Zimbabwe 47NaNNaNNaNNaNNaNNaN 47
88283 2014 Norway Zimbabwe 23NaNNaNNaNNaNNaNNaN 23
88284 2014 Panama Zimbabwe 2NaNNaNNaNNaNNaNNaN 2
88285 2014 Poland Zimbabwe 1NaNNaNNaNNaNNaNNaN 1
88286 2014 Portugal Zimbabwe 1NaNNaNNaNNaNNaNNaN 1
88287 2014 Rep. of Moldova Zimbabwe 1NaNNaNNaNNaNNaNNaN 1
88288 2014 Romania Zimbabwe 5NaNNaNNaNNaNNaNNaN 5
88289 2014 Slovenia Zimbabwe 2NaNNaNNaNNaNNaNNaN 2
88290 2014 South Africa Zimbabwe 6217NaNNaNNaNNaNNaNNaN 6217
88291 2014 Spain Zimbabwe 2NaNNaNNaNNaNNaNNaN 2
88292 2014 Sudan Zimbabwe 1NaNNaNNaNNaNNaNNaN 1
88293 2014 Swaziland Zimbabwe 7NaNNaNNaNNaNNaNNaN 7
88294 2014 Sweden Zimbabwe 41NaNNaNNaNNaNNaNNaN 41
88295 2014 Switzerland Zimbabwe 14NaNNaNNaNNaNNaNNaN 14
88296 2014 Thailand Zimbabwe 1NaNNaNNaNNaNNaNNaN 1
88297 2014 Ukraine Zimbabwe 1NaNNaNNaNNaNNaNNaN 1
88298 2014 United Kingdom Zimbabwe 9467NaNNaNNaNNaNNaNNaN 9467
88299 2014 United States Zimbabwe 1492NaNNaNNaNNaNNaNNaN 1492
88300 2014 Uruguay Zimbabwe 1NaNNaNNaNNaNNaNNaN 1
88301 2014 Zambia Zimbabwe 6NaNNaNNaNNaNNaNNaN 6
\n", "

88302 rows \u00d7 11 columns

\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 79, "text": [ " Year Country / territory of asylum/residence Origin / Returned from \\\n", "0 1951 NaN Various \n", "1 1951 NaN Various \n", "2 1951 Australia Various \n", "3 1951 Austria Various \n", "4 1951 Belgium Various \n", "5 1951 Canada Various \n", "6 1951 Denmark Various \n", "7 1951 France Various \n", "8 1951 Germany Various \n", "9 1951 Greece Various \n", "10 1951 Hong Kong SAR, China Various \n", "11 1951 Italy Various \n", "12 1951 Luxembourg Various \n", "13 1951 Morocco Various \n", "14 1951 Netherlands Various \n", "15 1951 Norway Various \n", "16 1951 Spain Various \n", "17 1951 Sweden Various \n", "18 1951 Switzerland Various \n", "19 1951 Tunisia Various \n", "20 1951 Turkey Various \n", "21 1951 United Kingdom Various \n", "22 1951 United States Various \n", "23 1951 Various Various \n", "24 1952 NaN Various \n", "25 1952 NaN Various \n", "26 1952 Austria Various \n", "27 1952 Belgium Various \n", "28 1952 Canada Various \n", "29 1952 Denmark Various \n", "... ... ... ... \n", "88272 2014 Hungary Zimbabwe \n", "88273 2014 Ireland Zimbabwe \n", "88274 2014 Israel Zimbabwe \n", "88275 2014 Italy Zimbabwe \n", "88276 2014 Kenya Zimbabwe \n", "88277 2014 Lesotho Zimbabwe \n", "88278 2014 Lithuania Zimbabwe \n", "88279 2014 Malaysia Zimbabwe \n", "88280 2014 Namibia Zimbabwe \n", "88281 2014 Netherlands Zimbabwe \n", "88282 2014 New Zealand Zimbabwe \n", "88283 2014 Norway Zimbabwe \n", "88284 2014 Panama Zimbabwe \n", "88285 2014 Poland Zimbabwe \n", "88286 2014 Portugal Zimbabwe \n", "88287 2014 Rep. 
of Moldova Zimbabwe \n", "88288 2014 Romania Zimbabwe \n", "88289 2014 Slovenia Zimbabwe \n", "88290 2014 South Africa Zimbabwe \n", "88291 2014 Spain Zimbabwe \n", "88292 2014 Sudan Zimbabwe \n", "88293 2014 Swaziland Zimbabwe \n", "88294 2014 Sweden Zimbabwe \n", "88295 2014 Switzerland Zimbabwe \n", "88296 2014 Thailand Zimbabwe \n", "88297 2014 Ukraine Zimbabwe \n", "88298 2014 United Kingdom Zimbabwe \n", "88299 2014 United States Zimbabwe \n", "88300 2014 Uruguay Zimbabwe \n", "88301 2014 Zambia Zimbabwe \n", "\n", " Refugees Asylum-seekers (pending cases) Returned refugees \\\n", "0 8800 NaN NaN \n", "1 120000 NaN NaN \n", "2 180000 NaN NaN \n", "3 282000 NaN NaN \n", "4 55000 NaN NaN \n", "5 168511 NaN NaN \n", "6 2000 NaN NaN \n", "7 290000 NaN NaN \n", "8 265000 NaN NaN \n", "9 18000 NaN NaN \n", "10 30000 NaN NaN \n", "11 26500 NaN NaN \n", "12 1800 NaN NaN \n", "13 3000 NaN NaN \n", "14 14200 NaN NaN \n", "15 2500 NaN NaN \n", "16 2000 NaN NaN \n", "17 44000 NaN NaN \n", "18 10000 NaN NaN \n", "19 2000 NaN NaN \n", "20 2700 NaN NaN \n", "21 208000 NaN NaN \n", "22 350000 NaN NaN \n", "23 30000 NaN NaN \n", "24 14000 NaN NaN \n", "25 120000 NaN NaN \n", "26 238200 NaN NaN \n", "27 53500 NaN NaN \n", "28 154828 NaN NaN \n", "29 1800 NaN NaN \n", "... ... ... ... 
\n", "88272 4 NaN NaN \n", "88273 157 NaN NaN \n", "88274 1 NaN NaN \n", "88275 27 NaN NaN \n", "88276 4 NaN NaN \n", "88277 7 NaN NaN \n", "88278 1 NaN NaN \n", "88279 2 NaN NaN \n", "88280 32 NaN NaN \n", "88281 45 NaN NaN \n", "88282 47 NaN NaN \n", "88283 23 NaN NaN \n", "88284 2 NaN NaN \n", "88285 1 NaN NaN \n", "88286 1 NaN NaN \n", "88287 1 NaN NaN \n", "88288 5 NaN NaN \n", "88289 2 NaN NaN \n", "88290 6217 NaN NaN \n", "88291 2 NaN NaN \n", "88292 1 NaN NaN \n", "88293 7 NaN NaN \n", "88294 41 NaN NaN \n", "88295 14 NaN NaN \n", "88296 1 NaN NaN \n", "88297 1 NaN NaN \n", "88298 9467 NaN NaN \n", "88299 1492 NaN NaN \n", "88300 1 NaN NaN \n", "88301 6 NaN NaN \n", "\n", " Internally displaced persons (IDPs) Returned IDPs Stateless persons \\\n", "0 NaN NaN NaN \n", "1 NaN NaN NaN \n", "2 NaN NaN NaN \n", "3 NaN NaN NaN \n", "4 NaN NaN NaN \n", "5 NaN NaN NaN \n", "6 NaN NaN NaN \n", "7 NaN NaN NaN \n", "8 NaN NaN NaN \n", "9 NaN NaN NaN \n", "10 NaN NaN NaN \n", "11 NaN NaN NaN \n", "12 NaN NaN NaN \n", "13 NaN NaN NaN \n", "14 NaN NaN NaN \n", "15 NaN NaN NaN \n", "16 NaN NaN NaN \n", "17 NaN NaN NaN \n", "18 NaN NaN NaN \n", "19 NaN NaN NaN \n", "20 NaN NaN NaN \n", "21 NaN NaN NaN \n", "22 NaN NaN NaN \n", "23 NaN NaN NaN \n", "24 NaN NaN NaN \n", "25 NaN NaN NaN \n", "26 NaN NaN NaN \n", "27 NaN NaN NaN \n", "28 NaN NaN NaN \n", "29 NaN NaN NaN \n", "... ... ... ... 
\n", "88272 NaN NaN NaN \n", "88273 NaN NaN NaN \n", "88274 NaN NaN NaN \n", "88275 NaN NaN NaN \n", "88276 NaN NaN NaN \n", "88277 NaN NaN NaN \n", "88278 NaN NaN NaN \n", "88279 NaN NaN NaN \n", "88280 NaN NaN NaN \n", "88281 NaN NaN NaN \n", "88282 NaN NaN NaN \n", "88283 NaN NaN NaN \n", "88284 NaN NaN NaN \n", "88285 NaN NaN NaN \n", "88286 NaN NaN NaN \n", "88287 NaN NaN NaN \n", "88288 NaN NaN NaN \n", "88289 NaN NaN NaN \n", "88290 NaN NaN NaN \n", "88291 NaN NaN NaN \n", "88292 NaN NaN NaN \n", "88293 NaN NaN NaN \n", "88294 NaN NaN NaN \n", "88295 NaN NaN NaN \n", "88296 NaN NaN NaN \n", "88297 NaN NaN NaN \n", "88298 NaN NaN NaN \n", "88299 NaN NaN NaN \n", "88300 NaN NaN NaN \n", "88301 NaN NaN NaN \n", "\n", " Others of concern Total Population \n", "0 NaN 8800 \n", "1 NaN 120000 \n", "2 NaN 180000 \n", "3 NaN 282000 \n", "4 NaN 55000 \n", "5 NaN 168511 \n", "6 NaN 2000 \n", "7 NaN 290000 \n", "8 NaN 265000 \n", "9 NaN 18000 \n", "10 NaN 30000 \n", "11 NaN 26500 \n", "12 NaN 1800 \n", "13 NaN 3000 \n", "14 NaN 14200 \n", "15 NaN 2500 \n", "16 NaN 2000 \n", "17 NaN 44000 \n", "18 NaN 10000 \n", "19 NaN 2000 \n", "20 NaN 2700 \n", "21 NaN 208000 \n", "22 NaN 350000 \n", "23 NaN 30000 \n", "24 NaN 14000 \n", "25 NaN 120000 \n", "26 NaN 238200 \n", "27 NaN 53500 \n", "28 NaN 154828 \n", "29 NaN 1800 \n", "... ... ... 
\n", "88272 NaN 4 \n", "88273 NaN 157 \n", "88274 NaN 1 \n", "88275 NaN 27 \n", "88276 NaN 4 \n", "88277 NaN 7 \n", "88278 NaN 1 \n", "88279 NaN 2 \n", "88280 NaN 32 \n", "88281 NaN 45 \n", "88282 NaN 47 \n", "88283 NaN 23 \n", "88284 NaN 2 \n", "88285 NaN 1 \n", "88286 NaN 1 \n", "88287 NaN 1 \n", "88288 NaN 5 \n", "88289 NaN 2 \n", "88290 NaN 6217 \n", "88291 NaN 2 \n", "88292 NaN 1 \n", "88293 NaN 7 \n", "88294 NaN 41 \n", "88295 NaN 14 \n", "88296 NaN 1 \n", "88297 NaN 1 \n", "88298 NaN 9467 \n", "88299 NaN 1492 \n", "88300 NaN 1 \n", "88301 NaN 6 \n", "\n", "[88302 rows x 11 columns]" ] } ], "prompt_number": 79 }, { "cell_type": "code", "collapsed": false, "input": [ "df.columns=['year','target','source','a','b','c','d','e','f','g','value']\n", "df=df.drop(['a','b','c','d','e','f','g',],axis=1).dropna()#.set_index(['year','target','source'])[:80440]\n", "df.head(50)" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
yeartargetsourcevalue
2 1951 Australia Various 180000
3 1951 Austria Various 282000
4 1951 Belgium Various 55000
5 1951 Canada Various 168511
6 1951 Denmark Various 2000
7 1951 France Various 290000
8 1951 Germany Various 265000
9 1951 Greece Various 18000
10 1951 Hong Kong SAR, China Various 30000
11 1951 Italy Various 26500
12 1951 Luxembourg Various 1800
13 1951 Morocco Various 3000
14 1951 Netherlands Various 14200
15 1951 Norway Various 2500
16 1951 Spain Various 2000
17 1951 Sweden Various 44000
18 1951 Switzerland Various 10000
19 1951 Tunisia Various 2000
20 1951 Turkey Various 2700
21 1951 United Kingdom Various 208000
22 1951 United States Various 350000
23 1951 Various Various 30000
26 1952 Austria Various 238200
27 1952 Belgium Various 53500
28 1952 Canada Various 154828
29 1952 Denmark Various 1800
30 1952 France Various 280000
31 1952 Germany Various 240000
32 1952 Greece Various 19000
33 1952 Italy Various 25500
34 1952 Luxembourg Various 1800
35 1952 Morocco Various 2900
36 1952 Netherlands Various 14100
37 1952 Norway Various 2500
38 1952 Sweden Various 42000
39 1952 Switzerland Various 9800
40 1952 Turkey Various 2000
41 1952 United Kingdom Various 200000
42 1952 United States Various 500000
43 1952 Various Various 31000
46 1953 Austria Various 215200
47 1953 Belgium Various 53000
48 1953 Canada Various 107004
49 1953 Denmark Various 1600
50 1953 France Various 270000
51 1953 Germany Various 228000
52 1953 Greece Various 18500
53 1953 Italy Various 24500
54 1953 Luxembourg Various 1800
55 1953 Morocco Various 2800
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 80, "text": [ " year target source value\n", "2 1951 Australia Various 180000\n", "3 1951 Austria Various 282000\n", "4 1951 Belgium Various 55000\n", "5 1951 Canada Various 168511\n", "6 1951 Denmark Various 2000\n", "7 1951 France Various 290000\n", "8 1951 Germany Various 265000\n", "9 1951 Greece Various 18000\n", "10 1951 Hong Kong SAR, China Various 30000\n", "11 1951 Italy Various 26500\n", "12 1951 Luxembourg Various 1800\n", "13 1951 Morocco Various 3000\n", "14 1951 Netherlands Various 14200\n", "15 1951 Norway Various 2500\n", "16 1951 Spain Various 2000\n", "17 1951 Sweden Various 44000\n", "18 1951 Switzerland Various 10000\n", "19 1951 Tunisia Various 2000\n", "20 1951 Turkey Various 2700\n", "21 1951 United Kingdom Various 208000\n", "22 1951 United States Various 350000\n", "23 1951 Various Various 30000\n", "26 1952 Austria Various 238200\n", "27 1952 Belgium Various 53500\n", "28 1952 Canada Various 154828\n", "29 1952 Denmark Various 1800\n", "30 1952 France Various 280000\n", "31 1952 Germany Various 240000\n", "32 1952 Greece Various 19000\n", "33 1952 Italy Various 25500\n", "34 1952 Luxembourg Various 1800\n", "35 1952 Morocco Various 2900\n", "36 1952 Netherlands Various 14100\n", "37 1952 Norway Various 2500\n", "38 1952 Sweden Various 42000\n", "39 1952 Switzerland Various 9800\n", "40 1952 Turkey Various 2000\n", "41 1952 United Kingdom Various 200000\n", "42 1952 United States Various 500000\n", "43 1952 Various Various 31000\n", "46 1953 Austria Various 215200\n", "47 1953 Belgium Various 53000\n", "48 1953 Canada Various 107004\n", "49 1953 Denmark Various 1600\n", "50 1953 France Various 270000\n", "51 1953 Germany Various 228000\n", "52 1953 Greece Various 18500\n", "53 1953 Italy Various 24500\n", "54 1953 Luxembourg Various 1800\n", "55 1953 Morocco Various 2800" ] } ], "prompt_number": 80 }, { "cell_type": "code", "collapsed": false, "input": [ "#optional, hungarian 
country names, experimental, continue onto next cell\n", "import json, numpy as np\n", "with open('hun.json') as data_file: \n", " hun = json.load(data_file)\n", "for i in df.T.iteritems():\n", " if i[1][1] not in hun:\n", " print i[1][1]\n", " if i[1][2] not in hun:\n", " print i[1][2]" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 81 }, { "cell_type": "code", "collapsed": false, "input": [ "df.set_index(['year','target','source']).loc[2014].loc['Pakistan']" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
value
source
Afghanistan 1517903
Algeria 17
Ethiopia 4
Ghana 1
Iraq 43
Islamic Rep. of Iran 56
Myanmar 23
Pakistan 1451729
Russian Federation 7
Rwanda 1
Somalia 412
State of Palestine 16
Sudan 2
Syrian Arab Rep. 16
Turkey 7
Uganda 1
Uzbekistan 7
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 82, "text": [ " value\n", "source \n", "Afghanistan 1517903\n", "Algeria 17\n", "Ethiopia 4\n", "Ghana 1\n", "Iraq 43\n", "Islamic Rep. of Iran 56\n", "Myanmar 23\n", "Pakistan 1451729\n", "Russian Federation 7\n", "Rwanda 1\n", "Somalia 412\n", "State of Palestine 16\n", "Sudan 2\n", "Syrian Arab Rep. 16\n", "Turkey 7\n", "Uganda 1\n", "Uzbekistan 7" ] } ], "prompt_number": 82 }, { "cell_type": "markdown", "metadata": {}, "source": [ "palestinian data, from world bank" ] }, { "cell_type": "code", "collapsed": false, "input": [ "import numpy as np\n", "def interpolate(d,years,gfit=2,depth=1,polyorder=1,override=True):\n", " #depth * length of interpolation substrings will be taken to the left and right\n", " #for example for {1971:5,1972:6,1973:7,1974:5} interpolating it over 1969-1990\n", " #for the section 1960-1970 (2 elements) the values from 1972,1973,1974 (3 elements) will be taken with depth 1.5\n", " #for the section 1974-1990 (15 elements) all values (4 elements) will be taken to extrapolate\n", " if (gfit>2): \n", " print 'interpolate takes only 1 (polynomial) or 2 (exponential) as 3rd argument [default=2]'\n", " return\n", " mydict={}\n", " missing_points=[[]]\n", " for year in years:\n", " if year not in d.keys():\n", " missing_points[-1].append(year)\n", " else:\n", " missing_points.append([])\n", " for m in missing_points:\n", " if m:\n", " fit=gfit\n", " if ((m[-1]<np.sort(d.keys())[0]) or (m[0]>np.sort(d.keys())[-1])): #check if it is ends of the interval, then extrapolate mean only\n", " if not override: fit=0\n", " \n", " if fit==0: #take average\n", " y = {k: d[k] for k in set(d.keys()).intersection(range(max(min(years),min(m)-int(3)),min(max(years),max(m)+int(3))+1))}\n", " for i in range(len(m)):\n", " mydict[m[i]]=np.mean(y.values())\n", " elif fit==1:\n", " #intersector\n", " y = {k: d[k] for k in 
set(d.keys()).intersection(range(max(min(years),min(m)-int(depth*len(m))),min(max(years),max(m)+int(depth*len(m)))+1))}\n", " #print y\n", " w = np.polyfit(y.keys(),y.values(),polyorder) # obtaining regression parameters\n", " if (polyorder==1):\n", " intersector=w[0]*np.array(m)+w[1]\n", " else:\n", " intersector=w[0]*np.array(m)*np.array(m)+w[1]*np.array(m)+w[2]\n", " for i in range(len(m)):\n", " mydict[m[i]]=max(0,intersector[i])\n", " else:\n", " #intersector\n", " y = {k: d[k] for k in set(d.keys()).intersection(range(max(min(years),min(m)-int(depth*len(m))),min(max(years),max(m)+int(depth*len(m)))+1))}\n", " #print y\n", " w = np.polyfit(y.keys(),np.log(y.values()),1) # obtaining log regression parameters (exp fitting)\n", " intersector=np.exp(w[1])*np.exp(w[0]*np.array(m))\n", " for i in range(len(m)):\n", " mydict[m[i]]=max(0,intersector[i])\n", " \n", " #return interpolated points\n", " return mydict" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 83 }, { "cell_type": "code", "collapsed": false, "input": [ "#http://data.worldbank.org/indicator/SM.POP.REFG/countries?display=default\n", "dz=pd.read_csv('pal.csv',skiprows=2)\n", "dz.columns=['country','cc','a','b']+range(1960,2016)\n", "dz=dz.set_index('country').drop(['cc','a','b']+range(1960,1975)+range(2014,2016),axis=1)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 84 }, { "cell_type": "code", "collapsed": false, "input": [ "ccc={'Syrian Arab Republic':'Syria','West Bank and Gaza':'Palestine','Jordan':'Jordan','Lebanon':'Lebanon'}\n", "ccr={'Syrian Arab Republic':'Syrian Arab Rep.','West Bank and Gaza':'State of Palestine','Jordan':'Jordan','Lebanon':'Lebanon'}" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 86 }, { "cell_type": "code", "collapsed": false, "input": [ "dx=pd.concat([pd.DataFrame(dz.loc['Jordan']).T,pd.DataFrame(dz.loc['Lebanon']).T\\\n", " ,pd.DataFrame(dz.loc['Syrian Arab 
Republic']).T,pd.DataFrame(dz.loc['West Bank and Gaza']).T])" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 87 }, { "cell_type": "code", "collapsed": false, "input": [ "#reset df, without setting index to run this parts\n", "palref=['Jordan','Lebanon','Syrian Arab Republic','West Bank and Gaza']\n", "dc=pd.DataFrame(columns=['year','target','source','value'])\n", " \n", "for k in range(4):\n", " hp={}\n", " for i in dx.loc[palref[k]].iteritems():\n", " if ~np.isnan(i[1]):\n", " hp[i[0]]=i[1]\n", " hp.update(interpolate(hp,range(1951,2015)))\n", " for y in hp:\n", " val=hp[y]-df[((df['target']==ccr[palref[k]])&(df['year']==y))].sum()[3]\n", " dc.loc[y+k*1000]=[y,ccc[palref[k]],'Palestine',val]" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stderr", "text": [ "C:\\Anaconda\\lib\\site-packages\\numpy\\lib\\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned\n", " warnings.warn(msg, RankWarning)\n", "C:\\Anaconda\\lib\\site-packages\\numpy\\lib\\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned\n", " warnings.warn(msg, RankWarning)\n" ] }, { "output_type": "stream", "stream": "stderr", "text": [ "C:\\Anaconda\\lib\\site-packages\\numpy\\lib\\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned\n", " warnings.warn(msg, RankWarning)\n", "C:\\Anaconda\\lib\\site-packages\\numpy\\lib\\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned\n", " warnings.warn(msg, RankWarning)\n" ] } ], "prompt_number": 88 }, { "cell_type": "markdown", "metadata": {}, "source": [ "append Palestine data to main dataframe" ] }, { "cell_type": "code", "collapsed": false, "input": [ "df=pd.concat([df,dc])" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 90 }, { "cell_type": "markdown", "metadata": {}, "source": [ "save data" ] }, { "cell_type": "code", "collapsed": false, "input": [ "# old, english\n", "def cc(country):\n", " if country in 
cc2:\n", " return cc2[country]\n", " else: return country\n", "cc2={\n", "'Bolivia (Plurinational State of)':'Bolivia',\n", "'Micronesia (Federated States of)':'Micronesia',\n", "'Serbia (and Kosovo: S/RES/1244 (1999))':'Serbia & Kosovo',\n", "'The former Yugoslav Rep. of Macedonia':'FYROM',\n", "'Venezuela (Bolivarian Republic of)':'Venezuela',\n", "'Dem. Rep. of the Congo':'DRC',\n", "'Central African Rep.':'CAR',\n", "\"Dem. People's Rep. of Korea\":\"North Korea\",\n", "'Islamic Rep. of Iran':'Iran',\n", "\"Lao People's Dem. Rep.\":'Lao PDR',\n", "'Papua New Guinea':'PNG',\n", "'Syrian Arab Rep.':'Syria',\n", "'United Rep. of Tanzania':'Tanzania',\n", "'United Arab Emirates':'UAE',\n", "'Antigua and Barbuda':'Antig. & Barb',\n", "'Bosnia and Herzegovina':'Bosnia & Herz.',\n", "'British Virgin Islands':'UK Virgin',\n", "'Brunei Darussalam':'Brunei',\n", "'Hong Kong SAR, China':'Hong Kong',\n", "'Macao SAR, China':'Macao',\n", "'Russian Federation':'Russia',\n", "'Saint Kitts and Nevis':'St. Kitts & Nev.',\n", "'Saint Vincent and the Grenadines':'St. Vinc. & Gren.',\n", "'Sint Maarten (Dutch part)':'St. Maarten',\n", "'State of Palestine':'Palestine',\n", "'Trinidad and Tobago':'Trinid. 
& Tob.',\n", "'Turks and Caicos Islands':'Turks & Caicos'\n", "}\n", "\n", "#new, hungarian\n", "def cc(country):\n", " if country in hun:\n", " return repr(hun[country][0])\n", " else: return repr(country)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 109 }, { "cell_type": "code", "collapsed": false, "input": [ "data={}\n", "countries=set()\n", "for i in df.T.iteritems():\n", " year=int(i[1][0])\n", " target=cc(i[1][1])\n", " source=cc(i[1][2])\n", " if target==\"Various\":\n", " target=\"Other\"\n", " if source==\"Various\":\n", " source=\"Other\"\n", " value=i[1][3]\n", " countries.add(target)\n", " countries.add(source)\n", " if year not in data:data[year]={}\n", " if ((source in data[year]) and (target in data[year][source])):\n", " data[year][source][target][1]=value\n", " else:\n", " if target not in data[year]:data[year][target]={}\n", " if source not in data[year][target]:data[year][target][source]=[0,0]\n", " data[year][target][source][0]=value " ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 110 }, { "cell_type": "code", "collapsed": false, "input": [ "dk=pd.DataFrame(columns=['importer1','importer2','year','flow1','flow2'])\n", "c=0\n", "for year in data:\n", " for target in data[year]:\n", " for source in data[year][target]:\n", " dk.loc[c]=[target,source,year,data[year][target][source][0],data[year][target][source][1]]\n", " c+=1\n", " print year,'0'" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "1951 0\n", "1952 0\n", "1953 0\n", "1954 0\n", "1955" ] }, { "output_type": "stream", "stream": "stdout", "text": [ " 0\n", "1956 0\n", "1957 0\n", "1958 0\n", "1959" ] }, { "output_type": "stream", "stream": "stdout", "text": [ " 0\n", "1960 0\n", "1961 0\n", "1962" ] }, { "output_type": "stream", "stream": "stdout", "text": [ " 0\n", "1963 0\n", "1964" ] }, { "output_type": "stream", "stream": "stdout", "text": [ " 0\n", "1965 0\n", "1966" 
] }, { "output_type": "stream", "stream": "stdout", "text": [ " 0\n", "1967 0\n", "1968" ] }, { "output_type": "stream", "stream": "stdout", "text": [ " 0\n", "1969" ] }, { "output_type": "stream", "stream": "stdout", "text": [ " 0\n", "1970" ] }, { "output_type": "stream", "stream": "stdout", "text": [ " 0\n", "1971" ] }, { "output_type": "stream", "stream": "stdout", "text": [ " 0\n", "1972" ] }, { "output_type": "stream", "stream": "stdout", "text": [ " 0\n", "1973" ] }, { "output_type": "stream", "stream": "stdout", "text": [ " 0\n", "1974" ] }, { "output_type": "stream", "stream": "stdout", "text": [ " 0\n", "1975" ] }, { "output_type": "stream", "stream": "stdout", "text": [ " 0\n", "1976" ] }, { "output_type": "stream", "stream": "stdout", "text": [ " 0\n", "1977" ] }, { "output_type": "stream", "stream": "stdout", "text": [ " 0\n", "1978" ] }, { "output_type": "stream", "stream": "stdout", "text": [ " 0\n", "1979" ] }, { "output_type": "stream", "stream": "stdout", "text": [ " 0\n", "1980" ] }, { "output_type": "stream", "stream": "stdout", "text": [ " 0\n", "1981" ] }, { "output_type": "stream", "stream": "stdout", "text": [ " 0\n", "1982" ] }, { "output_type": "stream", "stream": "stdout", "text": [ " 0\n", "1983" ] }, { "output_type": "stream", "stream": "stdout", "text": [ " 0\n", "1984" ] }, { "output_type": "stream", "stream": "stdout", "text": [ " 0\n", "1985" ] }, { "output_type": "stream", "stream": "stdout", "text": [ " 0\n", "1986" ] }, { "output_type": "stream", "stream": "stdout", "text": [ " 0\n", "1987" ] }, { "output_type": "stream", "stream": "stdout", "text": [ " 0\n", "1988" ] }, { "output_type": "stream", "stream": "stdout", "text": [ " 0\n", "1989" ] }, { "output_type": "stream", "stream": "stdout", "text": [ " 0\n", "1990" ] }, { "output_type": "stream", "stream": "stdout", "text": [ " 0\n", "1991" ] }, { "output_type": "stream", "stream": "stdout", "text": [ " 0\n", "1992" ] }, { "output_type": "stream", "stream": "stdout", "text": [ 
" 0\n", "1993" ] }, { "output_type": "stream", "stream": "stdout", "text": [ " 0\n", "1994" ] }, { "output_type": "stream", "stream": "stdout", "text": [ " 0\n", "1995" ] }, { "output_type": "stream", "stream": "stdout", "text": [ " 0\n", "1996" ] }, { "output_type": "stream", "stream": "stdout", "text": [ " 0\n", "1997" ] }, { "output_type": "stream", "stream": "stdout", "text": [ " 0\n", "1998" ] }, { "output_type": "stream", "stream": "stdout", "text": [ " 0\n", "1999" ] }, { "output_type": "stream", "stream": "stdout", "text": [ " 0\n", "2000" ] }, { "output_type": "stream", "stream": "stdout", "text": [ " 0\n", "2001" ] }, { "output_type": "stream", "stream": "stdout", "text": [ " 0\n", "2002" ] }, { "output_type": "stream", "stream": "stdout", "text": [ " 0\n", "2003" ] }, { "output_type": "stream", "stream": "stdout", "text": [ " 0\n", "2004" ] }, { "output_type": "stream", "stream": "stdout", "text": [ " 0\n", "2005" ] }, { "output_type": "stream", "stream": "stdout", "text": [ " 0\n", "2006" ] }, { "output_type": "stream", "stream": "stdout", "text": [ " 0\n", "2007" ] }, { "output_type": "stream", "stream": "stdout", "text": [ " 0\n", "2008" ] }, { "output_type": "stream", "stream": "stdout", "text": [ " 0\n", "2009" ] }, { "output_type": "stream", "stream": "stdout", "text": [ " 0\n", "2010" ] }, { "output_type": "stream", "stream": "stdout", "text": [ " 0\n", "2011" ] }, { "output_type": "stream", "stream": "stdout", "text": [ " 0\n", "2012" ] }, { "output_type": "stream", "stream": "stdout", "text": [ " 0\n", "2013" ] }, { "output_type": "stream", "stream": "stdout", "text": [ " 0\n", "2014" ] }, { "output_type": "stream", "stream": "stdout", "text": [ " 0\n" ] } ], "prompt_number": 111 }, { "cell_type": "code", "collapsed": false, "input": [ "dk=dk.set_index(['year','importer1','importer2'])\n", "dk.to_csv('datab.csv')" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 112 }, { "cell_type": "markdown", "metadata": {}, "source": 
[ "Flow in the other direction. Everything from this point forward is optional: the main software was modified to handle this in JavaScript." ] }, { "cell_type": "code", "collapsed": false, "input": [ "data={}\n", "countries=set()\n", "for i in df.T.iteritems():\n", " year=int(i[1][2])\n", " target=cc(i[1][0])\n", " source=cc(i[1][1])\n", " if target==\"Various\":\n", " target=\"Other\"\n", " if source==\"Various\":\n", " source=\"Other\"\n", " value=i[1][3]\n", " countries.add(target)\n", " countries.add(source)\n", " if year not in data:data[year]={}\n", " if ((source in data[year]) and (target in data[year][source])):\n", " data[year][source][target][1]=value\n", " else:\n", " if target not in data[year]:data[year][target]={}\n", " if source not in data[year][target]:data[year][target][source]=[0,0]\n", " data[year][target][source][0]=value \n", "dk=pd.DataFrame(columns=['importer2','importer1','year','flow1','flow2'])\n", "c=0\n", "for year in data:\n", " for target in data[year]:\n", " for source in data[year][target]:\n", " dk.loc[c]=[target,source,year,data[year][target][source][0],data[year][target][source][1]]\n", " c+=1\n", " print year,'0'\n", "dk=dk.set_index(['year','importer2','importer1'])\n", "dk.to_csv('data.csv')" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Extra experiments" ] }, { "cell_type": "code", "collapsed": false, "input": [ "data2={}\n", "countries2=set()\n", "for i in df.T.iteritems():\n", " year=int(i[1][2])\n", " target=cc(i[1][0])\n", " source=cc(i[1][1])\n", " if target==\"Various\":\n", " target=\"Other\"\n", " if source==\"Various\":\n", " source=\"Other\"\n", " value=i[1][3]\n", " countries2.add(target)\n", " countries2.add(source)\n", " if value<10000:\n", " target=\"Other\"\n", " if year not in data2:data2[year]={}\n", " if ((source in data2[year]) and (target in data2[year][source])):\n", " data2[year][source][target][1]+=value\n", " else:\n", " target=cc(i[1][0])\n", " 
source=cc(i[1][1])\n", " if target==\"Various\":\n", " target=\"Other\"\n", " if source==\"Various\":\n", " source=\"Other\"\n", " if value<10000:\n", " source=\"Other\"\n", " if target not in data2[year]:data2[year][target]={}\n", " if source not in data2[year][target]:data2[year][target][source]=[0,0]\n", " \n", " data2[year][target][source][0]+=value " ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 225 }, { "cell_type": "code", "collapsed": false, "input": [ "dk2=pd.DataFrame(columns=['importer1','importer2','year','flow1','flow2'])\n", "c=0\n", "for year in data2:\n", " for target in data2[year]:\n", " for source in data2[year][target]:\n", " dk2.loc[c]=[target,source,year,data2[year][target][source][0],data2[year][target][source][1]]\n", " c+=1\n", " print year,'1'" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "1975 1\n", "1976 1\n", "1977" ] }, { "output_type": "stream", "stream": "stdout", "text": [ " 1\n", "1978" ] }, { "output_type": "stream", "stream": "stdout", "text": [ " 1\n", "1979" ] }, { "output_type": "stream", "stream": "stdout", "text": [ " 1\n", "1980" ] }, { "output_type": "stream", "stream": "stdout", "text": [ " 1\n", "1981" ] }, { "output_type": "stream", "stream": "stdout", "text": [ " 1\n", "1982" ] }, { "output_type": "stream", "stream": "stdout", "text": [ " 1\n", "1983" ] }, { "output_type": "stream", "stream": "stdout", "text": [ " 1\n", "1984" ] }, { "output_type": "stream", "stream": "stdout", "text": [ " 1\n", "1985" ] }, { "output_type": "stream", "stream": "stdout", "text": [ " 1\n", "1986" ] }, { "output_type": "stream", "stream": "stdout", "text": [ " 1\n", "1987" ] }, { "output_type": "stream", "stream": "stdout", "text": [ " 1\n", "1988" ] }, { "output_type": "stream", "stream": "stdout", "text": [ " 1\n", "1989" ] }, { "output_type": "stream", "stream": "stdout", "text": [ " 1\n", "1990" ] }, { "output_type": "stream", "stream": 
"stdout", "text": [ " 1\n", "1991" ] }, { "output_type": "stream", "stream": "stdout", "text": [ " 1\n", "1992" ] }, { "output_type": "stream", "stream": "stdout", "text": [ " 1\n", "1993" ] }, { "output_type": "stream", "stream": "stdout", "text": [ " 1\n", "1994" ] }, { "output_type": "stream", "stream": "stdout", "text": [ " 1\n", "1995" ] }, { "output_type": "stream", "stream": "stdout", "text": [ " 1\n", "1996" ] }, { "output_type": "stream", "stream": "stdout", "text": [ " 1\n", "1997" ] }, { "output_type": "stream", "stream": "stdout", "text": [ " 1\n", "1998" ] }, { "output_type": "stream", "stream": "stdout", "text": [ " 1\n", "1999" ] }, { "output_type": "stream", "stream": "stdout", "text": [ " 1\n", "2000" ] }, { "output_type": "stream", "stream": "stdout", "text": [ " 1\n", "2001" ] }, { "output_type": "stream", "stream": "stdout", "text": [ " 1\n", "2002" ] }, { "output_type": "stream", "stream": "stdout", "text": [ " 1\n", "2003" ] }, { "output_type": "stream", "stream": "stdout", "text": [ " 1\n", "2004" ] }, { "output_type": "stream", "stream": "stdout", "text": [ " 1\n", "2005" ] }, { "output_type": "stream", "stream": "stdout", "text": [ " 1\n", "2006" ] }, { "output_type": "stream", "stream": "stdout", "text": [ " 1\n", "2007" ] }, { "output_type": "stream", "stream": "stdout", "text": [ " 1\n", "2008" ] }, { "output_type": "stream", "stream": "stdout", "text": [ " 1\n", "2009" ] }, { "output_type": "stream", "stream": "stdout", "text": [ " 1\n", "2010" ] }, { "output_type": "stream", "stream": "stdout", "text": [ " 1\n", "2011" ] }, { "output_type": "stream", "stream": "stdout", "text": [ " 1\n", "2012" ] }, { "output_type": "stream", "stream": "stdout", "text": [ " 1\n", "2013" ] }, { "output_type": "stream", "stream": "stdout", "text": [ " 1\n" ] } ], "prompt_number": 226 }, { "cell_type": "code", "collapsed": false, "input": [ "dk2=dk2.set_index(['year','importer1','importer2'])\n", "dk2.to_csv('data2.csv')" ], "language": "python", 
"metadata": {}, "outputs": [], "prompt_number": 227 }, { "cell_type": "code", "collapsed": false, "input": [ "data3={}\n", "for i in df.T.iteritems():\n", " year=int(i[1][2])\n", " target=cc(i[1][0])\n", " source=cc(i[1][1])\n", " if target==\"Various\":\n", " target=\"Other\"\n", " if source==\"Various\":\n", " source=\"Other\"\n", " value=i[1][3]\n", " if value<0:\n", " target=\"Other\"\n", " if year not in data3:data3[year]={}\n", " if ((source in data3[year]) and (target in data3[year][source])):\n", " data3[year][source][target][1]+=value\n", " else:\n", " target=cc(i[1][0])\n", " source=cc(i[1][1])\n", " if target==\"Various\":\n", " target=\"Other\"\n", " if source==\"Various\":\n", " source=\"Other\"\n", " if value<0:\n", " source=\"Other\"\n", " if target not in data3[year]:data3[year][target]={}\n", " if source not in data3[year][target]:data3[year][target][source]=[0,0]\n", " \n", " data3[year][target][source][0]+=value " ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 262 }, { "cell_type": "code", "collapsed": false, "input": [ "#for th in {1000,5000,10000,50000,100000}:\n", "for th in {10000,100000}: \n", " dk3=pd.DataFrame(columns=['importer1','importer2','year','flow1','flow2'])\n", " c=0 \n", " for year in data3:\n", " p=0\n", " dk3.loc[c]=['Other','Other',year,0,0]\n", " r=copy.deepcopy(c)\n", " c+=1\n", " for target in data3[year]:\n", " for source in data3[year][target]:\n", " if ((source!='Other') and (target!='Other')):\n", " if ((data3[year][target][source][0]>th)or(data3[year][target][source][1]>th)):\n", " dk3.loc[c]=[target,source,year,data3[year][target][source][0],data3[year][target][source][1]]\n", " else: p+=data3[year][target][source][0]+data3[year][target][source][1]\n", " else: dk3.loc[r]['flow1']+=data3[year][target][source][0]+data3[year][target][source][1]\n", " c+=1\n", " dk3[(dk3['year']==year)&(dk3['importer2']=='Other')&(dk3['importer1']=='Other')]['flow1']+=p\n", " 
dk3=dk3.set_index(['year','importer1','importer2'])\n", " dk3.to_csv(repr(th)+'data3.csv')\n", " dk3b=dk3.copy()\n", " dk3b.index.names=[u'year', u'importer2', u'importer1']\n", " dk3b.to_csv(repr(th)+'data3b.csv')" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 327 }, { "cell_type": "code", "collapsed": false, "input": [ "dk3.loc[1976]" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
flow1flow2
importer1importer2
OtherOther 2472611 0
EthiopiaSudan 10600 105000
TanzaniaBurundi 126600 0
Rwanda 23600 0
DRCBurundi 18220 0
Rwanda 24500 0
CameroonEquatorial Guinea 30000 0
AlgeriaWestern Sahara 35000 0
ZambiaAngola 27730 0
SenegalGuinea-Bissau 37000 0
ThailandLao PDR 62720 0
Cambodia 17090 0
GabonEquatorial Guinea 60000 0
AngolaDRC 5000 471340
C\u00f4te d'IvoireGuinea 500000 0
MozambiqueZimbabwe 30000 0
UgandaDRC 34230 0
Rwanda 78480 0
BurundiRwanda 49500 7500
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 326, "text": [ " flow1 flow2\n", "importer1 importer2 \n", "Other Other 2472611 0\n", "Ethiopia Sudan 10600 105000\n", "Tanzania Burundi 126600 0\n", " Rwanda 23600 0\n", "DRC Burundi 18220 0\n", " Rwanda 24500 0\n", "Cameroon Equatorial Guinea 30000 0\n", "Algeria Western Sahara 35000 0\n", "Zambia Angola 27730 0\n", "Senegal Guinea-Bissau 37000 0\n", "Thailand Lao PDR 62720 0\n", " Cambodia 17090 0\n", "Gabon Equatorial Guinea 60000 0\n", "Angola DRC 5000 471340\n", "C\u00f4te d'Ivoire Guinea 500000 0\n", "Mozambique Zimbabwe 30000 0\n", "Uganda DRC 34230 0\n", " Rwanda 78480 0\n", "Burundi Rwanda 49500 7500" ] } ], "prompt_number": 326 } ], "metadata": {} } ] }