{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import pandas as pd, json, numpy as np\n", "import matplotlib.pyplot as plt\n", "%matplotlib inline" ] },
{ "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [], "source": [
"# Airport master list, scraped from table 0 of the Wikipedia page.\n",
"url='http://en.wikipedia.org/wiki/List_of_airports_in_Romania'\n",
"tables=pd.read_html(url)\n",
"# Keep the rows up to label 17, let row 0 supply the column names (transpose,\n",
"# index by it, transpose back), drop the rows before label 2 and index by IATA code.\n",
"airports_raw=tables[0].loc[:17]\n",
"airports_named=airports_raw.T.set_index(0).T\n",
"df=airports_named.loc[2:].set_index('IATA')" ] },
{ "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
City servedICAOAirport nameWebsiteFrequencyStatusnan
IATA
ARWAradLRARArad International Airporthttp://www.aeroportularad.roTWR 130.2 MHzNaNNaN
BCMBacăuLRBCBacău \"George Enescu\" International Airport [14]http://www.bacauairport.roTWR 118.6 MHzNaNNaN
BAYBaia Mare / Tăuții-MăgherăușLRBMBaia Mare Airport (Tăuții-Măgherăuș Airport)http://www.baiamareairport.roTWR 123.6 MHzClosed for renovationNaN
BBUBucharest / BăneasaLRBSBucharest \"Aurel Vlaicu\" International Airport...http://www.baneasa.aeroAPP 127.6 MHz TWR 120.8 MHzOnly private flightsNaN
OTPBucharest / OtopeniLROPBucharest \"Henri Coandǎ\" International Airport...http://www.otp-airport.roAPP 126.2 TWR1 120.9 TWR2 121.85NaNNaN
CLJCluj-NapocaLRCLCluj \"Avram Iancu\" International Airporthttp://www.airportcluj.roAPP 125.1 MHz TWR 134.4 MHzNaNNaN
CNDConstanțaLRCKConstanța \"Mihail Kogălniceanu\" International ...http://www.mk-airport.roTWR 120.24 MHzNaNNaN
CRACraiovaLRCVCraiova Airporthttp://www.aeroportcraiova.roTWR 124.3 MHzNaNNaN
IASIașiLRIAIași International Airporthttp://www.aeroport.roTWR 119.2 MHzNaNNaN
OMROradeaLRODOradea International Airporthttp://www.aeroportoradea.roTWR 120.2 MHzNaNNaN
SUJSatu MareLRSMSatu Mare International Airporthttp://www.aeroportulsm.roTWR 118.8 MHzNaNNaN
SBZSibiuLRSBSibiu International AirportNaNTWR 122.7 MHzNaNNaN
SCVSuceavaLRSVSuceava \"Ștefan cel Mare\" International Airporthttp://www.aeroportsuceava.roAPP 120.9 MHz TWR 118.3 MHzNaNNaN
TGMTârgu MureșLRTM\"Transilvania\" Târgu Mureș Airporthttp://www.targumuresairport.roAPP 121.9 MHz TWR 125.9 MHzNaNNaN
TSRTimișoaraLRTRTimișoara \"Traian Vuia\" International Airport ...http://www.aerotim.roTWR 101.1 MHzNaNNaN
TCETulceaLRTC\"Delta Dunarii\" Tulcea Airport (Cataloi Airport)http://www.aeroportul-tulcea.ro/APP/TWR 120.3 MHzClosedNaN
\n", "
" ], "text/plain": [ "0 City served ICAO \\\n", "IATA \n", "ARW Arad LRAR \n", "BCM Bacău LRBC \n", "BAY Baia Mare / Tăuții-Măgherăuș LRBM \n", "BBU Bucharest / Băneasa LRBS \n", "OTP Bucharest / Otopeni LROP \n", "CLJ Cluj-Napoca LRCL \n", "CND Constanța LRCK \n", "CRA Craiova LRCV \n", "IAS Iași LRIA \n", "OMR Oradea LROD \n", "SUJ Satu Mare LRSM \n", "SBZ Sibiu LRSB \n", "SCV Suceava LRSV \n", "TGM Târgu Mureș LRTM \n", "TSR Timișoara LRTR \n", "TCE Tulcea LRTC \n", "\n", "0 Airport name \\\n", "IATA \n", "ARW Arad International Airport \n", "BCM Bacău \"George Enescu\" International Airport [14] \n", "BAY Baia Mare Airport (Tăuții-Măgherăuș Airport) \n", "BBU Bucharest \"Aurel Vlaicu\" International Airport... \n", "OTP Bucharest \"Henri Coandǎ\" International Airport... \n", "CLJ Cluj \"Avram Iancu\" International Airport \n", "CND Constanța \"Mihail Kogălniceanu\" International ... \n", "CRA Craiova Airport \n", "IAS Iași International Airport \n", "OMR Oradea International Airport \n", "SUJ Satu Mare International Airport \n", "SBZ Sibiu International Airport \n", "SCV Suceava \"Ștefan cel Mare\" International Airport \n", "TGM \"Transilvania\" Târgu Mureș Airport \n", "TSR Timișoara \"Traian Vuia\" International Airport ... 
\n", "TCE \"Delta Dunarii\" Tulcea Airport (Cataloi Airport) \n", "\n", "0 Website Frequency \\\n", "IATA \n", "ARW http://www.aeroportularad.ro TWR 130.2 MHz \n", "BCM http://www.bacauairport.ro TWR 118.6 MHz \n", "BAY http://www.baiamareairport.ro TWR 123.6 MHz \n", "BBU http://www.baneasa.aero APP 127.6 MHz TWR 120.8 MHz \n", "OTP http://www.otp-airport.ro APP 126.2 TWR1 120.9 TWR2 121.85 \n", "CLJ http://www.airportcluj.ro APP 125.1 MHz TWR 134.4 MHz \n", "CND http://www.mk-airport.ro TWR 120.24 MHz \n", "CRA http://www.aeroportcraiova.ro TWR 124.3 MHz \n", "IAS http://www.aeroport.ro TWR 119.2 MHz \n", "OMR http://www.aeroportoradea.ro TWR 120.2 MHz \n", "SUJ http://www.aeroportulsm.ro TWR 118.8 MHz \n", "SBZ NaN TWR 122.7 MHz \n", "SCV http://www.aeroportsuceava.ro APP 120.9 MHz TWR 118.3 MHz \n", "TGM http://www.targumuresairport.ro APP 121.9 MHz TWR 125.9 MHz \n", "TSR http://www.aerotim.ro TWR 101.1 MHz \n", "TCE http://www.aeroportul-tulcea.ro/ APP/TWR 120.3 MHz \n", "\n", "0 Status NaN \n", "IATA \n", "ARW NaN NaN \n", "BCM NaN NaN \n", "BAY Closed for renovation NaN \n", "BBU Only private flights NaN \n", "OTP NaN NaN \n", "CLJ NaN NaN \n", "CND NaN NaN \n", "CRA NaN NaN \n", "IAS NaN NaN \n", "OMR NaN NaN \n", "SUJ NaN NaN \n", "SBZ NaN NaN \n", "SCV NaN NaN \n", "TGM NaN NaN \n", "TSR NaN NaN \n", "TCE Closed NaN " ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df" ] },
{ "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false }, "outputs": [], "source": [
"import os\n",
"\n",
"from pygeocoder import Geocoder\n",
"\n",
"# The Google geocoding key is read from the environment so that it is never\n",
"# stored in the notebook. Set GOOGLE_API_KEY before starting the kernel;\n",
"# a missing variable raises KeyError here rather than failing later.\n",
"# NOTE(review): the key that used to be hardcoded in this cell is still in\n",
"# version-control history and should be revoked.\n",
"apik=os.environ['GOOGLE_API_KEY']" ] },
{ "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": true }, "outputs": [], "source": [ "results = Geocoder(apik).geocode('clj airport romania')" ] },
{ "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "(46.7846851, 23.6889981)" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "results[0].coordinates" ] },
{ "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "ARW\n", "BCM\n", "BAY\n", "BBU\n", "OTP\n", "CLJ\n", "CND\n", "CRA\n", "IAS\n", "OMR\n", "SUJ\n", "SBZ\n", "SCV\n", "TGM\n", "TSR\n", "TCE\n" ] } ], "source": [
"# Geocode every airport by its IATA code and keep the coordinates of the first hit.\n",
"# One client is built up front and reused for all lookups.\n",
"geocoder=Geocoder(apik)\n",
"locations={}\n",
"for i in df.index:\n",
"    results = geocoder.geocode(i+' airport romania')\n",
"    locations[i]=results[0].coordinates\n",
"    print(i)" ] },
{ "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": true }, "outputs": [], "source": [
"# Cache the coordinates on disk; the with-block closes the file handle.\n",
"with open(\"locations_ro.json\",'w') as f:\n",
"    f.write(json.dumps(locations))" ] },
{ "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": true }, "outputs": [], "source": [
"# Reload from the cache file, so the geocoding cell above can be skipped on later runs.\n",
"with open('locations_ro.json','r') as f:\n",
"    locations=json.loads(f.read())" ] },
{ "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import requests" ] },
{ "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "BCM https://www.airportia.com/romania/bacău-airport/\n", "SCV https://www.airportia.com/romania/suceava-stefan-cel-mare-airport\n", "CLJ https://www.airportia.com/romania/cluj_napoca-international-airport\n", "ARW https://www.airportia.com/romania/arad-international-airport/arrivals\n", "SBZ https://www.airportia.com/romania/sibiu-international-airport/\n", "SUJ https://www.airportia.com/romania/satu-mare-airport/arrivals\n", "BAY https://www.airportia.com/romania/tautii-magheraus-airport\n", "OMR https://www.airportia.com/romania/oradea-international-airport/\n", "CND https://www.airportia.com/romania/mihail-kogălniceanu-international-airport\n", "CRA https://www.airportia.com/romania/craiova-airport/arrivals\n", "OTP https://www.airportia.com/romania/henri-coandă-international-airport\n", "BBU https://www.airportia.com/romania/băneasa...airport/departures\n", "TCE https://www.airportia.com/romania/tulcea-airport\n", "TSR https://www.airportia.com/romania/timişoara-traian-vuia-airport/\n", "IAS https://www.airportia.com/romania/iaşi-airport/arrivals\n", "TGM https://www.airportia.com/romania/transilvania-târgu-mureş-international- airport\n" ] } ], "source": [
"# Find each airport's airportia.com page through a Google custom search.\n",
"airportialinks={}\n",
"for i in locations:\n",
"    url='https://cse.google.com/cse?cx=partner-pub-6479063288582225%3A8064105798&cof=FORID%3A10&ie=UTF-8&q='+str(i)+'+airport+romania'\n",
"    m=requests.get(url).content\n",
"    # NOTE(review): presumably table 5, cell [0][0] of the result page holds the\n",
"    # first search hit - confirm if the page layout changes.\n",
"    z=pd.read_html(m)[5][0][0]\n",
"    # keep the text from the first 'http' onwards\n",
"    z=z[z.find('http'):]\n",
"    airportialinks[i]=z\n",
"    print(i+' '+z)" ] },
{ "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "https://www.airportia.com/romania/bacău-airport/\n", "https://www.airportia.com/romania/tulcea-airport/\n", "https://www.airportia.com/romania/cluj_napoca-international-airport/\n", "https://www.airportia.com/romania/arad-international-airport/\n", "https://www.airportia.com/romania/sibiu-international-airport/\n", "https://www.airportia.com/romania/satu-mare-airport/\n", "https://www.airportia.com/romania/tautii-magheraus-airport/\n", "https://www.airportia.com/romania/oradea-international-airport/\n", "https://www.airportia.com/romania/mihail-kogălniceanu-international-airport/\n", "https://www.airportia.com/romania/craiova-airport/\n", "https://www.airportia.com/romania/henri-coandă-international-airport/\n", "https://www.airportia.com/romania/băneasa-international-airport/\n", "https://www.airportia.com/romania/suceava-stefan-cel-mare-airport/\n", "https://www.airportia.com/romania/timişoara-traian-vuia-airport/\n", "https://www.airportia.com/romania/iaşi-airport/\n", "https://www.airportia.com/romania/transilvania-târgu-mureş-international-airport/\n" ] } ], "source": [ "#reformat\n", "for z in 
airportialinks:\n",
"    # cut everything from 'arrivals'/'departures' on, remove blanks and expand the '...' ellipsis\n",
"    link=airportialinks[z].split('arrivals')[0].split('departures')[0]\n",
"    link=link.replace(' ','').replace('...','-international-')\n",
"    # endswith() also copes with an empty link, where indexing [-1] would raise\n",
"    if not link.endswith('/'):link+='/'\n",
"    #manual fixes\n",
"    if z=='TSR':link='https://www.airportia.com/romania/timişoara-traian-vuia-airport/'\n",
"    airportialinks[z]=link\n",
"    print(link)" ] },
{ "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": true }, "outputs": [], "source": [ "sch={}" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "record schedules for 2 weeks, then augment count with weekly flight numbers.\n", "seasonal and seasonal charter will count as once per week for 3 months, so 12/52 per week. TGM separate, since its history is in the past." ] },
{ "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "BCM\n", "SCV\n", "CLJ\n", "ARW\n", "SBZ\n", "SUJ\n", "BAY\n", "OMR\n", "CND\n", "CRA\n", "OTP\n", "BBU\n", "TCE\n", "TSR\n", "IAS\n", "TGM\n" ] } ], "source": [
"def fetch_departures(code, yyyymm, days):\n",
"    \"\"\"Download the daily departure tables of one airport into sch[code].\n",
"\n",
"    code   -- airport key used in airportialinks and sch\n",
"    yyyymm -- year and month as a string, e.g. '201703'\n",
"    days   -- iterable of day numbers appended to yyyymm in the URL\n",
"\n",
"    Tables are stored under their page URL. Returns the list of URLs that\n",
"    could not be loaded or parsed.\n",
"    \"\"\"\n",
"    missed=[]\n",
"    for d in days:\n",
"        full=airportialinks[code]+'departures/'+yyyymm+str(d)\n",
"        # sch[code] is keyed by URL, so the cache check has to use the URL;\n",
"        # checking the day number never matched and re-downloaded every page.\n",
"        if full in sch[code]:continue\n",
"        try:\n",
"            m=requests.get(full).content\n",
"            sch[code][full]=pd.read_html(m)[0]\n",
"        except Exception:\n",
"            # best effort: a day whose page cannot be loaded or has no table is\n",
"            # skipped, but it is reported to the caller instead of vanishing\n",
"            missed.append(full)\n",
"    return missed\n",
"\n",
"for i in locations:\n",
"    print(i)\n",
"    if i not in sch:sch[i]={}\n",
"    if i!='TGM':\n",
"        #march 11-24 = 2 weeks\n",
"        missed=fetch_departures(i,'201703',range(11,25))\n",
"    else:\n",
"        #november 17-30 = 2 weeks\n",
"        missed=fetch_departures(i,'201611',range(17,31))\n",
"    if missed:print('  %d day(s) without a departure table' % len(missed))" ] },
{ "cell_type": "code", "execution_count": 13, "metadata": { "collapsed": true }, "outputs": [], "source": [ "mdf=pd.DataFrame()" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "collapsed": 
false }, "outputs": [], "source": [
"# Stack all downloaded tables into one frame. The per-day frames are collected\n",
"# in a list and concatenated once: concat inside the loop copies the growing\n",
"# frame on every pass, and re-running the cell would append everything again.\n",
"# A separate name is used so the airports table `df` is not overwritten.\n",
"frames=[]\n",
"for i in sch:\n",
"    for d in sch[i]:\n",
"        table=sch[i][d]\n",
"        # keep only the 2nd and 3rd column of the scraped table\n",
"        flights=table.drop(table.columns[3:],axis=1).drop(table.columns[0],axis=1)\n",
"        flights['From']=i\n",
"        flights['Date']=d  # d is the key of sch[i]\n",
"        frames.append(flights)\n",
"mdf=pd.concat(frames) if frames else pd.DataFrame()" ] },
{ "cell_type": "code", "execution_count": 15, "metadata": { "collapsed": false }, "outputs": [], "source": [ "mdf=mdf.replace('Hahn','Frankfurt')\n", "mdf=mdf.replace('Hahn HHN','Frankfurt HHN')" ] },
{ "cell_type": "code", "execution_count": 16, "metadata": { "collapsed": false }, "outputs": [], "source": [
"# 'To' is split at its last blank: the text before it becomes City, the text after it Airport.\n",
"mdf['City']=[i[:i.rfind(' ')] for i in mdf['To']]\n",
"mdf['Airport']=[i[i.rfind(' ')+1:] for i in mdf['To']]" ] },
{ "cell_type": "code", "execution_count": 17, "metadata": { "collapsed": false }, "outputs": [], "source": [
"# NOTE(review): to_json() already returns a JSON string, so json.dumps() encodes it a\n",
"# second time. Kept as is because readers of this file presumably decode twice - confirm.\n",
"with open(\"mdf_ro_dest.json\",'w') as f:\n",
"    f.write(json.dumps(mdf.reset_index().to_json()))" ] },
{ "cell_type": "code", "execution_count": 25, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "3034" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(mdf)" ] },
{ "cell_type": "code", "execution_count": 18, "metadata": { "collapsed": false }, "outputs": [], "source": [ "airlines=set(mdf['Airline'])" ] },
{ "cell_type": "code", "execution_count": 19, "metadata": { "collapsed": false }, "outputs": [], "source": [ "cities=set(mdf['City'])" ] },
{ "cell_type": "code", "execution_count": 20, "metadata": { "collapsed": false }, "outputs": [], "source": [
"# with-blocks close the file handles that the bare file(...).write() calls left open\n",
"with open(\"cities_ro_dest.json\",'w') as f:\n",
"    f.write(json.dumps(list(cities)))\n",
"with open(\"airlines_ro_dest.json\",'w') as f:\n",
"    f.write(json.dumps(list(airlines)))" ] },
{ "cell_type": "code", "execution_count": 26, "metadata": { "collapsed": true }, "outputs": [], "source": [ "citycoords={}" ] },
{ "cell_type": "code", "execution_count": 27, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Kiev\n", "Paris\n", "Oslo\n", "Basel\n", "Beirut\n", "Zaragoza\n", "Liverpool\n", "Verona\n", "Malmo\n", "Castellon de la Plana\n", "Bologna\n", "Catania\n", "Treviso\n", "Brussels\n", "Bucharest\n", "Dubai\n", "Dublin\n", "Rome\n", "Varna\n", "Luqa\n", "Pescara\n", "Cologne\n", "Milan\n", "London\n", "Karlsruhe/Baden-Baden\n", "Strasbourg\n", "Dortmund\n", "Cluj-Napoca\n", "Nurnberg\n", "Amman\n", "Chisinau\n", "Vienna\n", "Moscow\n", "Bratislava\n", "Berlin\n", "Katowice\n", "Weeze\n", "Eindhoven\n", "Stuttgart\n", "Alicante\n", "Tenerife\n", "Frankfurt\n", "Thessaloniki\n", "Zurich\n", "Perugia\n", "Madrid\n", "Bari\n", "Doncaster\n", "Lyon\n", "Istanbul\n", "Pisa\n", "Turin\n", "Nice\n", "Larnaca\n", "Memmingen\n", "Hannover\n", "Malaga\n", "Hamburg\n", "Stockholm\n", "Tel Aviv\n", "Timisoara\n", "Doha\n", "Birmingham\n", "Florence\n", "Athens\n", "Satu Mare\n", "Oradea\n", "Valencia\n", "Naples\n", "Geneva\n", "Sibiu\n", "Munich\n", "Glasgow\n", "Alghero\n", "Budapest\n", "Dusseldorf\n", "Barcelona\n", "Billund\n", "Bristol\n", "Iasi\n", "Belgrade\n", "Prague\n", "Sofia\n", "Suceava\n", "Lisbon\n", "Amsterdam\n", "Copenhagen\n", "Warsaw\n" ] } ], "source": [
"# City names that are sent to the geocoder together with their country;\n",
"# every other city is looked up under its own name.\n",
"geocode_query={u'Birmingham':'Birmingham, UK',\n",
"               u'Valencia':'Valencia, Spain',\n",
"               u'Naples':'Naples, Italy',\n",
"               u'St. Petersburg':'St. Petersburg, Russia',\n",
"               u'Bristol':'Bristol, UK'}\n",
"geocoder=Geocoder(apik)  # one client for all lookups\n",
"for i in cities:\n",
"    # cities already in citycoords are skipped, so the cell can be resumed\n",
"    if i not in citycoords:\n",
"        citycoords[i]=geocoder.geocode(geocode_query.get(i,i))\n",
"        print(i)" ] },
{ "cell_type": "code", "execution_count": 28, "metadata": { "collapsed": true }, "outputs": [], "source": [
"# Keep the coordinates and country of the first geocoder hit for every city.\n",
"citysave={city:{\"coords\":hits[0].coordinates,\n",
"                \"country\":hits[0].country}\n",
"          for city,hits in citycoords.items()}" ] },
{ "cell_type": "code", "execution_count": 29, "metadata": { "collapsed": false }, "outputs": [], "source": [
"with open(\"citysave_ro_dest.json\",'w') as f:\n",
"    f.write(json.dumps(citysave))" ] } ], "metadata": { "anaconda-cloud": {}, "kernelspec": { "display_name": "Python [conda root]", "language": "python", "name": "conda-root-py" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.12" } }, "nbformat": 4, "nbformat_minor": 1 }