{ "cells": [ { "cell_type": "code", "execution_count": 44, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import pandas as pd, json, numpy as np\n", "import matplotlib.pyplot as plt\n", "%matplotlib inline" ] },
{ "cell_type": "code", "execution_count": 45, "metadata": { "collapsed": false }, "outputs": [], "source": [
"# Load the first table on the Wikipedia page and reshape it in one chain:\n",
"# keep the rows labelled 0-6, turn row 0 into the column headers,\n",
"# keep the rows labelled 2 and up, and index the result by IATA code.\n",
"url='http://en.wikipedia.org/wiki/List_of_airports_in_Hungary'\n",
"df=(pd.read_html(url)[0]\n",
"    .loc[:6]\n",
"    .T.set_index(0).T\n",
"    .loc[2:]\n",
"    .set_index('IATA'))" ] },
{ "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Location servedCountyICAOAirport nameElev.Runways
IATA
BUDBudapest(Capital)LHBPBudapest Ferenc Liszt International Airport151 m (495 ft)3010 m x 59 m 3707 x 59 m
DEBDebrecenHajdú-BiharLHDCDebrecen International Airport109 m (359 ft)2498 m x 40 m
SOBSármellékZalaLHSMHévíz-Balaton Airport124 m (408 ft)2500 x 60 m
QGYGyőr-PérGyőr-Moson-SopronLHPRGyőr-Pér International Airport129 m (424 ft)2030 x 30 m 1134 x 43 m
QPJPécs-PogányBaranyaLHPPPécs-Pogány International Airport305 m (1000 ft)1500 x 30 m
\n", "
" ], "text/plain": [ "0 Location served County ICAO \\\n", "IATA \n", "BUD Budapest (Capital) LHBP \n", "DEB Debrecen Hajdú-Bihar LHDC \n", "SOB Sármellék Zala LHSM \n", "QGY Győr-Pér Győr-Moson-Sopron LHPR \n", "QPJ Pécs-Pogány Baranya LHPP \n", "\n", "0 Airport name Elev. \\\n", "IATA \n", "BUD Budapest Ferenc Liszt International Airport 151 m (495 ft) \n", "DEB Debrecen International Airport 109 m (359 ft) \n", "SOB Hévíz-Balaton Airport 124 m (408 ft) \n", "QGY Győr-Pér International Airport 129 m (424 ft) \n", "QPJ Pécs-Pogány International Airport 305 m (1000 ft) \n", "\n", "0 Runways \n", "IATA \n", "BUD 3010 m x 59 m 3707 x 59 m \n", "DEB 2498 m x 40 m \n", "SOB 2500 x 60 m \n", "QGY 2030 x 30 m 1134 x 43 m \n", "QPJ 1500 x 30 m " ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df" ] },
{ "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false }, "outputs": [], "source": [
"import os\n",
"from pygeocoder import Geocoder\n",
"# The Google API key is read from the environment so that it is never stored in the notebook.\n",
"# Export GOOGLE_API_KEY before starting the kernel; a missing variable raises KeyError here.\n",
"# NOTE(review): the key that used to be hardcoded in this cell has been published and should be revoked.\n",
"apik=os.environ['GOOGLE_API_KEY']" ] },
{ "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "BUD\n", "DEB\n", "SOB\n", "QGY\n", "QPJ\n" ] } ], "source": [
"# Geocode every airport in df (indexed by IATA code) and keep the coordinates of the first hit.\n",
"airport_geocoder=Geocoder(apik)  # one client reused for all lookups\n",
"locations={}\n",
"for i in df.index:\n",
"    results = airport_geocoder.geocode(i+' airport Hungary')\n",
"    locations[i]=results[0].coordinates\n",
"    print i" ] },
{ "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": true }, "outputs": [], "source": [
"# 'with' closes the handle, so the data is flushed before the next cell reads the file back.\n",
"with open(\"locations_hu.json\",'w') as f:\n",
"    f.write(json.dumps(locations))" ] },
{ "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": true }, "outputs": [], "source": [
"# Reload the saved coordinates from disk.\n",
"with open('locations_hu.json','r') as f:\n",
"    locations=json.loads(f.read())" ] },
{ "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import requests" ] },
{ "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": false }, "outputs": [ {
"name": "stdout", "output_type": "stream", "text": [ "QPJ https://www.airportia.com/hungary/pécs_pogány-airport/map/\n", "DEB https://www.airportia.com/hungary/debrecen-international-airport\n", "SOB https://www.airportia.com/hungary/sármellék...airport/arrivals\n", "BUD https://www.airportia.com/hungary/budapest-liszt-ferenc-international-airport\n", "QGY https://www.airportia.com/hungary/győr_pér...airport/photos\n" ] } ], "source": [
"# Query the Google custom search for each airport code and store, per code, the text of\n",
"# row 0 / column 0 of the sixth table on the results page, from 'http' onwards.\n",
"search_base='https://cse.google.com/cse?cx=partner-pub-6479063288582225%3A8064105798&cof=FORID%3A10&ie=UTF-8&q='\n",
"airportialinks={}\n",
"for i in locations:\n",
"    print i,\n",
"    # QPJ is searched under the code PEV instead of its own code\n",
"    query='PEV' if i=='QPJ' else str(i)\n",
"    url=search_base+query+'+airport+hungary'\n",
"    m=requests.get(url).content\n",
"    z=pd.read_html(m)[5][0][0]\n",
"    z=z[z.find('http'):]\n",
"    airportialinks[i]=z\n",
"    print z" ] },
{ "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "https://www.airportia.com/hungary/pécs_pogány-airport/map/\n", "https://www.airportia.com/hungary/debrecen-international-airport/\n", "https://www.airportia.com/hungary/sármellék-international-airport/\n", "https://www.airportia.com/hungary/győr_pér-international-airport/\n", "https://www.airportia.com/hungary/budapest-liszt-ferenc-international-airport/\n" ] } ], "source": [
"#reformat\n",
"# Reduce every search hit to the airport's base URL, ending in '/'. The schedule cell\n",
"# appends 'arrivals/<date>' to this base, so a leftover sub-page such as '/map/'\n",
"# would make every schedule URL for that airport wrong.\n",
"for code in airportialinks:\n",
"    link=airportialinks[code]\n",
"    # drop any sub-page suffix seen in the search results\n",
"    for section in ('/arrivals','/departures','/map','/photos'):\n",
"        link=link.split(section)[0]\n",
"    # the search result abbreviates long names with '...'\n",
"    link=link.replace(' ','').replace('...','-international-')\n",
"    if link[-1]!='/':link+='/'\n",
"    #manual fixes\n",
"    if code=='QGY':link=u'https://www.airportia.com/hungary/győr_pér-international-airport/'\n",
"    airportialinks[code]=link\n",
"    print link" ] },
{ "cell_type": "code", "execution_count": 65, "metadata": { "collapsed": true }, "outputs": [], "source": [ "sch={}" ] }, { "cell_type":
"markdown", "metadata": {}, "source": [ "record schedules for 2 weeks, then augment count with weekly flight numbers.\n", "seasonal and seasonal charter will count as once per week for 3 months, so 12/52 per week. TGM separate, since its history is in the past." ] },
{ "cell_type": "code", "execution_count": 66, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "QPJ\n", "DEB\n", "SOB\n", "BUD\n", "QGY\n" ] } ], "source": [
"# Download the arrivals table of every airport for each day and cache it in\n",
"# sch[airport][day_url]. Re-running the cell only fetches the days that are still missing.\n",
"for i in locations:\n",
"    print i\n",
"    if i not in sch:sch[i]={}\n",
"    #march 11-24 = 2 weeks\n",
"    for d in range (11,25):\n",
"        full=airportialinks[i]+'arrivals/201703'+str(d)\n",
"        # sch[i] is keyed by the full URL, so the cache test must use that key\n",
"        # (testing the bare day number never matched and re-downloaded everything)\n",
"        if full not in sch[i]:\n",
"            try:\n",
"                m=requests.get(full).content\n",
"                sch[i][full]=pd.read_html(m)[0]\n",
"            except Exception as err:\n",
"                # best effort: a day without a parsable table is skipped, but reported;\n",
"                # KeyboardInterrupt is no longer swallowed, so the loop can be stopped\n",
"                print 'no tables',i,d,'-',err" ] },
{ "cell_type": "code", "execution_count": 67, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "nr. of flights on March 11 : 87\n", "nr. of flights on March 12 : 117\n", "nr. of flights on March 13 : 122\n", "nr. of flights on March 14 : 102\n", "nr. of flights on March 15 : 108\n", "nr. of flights on March 16 : 109\n", "nr. of flights on March 17 : 126\n", "nr. of flights on March 18 : 86\n", "nr. of flights on March 19 : 120\n", "nr. of flights on March 20 : 126\n", "nr. of flights on March 21 : 105\n", "nr. of flights on March 22 : 110\n", "nr. of flights on March 23 : 110\n", "nr. of flights on March 24 : 124\n" ] }, { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
FlightFromAirlineScheduledArrivalStatusUnnamed: 6
14LH1334Frankfurt FRALufthansa10:0509:57LandedTrack >
41LH1338Frankfurt FRALufthansa13:4513:33LandedTrack >
61LH1340Frankfurt FRALufthansa18:0017:59LandedTrack >
79LH1342Frankfurt FRALufthansa23:0522:58LandedTrack >
\n", "
" ], "text/plain": [ " Flight From Airline Scheduled Arrival Status Unnamed: 6\n", "14 LH1334 Frankfurt FRA Lufthansa 10:05 09:57 Landed Track >\n", "41 LH1338 Frankfurt FRA Lufthansa 13:45 13:33 Landed Track >\n", "61 LH1340 Frankfurt FRA Lufthansa 18:00 17:59 Landed Track >\n", "79 LH1342 Frankfurt FRA Lufthansa 23:05 22:58 Landed Track >" ] }, "execution_count": 67, "metadata": {}, "output_type": "execute_result" } ], "source": [
"# Sanity check: row count per day for BUD, then the Frankfurt arrivals on March 18.\n",
"for i in range(11,25):\n",
"    testurl=u'https://www.airportia.com/hungary/budapest-liszt-ferenc-international-airport/arrivals/201703'+str(i)\n",
"    print 'nr. of flights on March',i,':',len(sch['BUD'][testurl])\n",
"testurl=u'https://www.airportia.com/hungary/budapest-liszt-ferenc-international-airport/arrivals/20170318'\n",
"k=sch['BUD'][testurl]\n",
"k[k['From']=='Frankfurt FRA']" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "`sch` checks out with source" ] },
{ "cell_type": "code", "execution_count": 68, "metadata": { "collapsed": true }, "outputs": [], "source": [ "mdf=pd.DataFrame()" ] },
{ "cell_type": "code", "execution_count": 69, "metadata": { "collapsed": false }, "outputs": [], "source": [
"# Build mdf from scratch out of every cached schedule table.\n",
"# The frames are collected in a list and concatenated once, so re-running this cell\n",
"# no longer appends duplicate rows to an existing mdf, and the airport table `df`\n",
"# is no longer overwritten by the loop.\n",
"day_frames=[]\n",
"for i in sch:\n",
"    for d in sch[i]:\n",
"        # keep only the columns at positions 1 and 2 (From and Airline in the table shown above)\n",
"        day=sch[i][d].iloc[:,1:3].copy()\n",
"        day['To']=i\n",
"        day['Date']=d  # d is the schedule page URL, which doubles as the day key\n",
"        day_frames.append(day)\n",
"# pd.concat raises on an empty list, so fall back to an empty frame when nothing was fetched\n",
"mdf=pd.concat(day_frames) if day_frames else pd.DataFrame()" ] },
{ "cell_type": "code", "execution_count": 70, "metadata": { "collapsed": false }, "outputs": [], "source": [ "mdf=mdf.replace('Hahn','Frankfurt')\n", "mdf=mdf.replace('Hahn HHN','Frankfurt HHN')" ] },
{ "cell_type": "code", "execution_count": 71, "metadata": { "collapsed": false }, "outputs": [], "source": [
"# Split 'From' at its last space: the text before it is the city, the token after it the airport code.\n",
"mdf['City']=[i[:i.rfind(' ')] for i in mdf['From']]\n",
"mdf['Airport']=[i[i.rfind(' ')+1:] for i in mdf['From']]" ] },
{ "cell_type": "code", "execution_count": 72, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
FromAirlineToDateCityAirport
14Frankfurt FRALufthansaBUDhttps://www.airportia.com/hungary/budapest-lis...FrankfurtFRA
41Frankfurt FRALufthansaBUDhttps://www.airportia.com/hungary/budapest-lis...FrankfurtFRA
61Frankfurt FRALufthansaBUDhttps://www.airportia.com/hungary/budapest-lis...FrankfurtFRA
79Frankfurt FRALufthansaBUDhttps://www.airportia.com/hungary/budapest-lis...FrankfurtFRA
\n", "
" ], "text/plain": [ " From Airline To \\\n", "14 Frankfurt FRA Lufthansa BUD \n", "41 Frankfurt FRA Lufthansa BUD \n", "61 Frankfurt FRA Lufthansa BUD \n", "79 Frankfurt FRA Lufthansa BUD \n", "\n", " Date City Airport \n", "14 https://www.airportia.com/hungary/budapest-lis... Frankfurt FRA \n", "41 https://www.airportia.com/hungary/budapest-lis... Frankfurt FRA \n", "61 https://www.airportia.com/hungary/budapest-lis... Frankfurt FRA \n", "79 https://www.airportia.com/hungary/budapest-lis... Frankfurt FRA " ] }, "execution_count": 72, "metadata": {}, "output_type": "execute_result" } ], "source": [ "k=mdf[mdf['Date']==testurl]\n", "k[k['From']=='Frankfurt FRA']" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "`mdf` checks out with source" ] },
{ "cell_type": "code", "execution_count": 73, "metadata": { "collapsed": false }, "outputs": [], "source": [
"# Save the merged schedule; 'with' guarantees the file is flushed and closed.\n",
"# NOTE(review): to_json() already returns a JSON string, so json.dumps() encodes it a\n",
"# second time. Kept as-is because existing readers of this file presumably decode twice - confirm.\n",
"with open(\"mdf_hu_arrv.json\",'w') as f:\n",
"    f.write(json.dumps(mdf.reset_index().to_json()))" ] },
{ "cell_type": "code", "execution_count": 74, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "1584" ] }, "execution_count": 74, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(mdf)" ] },
{ "cell_type": "code", "execution_count": 55, "metadata": { "collapsed": false }, "outputs": [], "source": [ "airlines=set(mdf['Airline'])" ] },
{ "cell_type": "code", "execution_count": 56, "metadata": { "collapsed": false }, "outputs": [], "source": [ "cities=set(mdf['City'])" ] },
{ "cell_type": "code", "execution_count": 57, "metadata": { "collapsed": true }, "outputs": [], "source": [
"# Save the distinct cities and airlines as JSON lists, closing each file after writing.\n",
"with open(\"cities_hu_arrv.json\",'w') as f:\n",
"    f.write(json.dumps(list(cities)))\n",
"with open(\"airlines_hu_arrv.json\",'w') as f:\n",
"    f.write(json.dumps(list(airlines)))" ] },
{ "cell_type": "code", "execution_count": 60, "metadata": { "collapsed": true }, "outputs": [], "source": [ "citycoords={}" ] },
{ "cell_type": "code", "execution_count": 61, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout",
"output_type": "stream", "text": [ "Manchester\n", "Lyon\n", "Sofia\n", "Oslo\n", "Kiev\n", "Istanbul\n", "Paris\n", "Bologna\n", "Riga\n", "Cairo\n", "Algiers\n", "St. Petersburg\n", "Gothenburg\n", "Nurnberg\n", "Barcelona\n", "Cologne\n", "Lanzarote\n", "Rotterdam\n", "Vienna\n", "Glasgow\n", "Nice\n", "Cluj-Napoca\n", "Edinburgh\n", "Liverpool\n", "Alicante\n", "Larnaca\n", "Dortmund\n", "Moscow\n", "Madrid\n", "Thessaloniki\n", "Munich\n", "Malmo\n", "Kutaisi\n", "Berlin\n", "Geneva\n", "Leeds\n", "Fuerteventura\n", "Catania\n", "Treviso\n", "Brussels\n", "Hong Kong\n", "Eilat\n", "Porto\n", "Dubai\n", "Eindhoven\n", "Malaga\n", "Helsinki\n", "Naples\n", "Basel\n", "East Midlands\n", "Hamburg\n", "Dublin\n", "Dusseldorf\n", "Tenerife\n", "Athens\n", "Stuttgart\n", "Zurich\n", "Minsk\n", "Pisa\n", "Stockholm\n", "Bristol\n", "Tel Aviv\n", "Venice\n", "Frankfurt\n", "Las Palmas\n", "Bucharest\n", "Reykjavik\n", "Belgrade\n", "Doha\n", "Billund\n", "Karlsruhe/Baden-Baden\n", "Prague\n", "Baku\n", "Birmingham\n", "Luqa\n", "Milan\n", "Rome\n", "London\n", "Lisbon\n", "Bari\n", "Amsterdam\n", "Copenhagen\n", "Hurghada\n", "Warsaw\n" ] } ], "source": [
"# Geocode every city that is not in citycoords yet. Cities listed in the table below\n",
"# are geocoded with a country added to the query; all others are looked up by name only.\n",
"geocode_queries={u'Birmingham':'Birmingham, UK',\n",
"                 u'Valencia':'Valencia, Spain',\n",
"                 u'Naples':'Naples, Italy',\n",
"                 u'St. Petersburg':'St. Petersburg, Russia',\n",
"                 u'Bristol':'Bristol, UK'}\n",
"city_geocoder=Geocoder(apik)  # one client reused for all lookups\n",
"for i in cities:\n",
"    if i not in citycoords:\n",
"        z=geocode_queries.get(i,i)\n",
"        citycoords[i]=city_geocoder.geocode(z)\n",
"        print i" ] },
{ "cell_type": "code", "execution_count": 62, "metadata": { "collapsed": true }, "outputs": [], "source": [
"# Keep only the coordinates and country of the first geocoding hit for each city.\n",
"citysave={}\n",
"for i in citycoords:\n",
"    best=citycoords[i][0]\n",
"    citysave[i]={\"coords\":best.coordinates,\n",
"                 \"country\":best.country}" ] },
{ "cell_type": "code", "execution_count": 63, "metadata": { "collapsed": false }, "outputs": [], "source": [
"# 'with' guarantees the file is flushed and closed after writing.\n",
"with open(\"citysave_hu_arrv.json\",'w') as f:\n",
"    f.write(json.dumps(citysave))" ] } ], "metadata": { "anaconda-cloud": {}, "kernelspec": { "display_name": "Python [conda root]", "language": "python", "name": "conda-root-py" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.12" } }, "nbformat": 4, "nbformat_minor": 1 }