├── requirements.txt ├── map.png ├── .github └── workflows │ └── update.yml ├── README.md └── scrape_airline_routes.py /requirements.txt: -------------------------------------------------------------------------------- 1 | geopy==2.2.0 2 | lxml==4.6.3 3 | curl_cffi 4 | -------------------------------------------------------------------------------- /map.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jonty/airline-route-data/HEAD/map.png -------------------------------------------------------------------------------- /.github/workflows/update.yml: -------------------------------------------------------------------------------- 1 | name: Update airline route data 2 | on: 3 | schedule: 4 | - cron: '0 0 * * 0' 5 | workflow_dispatch: 6 | jobs: 7 | scrape-and-commit: 8 | runs-on: ubuntu-latest 9 | steps: 10 | - name: Checkout 11 | uses: actions/checkout@v2 12 | with: 13 | persist-credentials: false 14 | - name: Scrape and commit 15 | run: | 16 | pip install -r requirements.txt 17 | python -u scrape_airline_routes.py 18 | 19 | git config --global user.email "bot@jonty.co.uk" 20 | git config --global user.name "Airline Route Update GHA" 21 | 22 | git add airline_routes.json 23 | git commit -m "Automatic airline route update" 24 | - name: Push changes 25 | uses: ad-m/github-push-action@master 26 | with: 27 | github_token: ${{ secrets.GITHUB_TOKEN }} 28 | branch: ${{ github.ref }} 29 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Global airline routes data 2 | ========================== 3 | 4 | This is a single JSON file that describes every passenger airport and their outbound airline routes, automatically updated weekly. 5 | 6 | The file is indexed by IATA airport code. Routes specify the destination airport IATA code, flight time in minutes, and distance in kilometers. 
7 | 8 | You probably want to look at [airline_routes.json](airline_routes.json). 9 | 10 | A nice map of the data 11 | ---------------------- 12 | 13 | 14 | Example entry 15 | ------------- 16 | ```JSON 17 | { 18 | "LHR": { 19 | "city_name": "London", 20 | "continent": "EU", 21 | "country": "United Kingdom", 22 | "country_code": "GB", 23 | "display_name": "London (LHR), United Kingdom", 24 | "elevation": 80, 25 | "iata": "LHR", 26 | "icao": "EGLL", 27 | "latitude": "51.469603", 28 | "longitude": "-0.453566", 29 | "name": "Heathrow", 30 | "routes": [ 31 | { 32 | "carriers": [ 33 | { 34 | "iata": "BA", 35 | "name": "British Airways" 36 | }, 37 | { 38 | "iata": "KL", 39 | "name": "KLM" 40 | } 41 | ], 42 | "iata": "AMS", 43 | "km": 371, 44 | "min": 80 45 | }, 46 | { 47 | "carriers": [ 48 | { 49 | "iata": "BA", 50 | "name": "British Airways" 51 | }, 52 | { 53 | "iata": "LH", 54 | "name": "Lufthansa" 55 | } 56 | ], 57 | "iata": "MUC", 58 | "km": 943, 59 | "min": 115 60 | }, 61 | { 62 | "carriers": [ 63 | { 64 | "iata": "AF", 65 | "name": "Air France" 66 | }, 67 | { 68 | "iata": "BA", 69 | "name": "British Airways" 70 | } 71 | ], 72 | "iata": "CDG", 73 | "km": 348, 74 | "min": 80 75 | }, 76 | ... 
#!/usr/bin/python
"""Scrape airports and their outbound routes from flightsfrom.com.

The script downloads the site's airport list, then visits each airport's
destinations page, following every route to airports that were not in the
initial list.  The result is written to ``airline_routes.json`` as one JSON
object keyed by IATA code.
"""
import sys
import json
import re
from collections import defaultdict
import time

from curl_cffi import requests
import lxml.html
from geopy.distance import geodesic

AIRPORTS_URL = "https://www.flightsfrom.com/airports"
DESTINATIONS_URL = "https://www.flightsfrom.com/%s/destinations"
OUTPUT_PATH = "airline_routes.json"

# A page that keeps failing is abandoned after this many attempts instead of
# being retried forever, so one broken airport cannot stall the whole run.
MAX_FETCH_ATTEMPTS = 5
RETRY_DELAY_SECONDS = 60 * 5
# Pause between successful page fetches.
REQUEST_DELAY_SECONDS = 1

# Keys copied from the page's airport object; they are lower-cased on output.
AIRPORT_FIELDS = [
    "city_name",
    "continent",
    "country",
    "country_code",
    "display_name",
    "elevation",
    "IATA",
    "ICAO",
    "latitude",
    "longitude",
    "name",
    "timezone",
]
# Keys copied from each airline object; they are lower-cased on output.
CARRIER_FIELDS = [
    "name",
    "IATA",
]
# Top-level objects the destinations page must define for it to be usable.
REQUIRED_METADATA_KEYS = ("airport", "routes")

# Matches "window.<name> =" but not the comparison "window.<name> ==".
_WINDOW_ASSIGNMENT = re.compile(r"window\.(\w+)\s*=(?!=)\s*")


def _parse_window_assignments(script_text):
    """Return ``{name: value}`` for every ``window.name = <JSON>`` in the text.

    Each value is decoded in place with ``raw_decode``, so the result does not
    depend on the exact whitespace around ``=`` or on a trailing ``;``.
    Scanning resumes after each decoded value, which means the text
    ``window.`` inside a JSON string is never mistaken for an assignment.
    Assignments whose right-hand side is not JSON are ignored.
    """
    decoder = json.JSONDecoder()
    assignments = {}
    position = 0
    while True:
        match = _WINDOW_ASSIGNMENT.search(script_text, position)
        if match is None:
            return assignments
        try:
            value, position = decoder.raw_decode(script_text, match.end())
        except json.JSONDecodeError:
            # Not a JSON literal (e.g. a function); keep scanning after it.
            position = match.end()
            continue
        assignments[match.group(1)] = value


def _fetch_airport_metadata(iata):
    """Download one destinations page and return its ``window.*`` objects.

    Raises:
        ValueError: if the page has no ``window.airport`` script, or the
            script does not define both ``airport`` and ``routes``.
        Exception: anything raised by the HTTP request or the HTML parser.
    """
    response = requests.get(DESTINATIONS_URL % iata, impersonate="chrome")
    root = lxml.html.document_fromstring(response.content)
    script_nodes = root.xpath('//script[contains(., "window.airport")]')
    if not script_nodes:
        raise ValueError("no window.airport script found in page")

    metadata = _parse_window_assignments(script_nodes[0].text_content())
    missing = [key for key in REQUIRED_METADATA_KEYS if key not in metadata]
    if missing:
        raise ValueError("page metadata is missing: %s" % ", ".join(missing))
    return metadata


def _fetch_airport_metadata_with_retries(iata):
    """Fetch page metadata, retrying a bounded number of times.

    Returns the metadata dict, or ``None`` once ``MAX_FETCH_ATTEMPTS``
    attempts have all failed.
    """
    for attempt in range(1, MAX_FETCH_ATTEMPTS + 1):
        try:
            return _fetch_airport_metadata(iata)
        except Exception as e:
            # Deliberately broad: network errors, HTML parse errors and
            # unusable pages are all handled the same way (wait, retry).
            if attempt == MAX_FETCH_ATTEMPTS:
                print(
                    "! Giving up on %s after %s attempts: %s"
                    % (iata, MAX_FETCH_ATTEMPTS, e)
                )
                return None
            print(
                "! Error while fetching %s (attempt %s of %s), "
                "having a little 5m sleep before retrying: %s"
                % (iata, attempt, MAX_FETCH_ATTEMPTS, e)
            )
            time.sleep(RETRY_DELAY_SECONDS)
    return None


def _passenger_carriers(route):
    """Return the active passenger airlines serving ``route``."""
    carriers = []
    for airline_route in route["airlineroutes"]:
        airline = airline_route["airline"]
        is_passenger = (
            airline["is_scheduled_passenger"] == "1"
            or airline["is_nonscheduled_passenger"] == "1"
        )
        if airline["active"] and is_passenger:
            carriers.append(
                {field.lower(): airline[field] for field in CARRIER_FIELDS}
            )
    return carriers


def _build_airport(metadata):
    """Turn page metadata into an output record.

    Returns:
        ``(airport, destinations)`` where ``airport`` is the dict stored in
        the output file (including its ``routes`` list) and ``destinations``
        lists the IATA code of every route, in page order.
    """
    airport = {
        field.lower(): metadata["airport"][field] for field in AIRPORT_FIELDS
    }
    if airport["elevation"]:
        airport["elevation"] = int(airport["elevation"])

    origin = (airport["latitude"], airport["longitude"])
    routes = []
    destinations = []
    for route in metadata["routes"]:
        target = (route["airport"]["latitude"], route["airport"]["longitude"])
        routes.append(
            {
                "carriers": _passenger_carriers(route),
                "km": int(geodesic(origin, target).km),
                "min": int(route["common_duration"]),
                "iata": route["iata_to"],
            }
        )
        destinations.append(route["iata_to"])

    airport["routes"] = routes
    return airport, destinations


def main():
    """Scrape every reachable airport and write ``airline_routes.json``.

    Exits with status 1 if the airport list cannot be decoded or if no
    airport at all could be scraped; in both cases no file is written.
    """
    print("Fetching airports list...")
    response = requests.get(AIRPORTS_URL, impersonate="chrome")
    try:
        airports_json = json.loads(response.content)
    except json.decoder.JSONDecodeError:
        print("Failed to load airport JSON, page body was: '%s'" % response.content)
        sys.exit(1)

    pending = [entry["IATA"] for entry in airports_json["response"]["airports"]]

    airports = defaultdict(dict)
    # Airports that exhausted their retries; remembered so that routes
    # pointing back at them do not trigger another full retry cycle.
    failed = set()

    while pending:
        iata = pending.pop()
        if iata in airports or iata in failed:
            continue

        print("Fetching #%s: %s" % (len(airports), iata))

        metadata = _fetch_airport_metadata_with_retries(iata)
        if metadata is None:
            failed.add(iata)
            continue

        airport, destinations = _build_airport(metadata)
        airports[iata] = airport
        # Follow routes so airports absent from the initial list are visited.
        pending.extend(destinations)

        time.sleep(REQUEST_DELAY_SECONDS)

    if failed:
        print(
            "! Skipped %s airport(s) that could not be fetched: %s"
            % (len(failed), ", ".join(sorted(str(code) for code in failed)))
        )
    if not airports:
        print("No airports could be scraped, not writing %s" % OUTPUT_PATH)
        sys.exit(1)

    with open(OUTPUT_PATH, "w", encoding="utf-8") as f:
        f.write(json.dumps(airports, indent=4, sort_keys=True, separators=(",", ": ")))


if __name__ == "__main__":
    main()