├── .gitignore ├── LICENSE ├── README.md └── server.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | bin/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | eggs/ 16 | lib/ 17 | lib64/ 18 | parts/ 19 | sdist/ 20 | var/ 21 | *.egg-info/ 22 | .installed.cfg 23 | *.egg 24 | 25 | # Installer logs 26 | pip-log.txt 27 | pip-delete-this-directory.txt 28 | 29 | # Unit test / coverage reports 30 | htmlcov/ 31 | .tox/ 32 | .coverage 33 | .cache 34 | nosetests.xml 35 | coverage.xml 36 | 37 | # Translations 38 | *.mo 39 | 40 | # Mr Developer 41 | .mr.developer.cfg 42 | .project 43 | .pydevproject 44 | 45 | # Rope 46 | .ropeproject 47 | 48 | # Django stuff: 49 | *.log 50 | *.pot 51 | 52 | # Sphinx documentation 53 | docs/_build/ 54 | 55 | venv/ -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2014 Köln API 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
# encoding: utf8
#
# ---------------------------------------------------------------------------
# /LICENSE (continued) -- closing paragraph of the MIT license text
# ---------------------------------------------------------------------------
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
#
# ---------------------------------------------------------------------------
# /README.md
# ---------------------------------------------------------------------------
# kvb-api
# =======
#
# ## Installation (for the impatient)
#
#     virtualenv venv
#     source venv/bin/activate
#     pip install beautifulsoup4 requests flask parse
#
# ## Start
#
#     python server.py
#
# A few test URLs:
#
#     http://127.0.0.1:5000/
#     http://127.0.0.1:5000/stations/
#     http://127.0.0.1:5000/stations/1/
#     http://127.0.0.1:5000/stations/2/
#     http://127.0.0.1:5000/stations/3/
#
# ---------------------------------------------------------------------------
# /server.py
# ---------------------------------------------------------------------------
"""Small JSON API that scrapes station data from www.kvb.koeln.

The module exposes a Flask application (``app``) with these endpoints:

* ``/``                                          -- list of available methods
* ``/stations/``                                 -- all stations (id -> name)
* ``/stations/<station_id>/``                    -- lines serving a station
* ``/stations/<station_id>/lines/<line_id>/``    -- stations served by a line
* ``/stations/<station_id>/departures/``         -- current departures

Every scraper function performs one HTTP GET against the upstream site and
extracts its data from the returned HTML.
"""

import logging
import re
import time
from datetime import datetime
from functools import wraps

import requests
from bs4 import BeautifulSoup
from flask import Flask
from flask import json
from flask import request
from parse import parse

app = Flask(__name__)
logger = logging.getLogger(__name__)


class _LocalCache(object):
    """Minimal in-process cache offering the ``get``/``set`` calls used here.

    Only used when ``werkzeug.contrib.cache`` cannot be imported (the
    ``contrib`` package is absent from current Werkzeug releases).
    """

    def __init__(self, threshold=500, default_timeout=300):
        self._threshold = threshold
        self.default_timeout = default_timeout
        # key -> (absolute expiry timestamp or None, value)
        self._entries = {}

    def get(self, key):
        """Return the cached value for *key*, or ``None`` if absent/expired."""
        entry = self._entries.get(key)
        if entry is None:
            return None
        expires_at, value = entry
        if expires_at is not None and expires_at <= time.time():
            self._entries.pop(key, None)
            return None
        return value

    def set(self, key, value, timeout=None):
        """Store *value* under *key* for *timeout* seconds (<= 0: forever)."""
        if timeout is None:
            timeout = self.default_timeout
        if len(self._entries) >= self._threshold:
            self._prune()
        expires_at = time.time() + timeout if timeout > 0 else None
        self._entries[key] = (expires_at, value)
        return True

    def _prune(self):
        """Drop expired entries; if the cache is still full, empty it."""
        now = time.time()
        for key, (expires_at, _value) in list(self._entries.items()):
            if expires_at is not None and expires_at <= now:
                self._entries.pop(key, None)
        if len(self._entries) >= self._threshold:
            # Request paths are an unbounded key space, so never grow forever.
            self._entries.clear()


try:
    from werkzeug.contrib.cache import SimpleCache
except ImportError:
    # Keep the module importable without werkzeug.contrib.
    SimpleCache = _LocalCache

cache = SimpleCache()

# Upstream site that is scraped, and the per-request timeout in seconds
# (without a timeout a stalled upstream would block the worker indefinitely).
BASE_URL = "https://www.kvb.koeln"
REQUEST_TIMEOUT = 10

# URL templates for the scraper (matched against link targets with `parse`)
URL_TEMPLATES = {
    "station_details": "/haltestellen/overview/{station_id:d}/",
    "line_details": "/haltestellen/showline/{station_id:d}/{line_id:d}/",
    "schedule_table": "/haltestellen/aushang/{station_id:d}/",
    "schedule_pocket": "/haltestellen/miniplan/{station_id:d}/",
    "departures": "/qr/{station_id:d}/"
}

# Sent with every upstream request
HEADERS = {
    "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.137 Safari/537.36"
}

# Selects the table cells that make up a line's station listing.
_STATION_CELL_CLASS = re.compile(r".*station")

# Station tables, filled lazily by _load_stations():
# `stations` maps station id -> name, `stations_reverse` maps name -> id.
stations = {}
stations_reverse = {}


def cached(timeout=5 * 60, key='view/%s'):
    """Decorator factory caching a view's return value per request path.

    :param timeout: lifetime of a cache entry in seconds.
    :param key: ``%``-template for the cache key; receives ``request.path``.
    :returns: a decorator for Flask view functions.

    A view result of ``None`` is never served from the cache, because
    ``None`` is also what the cache returns for a miss.
    """
    def decorator(f):
        @wraps(f)
        def decorated_function(*args, **kwargs):
            cache_key = key % request.path
            rv = cache.get(cache_key)
            if rv is not None:
                return rv
            rv = f(*args, **kwargs)
            cache.set(cache_key, rv, timeout=timeout)
            return rv
        return decorated_function
    return decorator


def _fetch_soup(path):
    """GET ``BASE_URL + path`` and return the response parsed as HTML."""
    response = requests.get(
        BASE_URL + path, headers=HEADERS, timeout=REQUEST_TIMEOUT)
    # Name the parser explicitly so the result does not depend on which
    # optional parser libraries happen to be installed.
    return BeautifulSoup(response.text, "html.parser")


def _parse_link(anchor, template_name):
    """Match an ``<a>`` tag's href against one of ``URL_TEMPLATES``.

    :returns: the ``parse`` result, or ``None`` when *anchor* is ``None``,
        has no ``href``, or the href does not fit the template.
    """
    if anchor is None:
        return None
    href = anchor.get("href")
    if href is None:
        return None
    return parse(URL_TEMPLATES[template_name], href)


def _load_stations():
    """Fill the module-level station tables on first use.

    :returns: the ``stations`` dict (id -> name).
    """
    global stations, stations_reverse
    if not stations:
        stations = get_stations()
        stations_reverse = {name: sid for sid, name in stations.items()}
    return stations


def get_stations():
    """
    Fetch the list of all stations.

    :returns: dict with the station id (int) as key and the link text
        (station name) as value, inserted in ascending id order.
    """
    soup = _fetch_soup("/haltestellen/overview/")
    found = {}
    for a in soup.find_all("a"):
        result = _parse_link(a, "station_details")
        if result is None:
            continue
        # A later link to the same station overrides an earlier one.
        found[int(result["station_id"])] = a.text
    return dict(sorted(found.items()))


def get_station_details(station_id):
    """
    Read the details of one station.

    :param station_id: numeric station id.
    :returns: dict with ``station_id``, ``name`` (``None`` if the id is not
        in the station list) and ``line_ids`` (sorted list of line ids
        linked from the station page; empty if the page has no info list).
    """
    soup = _fetch_soup("/haltestellen/overview/%d/" % station_id)
    line_ids = set()
    info_list = soup.find("ul", class_="info-list")
    anchors = info_list.find_all("a") if info_list is not None else []
    for a in anchors:
        result = _parse_link(a, "line_details")
        if result is not None:
            line_ids.add(result["line_id"])
    return {
        "station_id": station_id,
        "name": _load_stations().get(station_id),
        "line_ids": sorted(line_ids)
    }


def get_line_details(station_id, line_id):
    """
    Find out which stations a line serves.

    :param station_id: numeric id of the station the line is looked up from.
    :param line_id: numeric line id.
    :returns: dict with ``station_id``, ``line_id`` and the two station id
        lists ``stations_forward`` and ``stations_reverse``.
    """
    soup = _fetch_soup(
        "/haltestellen/showline/%d/%d/" % (station_id, line_id))
    details = {
        "station_id": station_id,
        "line_id": line_id,
        "stations_forward": [],
        "stations_reverse": []
    }
    station_key = "stations_forward"
    top_markers = 0
    for td in soup.find_all("td", class_=_STATION_CELL_CLASS):
        # The second 'station-top' cell starts the listing for the
        # opposite direction; everything from there on goes to "reverse".
        if td.get("class")[0] == u'station-top':
            top_markers += 1
            if top_markers == 2:
                station_key = "stations_reverse"

        result = _parse_link(td.find("a"), "station_details")
        if result is None:
            continue
        details[station_key].append(int(result["station_id"]))
    return details


def get_departures(station_id):
    """
    Load the current departures of one station.

    :param station_id: numeric station id.
    :returns: list of dicts with ``line_id`` (int when numeric, otherwise
        the raw text), ``direction`` and ``wait_time`` (text with the
        " min" suffix removed; "sofort" is reported as "0"). Empty when
        the page contains no departure table.
    """
    soup = _fetch_soup("/qr/%d/" % station_id)
    table = soup.find("table", class_="display")
    if table is None:
        return []
    departures = []
    for row in table.find_all("tr"):
        tds = row.find_all("td")
        if len(tds) < 3:
            # Header or filler row without the three data cells.
            continue
        line_id = tds[0].text.replace(u"\xa0", "")
        direction = tds[1].text.replace(u"\xa0", "")
        wait_time = tds[2].text.replace(u"\xa0", " ").strip().lower()
        if wait_time == "sofort":
            wait_time = "0"
        wait_time = wait_time.replace(" min", "")
        try:
            line_id = int(line_id)
        except ValueError:
            # Non-numeric line names are passed through unchanged.
            pass
        logger.debug("departure: %s %s %s", line_id, direction, wait_time)
        departures.append({
            "line_id": line_id,
            "direction": direction,
            "wait_time": wait_time
        })
    return departures


@app.route("/")
def index():
    """Return the current UTC time and the list of API methods as JSON."""
    output = {
        # Serializing the datetime relies on flask.json's encoder.
        "datetime": datetime.utcnow(),
        "methods": {
            "station_list": "/stations/",
            "station_details": "/stations/{station_id}/",
            "departures": "/stations/{station_id}/departures/",
            "line_details": "/stations/{station_id}/lines/{line_id}/"
        }
    }
    return json.dumps(output)


@app.route("/stations/")
@cached()
def stations_list():
    """Return all stations (id -> name) as JSON."""
    return json.dumps(_load_stations())


@app.route("/stations/<int:station_id>/")
@cached()
def station_details(station_id):
    """Return the details of one station as JSON."""
    details = get_station_details(station_id)
    return json.dumps(details)


@app.route("/stations/<int:station_id>/lines/<int:line_id>/")
@cached()
def line_stations(station_id, line_id):
    """Return the stations served by a line as JSON."""
    details = get_line_details(station_id, line_id)
    return json.dumps(details)


# Not cached: departures are live data.
@app.route("/stations/<int:station_id>/departures/")
def station_departuress(station_id):
    """Return the current departures of one station as JSON."""
    details = get_departures(station_id)
    return json.dumps(details)


# Add CORS header to every request
@app.after_request
def add_cors(resp):
    """Attach permissive CORS headers to *resp* and return it."""
    # NOTE(review): echoing the caller's Origin together with
    # Allow-Credentials: true admits credentialed requests from any site.
    # Harmless only while this API uses no cookies/auth -- confirm.
    resp.headers['Access-Control-Allow-Origin'] = request.headers.get('Origin', '*')
    resp.headers['Access-Control-Allow-Credentials'] = 'true'
    resp.headers['Access-Control-Allow-Methods'] = 'POST, OPTIONS, GET'
    resp.headers['Access-Control-Allow-Headers'] = request.headers.get(
        'Access-Control-Request-Headers', 'Authorization')
    if app.debug:
        # Keep preflight results short-lived while developing.
        resp.headers['Access-Control-Max-Age'] = '1'
    return resp


if __name__ == "__main__":
    # Fetch the station list once at startup, as before.
    _load_stations()
    app.config["DEBUG"] = True
    app.run()