├── testDB.py ├── main.py ├── cityplan.py ├── scrapeDB.py ├── readpdf.py ├── README.md ├── paint.py └── deutschebahn.py /testDB.py: -------------------------------------------------------------------------------- 1 | import deutschebahn 2 | import pickle 3 | 4 | with open('./pickles/allstations.pkl', 'rb') as F: 5 | allstations = pickle.load(F) 6 | 7 | with open('./pickles/alltimetables.pkl', 'rb') as F: 8 | alltimetables = pickle.load(F) 9 | 10 | with open('./pickles/alltrains.pkl', 'rb') as F: 11 | alltrains = pickle.load(F) 12 | 13 | germany = deutschebahn.System(alltrains, allstations, alltimetables, max_bloom_elements=30, failure_rate=0.01) 14 | germany.test_track_coherence(1000) 15 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | # This is a sample Python script. 2 | 3 | # Press Shift+F10 to execute it or replace it with your code. 4 | # Press Double Shift to search everywhere for classes, files, tool windows, actions, and settings. 5 | 6 | 7 | # Chicago has almost 2000 buses 8 | # 150 bus routes 9 | # 12,000 bus stops 10 | # 10 train lines 11 | # max 50 stops / bus 12 | # 150 train stations 13 | import sys 14 | import cityplan 15 | import paint 16 | # city = cityplan.City(num_bus_routes=150, num_stops_each=50, stops_in_city=12000, max_stops=104) 17 | # print(f"{city.test_city(100000)} success rate") 18 | 19 | city = cityplan.City(num_bus_routes=2000, num_stops_each=15, stops_in_city=2000, max_bloom_elements=50) 20 | print(f"{city.test_city(10000)} success rate") 21 | 22 | 23 | sys.exit() 24 | 25 | # Show bit array total for a bus. 26 | # The last line has 27 leading 0's (final 5 good) 27 | # 997 bytes 28 | print('\n'.join((format(x, '#034b')[2:] for x in city.mybusses[-1].bloomstops.backend.array_))) 29 | bitstring = ''.join((format(x, '#034b')[2:] for x in city.mybusses[-1].bloomstops.backend.array_)) 30 | #ending = bitstring[-5:] 31 | #bitstring = bitstring[:-32] + ending 32 | print(bitstring) 33 | print(len(bitstring)) 34 | 35 | not_existing_route = list(city.validstops)[-1] 36 | mynotstop = cityplan.Bus(route=[not_existing_route] , max_bloom_elements=40) 37 | print("Not Looking for stop: ", mynotstop.liststops) 38 | #num_stops_each=30, stops_in_city=500, max_bloom_elements=40) 39 | 40 | existing_route = list(city.mybusses[-1].liststops)[-1] 41 | mystop = cityplan.Bus(route=[existing_route] , max_bloom_elements=40) 42 | print("Looking for stop: ", mystop.liststops) 43 | print("in", city.mybusses[-1].liststops ) 44 | 45 | 46 | c = paint.Canvas() 47 | c.visualizeByteString(''.join((format(x, '#034b')[2:] for x in mynotstop.bloomstops.backend.array_))) 48 | c.visualizeByteString(''.join((format(x, '#034b')[2:] for x in mystop.bloomstops.backend.array_))) 49 | c.visualizeByteString(bitstring, allLetters=True) -------------------------------------------------------------------------------- /cityplan.py: -------------------------------------------------------------------------------- 1 | import random 2 | from bloom_filter2 import BloomFilter 3 | 4 | 5 | class City: 6 | 7 | def __init__(self, num_bus_routes=100, num_stops_each = 30, stops_in_city=500, max_bloom_elements=100): 8 | self.mybusses = [] 9 | self.validstops = set() 10 | print(f"City:\nNum routes = {num_bus_routes}\nnum_stops_each = {num_stops_each}\ntotal stops = {stops_in_city}") 11 | 12 | for i in range(num_bus_routes): 13 | stops = random.sample(range(stops_in_city), num_stops_each) 14 | self.mybusses.append(Bus(stops, max_bloom_elements, 0.01)) 15 | self.validstops.update(stops) 16 | 17 | self.validstops = list(self.validstops) 18 | arraysize = self.mybusses[-1].getarrayinfo() 19 | print(f"Busses have {max_bloom_elements} max elements of size {arraysize}, and a failure rate of 0.01\n") 20 | 21 | 22 | # Try catching a random bus, return TRUE if correct caught, FALSE if on wrong bus 23 | def catch_correct_bus(self): 24 | mydestination = random.choice(self.validstops) 25 | 26 | while True: 27 | nextbus = random.choice(self.mybusses) 28 | 29 | if nextbus.bloomhas(mydestination): 30 | return nextbus.listhas(mydestination) 31 | else: 32 | # wait for next bus 33 | pass 34 | 35 | 36 | def test_city(self, num_people): 37 | successes = 0 38 | 39 | for person in range(num_people): 40 | success = self.catch_correct_bus() 41 | successes += success 42 | 43 | return successes / num_people 44 | 45 | 46 | 47 | class Bus: 48 | 49 | def __init__(self, route:list, max_bloom_elements:int = 12000, failure_rate:float = 0.01): 50 | # adds are deterministic, so two arrays of same definitions will add items to same indices. 51 | self.liststops = list(route) 52 | self.bloomstops = BloomFilter(max_elements = max_bloom_elements, error_rate=failure_rate) 53 | self.bloomstops.num_probes_k = 4 54 | for r in route: 55 | self.bloomstops.add(r) 56 | 57 | def bloomhas(self, stop): 58 | return stop in self.bloomstops 59 | 60 | def listhas(self, stop): 61 | return stop in self.liststops 62 | 63 | def getarrayinfo(self): 64 | return self.bloomstops.num_bits_m 65 | 66 | def paintbus(self): 67 | return 0 -------------------------------------------------------------------------------- /scrapeDB.py: -------------------------------------------------------------------------------- 1 | 'curl -X GET --header "Accept: application/json" --header "Authorization: Bearer 8dfc4d27587b1addbd6aae85884501e2" "https://api.deutschebahn.com/freeplan/v1/location/b"' 2 | 3 | """ 4 | Can curl every location by going through German alphabet 5 | https://api.deutschebahn.com/freeplan/v1/location/b 6 | """ 7 | 8 | import requests 9 | dealpha = "A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P, Q, R, S, T, U, V, W, X, Y, Z, Ä, Ö, Ü, ß".lower().split(', ') 10 | 11 | """ 12 | GET ALL STOPS 13 | """ 14 | allstations = dict() 15 | for letter in dealpha: 16 | try: 17 | r = requests.get(f'https://api.deutschebahn.com/freeplan/v1/location/{letter}', params={"Accept": "application/json", "Authorization": "Bearer 8dfc4d27587b1addbd6aae85884501e2"}) 18 | rj = r.json() 19 | print(rj[0]) 20 | # [{'name': 'Berlin Hbf', 'lon': 13.369549, 'lat': 52.525589, 'id': 8011160}, 21 | # {'name': 'Bielefeld Hbf', 'lon': 8.532723, 'lat': 52.029259, 'id': 8000036}, 22 | # ... ] 23 | for stop in rj: 24 | allstations[stop['id']] = stop['name'] 25 | except Exception as e: 26 | print(e, e.__doc__) 27 | 28 | 29 | 30 | """ 31 | GET ALL TIME TABLES 32 | """ 33 | alltimetables = dict() 34 | alltrains = dict() 35 | for station_id in allstations.keys(): 36 | try: 37 | alltimetables[station_id] = dict() 38 | r = requests.get(f'https://api.deutschebahn.com/freeplan/v1/departureBoard/{station_id}?date=2022-01-15', params={"Accept": "application/json", "Authorization": "Bearer 8dfc4d27587b1addbd6aae85884501e2"}) 39 | rj = r.json() 40 | 41 | for train in rj: 42 | n = train['name'] 43 | if train['track'] not in alltimetables[station_id]: 44 | alltimetables[station_id][train['track']] = [] 45 | 46 | alltimetables[station_id][train['track']].append(n) 47 | if n not in alltrains: 48 | alltrains[n] = [] 49 | 50 | alltrains[n].append(station_id) 51 | 52 | except Exception as e: 53 | print(e, e.__doc__) 54 | 55 | """ 56 | { 57 | "name": "ICE 618", 58 | "type": "ICE", 59 | "boardId": 8000105, 60 | "stopId": 8000105, 61 | "stopName": "Frankfurt(Main)Hbf", 62 | "dateTime": "2022-01-15T04:46", 63 | "track": "19", 64 | "detailsId": "594015%2F200777%2F989772%2F296881%2F80%3fstation_evaId%3D8000105" 65 | }, 66 | { 67 | "name": "ICE 827", 68 | "type": "ICE", 69 | "boardId": 8000105, 70 | "stopId": 8000105, 71 | "stopName": "Frankfurt(Main)Hbf", 72 | "dateTime": "2022-01-15T04:54", 73 | "track": "7", 74 | "detailsId": "982503%2F330968%2F696966%2F20982%2F80%3fstation_evaId%3D8000105" 75 | }, 76 | """ 77 | 78 | 79 | 80 | import pickle 81 | print('dumping') 82 | with open("./pickles/allstations.pkl", 'wb') as F: 83 | pickle.dump(allstations, F) 84 | 85 | with open("./pickles/alltimetables.pkl", 'wb') as F: 86 | pickle.dump(alltimetables, F) 87 | 88 | with open("./pickles/alltrains.pkl", 'wb') as F: 89 | pickle.dump(alltrains, F) 90 | print('dumped') 91 | 92 | 93 | print(alltrains) 94 | input() 95 | print(alltimetables) 96 | input() 97 | print(allstations) 98 | -------------------------------------------------------------------------------- /readpdf.py: -------------------------------------------------------------------------------- 1 | import re 2 | import pdfplumber 3 | import string 4 | from collections import defaultdict 5 | r = re.compile("\n([\w-]+ \d+).*?\n.*?(\(\d\d:\d\d\)[\w\W\n]+?\(\d\d:\d\d\)).*?\n") 6 | 7 | 8 | def alphafilter(s): 9 | r = [] 10 | for i in s: 11 | if i.isalpha() or i == ' ': 12 | r.append(i) 13 | return ''.join(r) 14 | 15 | 16 | results = [] 17 | pleaseadd = True 18 | with pdfplumber.open('../Downloads/db.pdf') as pdf: 19 | for x in pdf.pages: 20 | foundroutes = r.findall(x.extract_text()) 21 | for route in foundroutes: 22 | trainname = route[0] 23 | stops = [alphafilter(s).strip() for s in route[1].split('-')] 24 | for stop in stops: 25 | if len(stop) > 50: 26 | pleaseadd = False 27 | if pleaseadd: 28 | results.append((trainname, stops)) 29 | pleaseadd = True 30 | 31 | allstops = defaultdict(list) 32 | for train in results: 33 | name = train[0] 34 | for stop in train[1]: 35 | allstops[stop].append(name) 36 | # [('EC 86', '(13:50)- Verona Porta Nuova - Bolzano / Bozen - (Brennero/Brenner (17:48/18:00)) -\nInnsbruck (18:36/18:40) - (Kufstein (19:24/19:26)) - München (20:25)')] 37 | 38 | 39 | 40 | 41 | 42 | """ 43 | 672 Druckzeitraum : 01.04.2019 - 10.06.2019 44 | ICE-T 1630 45 | ICE-T 1630 (Ber 46 | (Berlin-Rummelsburg (Triebzuganlage)) - Berlin-Gesundbrunnen (12:54)- Berlin - Berlin Südkreuz - Halle 47 | (Saale) - Erfurt - Frankfurt (Main) (16:56) 48 | Bln-Rummels Tanl - Frankfurt(M), Mi+Fr+So 24.V.-07.VI., auch 05., 07., 30.IV., 10.VI., nicht 31.V., 05.VI. 49 | Bln-Rummels Tanl - Frankfurt(M), Fr+So 12.IV.-19.V., auch 18., 22.IV., nicht 19., 21.IV. 50 | Tfz1:411 Hg230 0t BrH193 318m EB a (WC); )p( 51 | + Tfz1:411 Hg230 0t BrH193 185m EB a (WC); )p( 52 | Œ; ­ 53 | 7 54 | Apmzf 28 F7 310 1690 BRGBT FF 1635 55 | ABpmz 27 78430 78679 56 | WRmz 26 57 | 06 Bpmdz 24 58 | Bpmz 23 59 | Bpmbz 22 60 | Bpmzf 21 61 | 70 62 | 06) Fahrradstellplätze buchbar ab 1. April 63 | ICE-T 1631 64 | ICE-T 1631 65 | Frankfurt (Main) (15:02)- Erfurt - Halle (Saale) - Berlin Südkreuz - Berlin - Berlin-Gesundbrunnen (19:10)- 66 | (Berlin-Rummelsburg (Triebzuganlage)) 67 | Frankfurt(M) - Bln-Rummels Tanl, tgl. 01.-07.IV., 20.V.-10.VI. 68 | Frankfurt(M) - Bln-Rummels Tanl, tgl. 08.IV.-19.V. 69 | FF Tfz1:411 Hg230 0t BrH193 318m EB a (WC); )p( 70 | Reisendensicherung gemäß Richtlinie 419.3312 (siehe Anhang Xb) 71 | Œ; ­ 72 | 7 73 | Bpmzf 31 F5 311 a) 78674 FF BRGBT 78434 74 | Bpmbz 32 1627 1695 75 | Bpmz 33 1697 76 | Bpmkz 37 77 | Apmzf 38 78 | Bpmzf 21 F7 310 1636 FF BRGBT 1701 79 | Bpmbz 22 1634 1197 80 | Bpmz 23 78471 81 | 06 Bpmdz 24 1691 82 | WRmz 26 78444 83 | ABpmz 27 92736 84 | Apmzf 28 85 | 71 86 | 06) Fahrradstellplätze buchbar ab 1. April a) Fr auch 18., 30.IV., 29.V., nicht 19.IV. 87 | 2019_ZpAR Wi_B2 88 | 89 | 90 | 91 | 92 | 93 | 94 | Output for each page: 95 | [('ICE-T 1630', '(12:54)- Berlin - Berlin Südkreuz - Halle\n(Saale) - Erfurt - Frankfurt (Main) (16:56)'), ('ICE-T 1631', '(15:02)- Erfurt - Halle (Saale) - Berlin Südkreuz - Berlin - Berlin-Gesundbrunnen (19:10)')] 96 | RRRR[0] 97 | ('ICE-T 1630', '(12:54)- Berlin - Berlin Südkreuz - Halle\n(Saale) - Erfurt - Frankfurt (Main) (16:56)') 98 | RRRR[1] 99 | ('ICE-T 1631', '(15:02)- Erfurt - Halle (Saale) - Berlin Südkreuz - Berlin - Berlin-Gesundbrunnen (19:10)') 100 | """ 101 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # BusStopBloomfilter 2 | Proof of Concept: 3 | 4 | Navigating arbitrarily complex bus systems can be tricky, especially as bus listings and routes are listed in O(N) style wall-posters. Can we use bloom filters to make routing decisions O(1) for the passenger? 5 | 6 | Here is a short look on two problems: 7 | 1) how can we visualize a bit array? 8 | 2) how few bits can we get away with to avoid *too many* false positives? 9 | 10 | My technique can visualize 192 bits, and achieve a < 0.5% false positive rate on my Deutsche Bahn system simulation. 11 | 12 | Following is a video presentation of this idea which is the same information that is here just in video format, honestly you shouldn't even watch it unless you really like sitting through presentations: 13 | 14 | https://www.youtube.com/watch?v=rGu2NkZPEzo 15 | 16 | Population will expand and public transportation will become more complex. How can we rethink how we do public transportation signage to keep finding your bus (and/or your bus stop) independent of how many buses are arriving or how complex their routes are? 17 | 18 | 19 | 20 | 21 | Example: 22 | 23 | **You will need to know the "fingerprint" of your destination. This is the fingerprint for Stendal Hbf (three bits of information).** 24 | > ![image](https://user-images.githubusercontent.com/9337973/182030849-b8a04c14-814e-4075-ae7a-ba321cb38768.png) 25 | 26 | 27 | 28 | 29 | **Every bus stop's destination placard will show the accumulated fingerprints of all the buses coming through. This is the fingerprint of a matching stop. There is at least one bus passing through this stop which will also go to your destination. 30 | Notice the 'e', 'G', and 'g' all have their symbols present.** 31 | > ![image](https://user-images.githubusercontent.com/9337973/182030864-cec24c37-6703-48cf-8b2a-f1063a3fd06d.png) 32 | 33 | 34 | 35 | **Here we have a bus/train coming. This is its fingerprint. Your destination fingerprint does not match, so you do not get on.** 36 | > ![image](https://user-images.githubusercontent.com/9337973/182030880-0c80d261-aad8-44a1-8eef-9039ab5cf026.png) 37 | 38 | 39 | 40 | **This bus coming is correct, your fingerprint matches. Get on!** 41 | > ![image](https://user-images.githubusercontent.com/9337973/182030893-d9b97e27-671d-45c2-b200-73b69e2123de.png) 42 | 43 | 44 | 45 | Benefits are that 46 | * scaling your bus system does not lead to more complicated fingerprints 47 | * riders must not read signage in a given language or alphabet 48 | * very quick to see where you need to be and which bus you should board 49 | 50 | 51 | Other visualization techiniques I've thought of were using a 2d array of emojis. If we could get 15x15 emojis we would have 225 bits of info, we could subdivide that into nine 5x5 square tiles like a tic-tac-toe board, give each square a unique background colour, and tell people, if you have a crocodile in the upper left blue tiles and... etc. Emojis may look tacky, but they are distinct images, compact in size, free, prevalent, and people are already largely familiar with that particular set of pictures, so it would be more intuitive. With any system I can think of, high-res screens or printed pieces of paper would be required to display the level of detail symbols need. 52 | 53 | 54 | 127 | 128 | The full emoji table could look like (randomly generated): 129 | 130 | ![image](https://user-images.githubusercontent.com/9337973/227025986-f332118c-8043-4f30-9855-298d28b5f93f.png) 131 | 132 | but in practice it would more resemble: 133 | 134 | ![image](https://user-images.githubusercontent.com/9337973/227028035-7e947ca1-7555-4362-b72d-e0107bf3afa6.png) 135 | 136 | 137 | With a fingerprint looking like: 138 | 139 | ![image](https://user-images.githubusercontent.com/9337973/227028595-7bd83d05-4c4a-49a1-964c-dda2080d7243.png) 140 | 141 | -------------------------------------------------------------------------------- /paint.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | from matplotlib.patches import Polygon 4 | import string 5 | 6 | 7 | def plot_masked(ax): 8 | """Plots the image masked outside of a circle using masked arrays""" 9 | # Calculate the distance from the center of the circle 10 | ix, iy = np.meshgrid(np.arange(100), np.arange(100)) 11 | distance = np.sqrt((ix - 0)**2 + (iy - 0)**2) 12 | 13 | # Mask portions of the data array outside of the circle 14 | 15 | 16 | def draw_circle(ax, color): 17 | if color=='0': 18 | return 19 | pts = np.array([[0, 0], 20 | [0, 0.25], 21 | [0.25, 0] ]) 22 | p = Polygon(pts, color="black") 23 | ax.add_patch(p) 24 | pts = np.array([[1, 0], 25 | [1, 0.25], 26 | [0.75, 0] ]) 27 | p = Polygon(pts, color="black") 28 | ax.add_patch(p) 29 | pts = np.array([[1, 1], 30 | [1, 0.75], 31 | [0.75, 1] ]) 32 | p = Polygon(pts, color="black") 33 | ax.add_patch(p) 34 | pts = np.array([[0, 1], 35 | [0, 0.75], 36 | [0.25, 1] ]) 37 | p = Polygon(pts, color='black') 38 | ax.add_patch(p) 39 | 40 | 41 | def draw_top_triangle(ax, color:str): 42 | # All shapes on a 5x5 square 43 | # ax = plt.gca() PASS IN, SHARED 44 | if color=='0': 45 | return 46 | pts = np.array([[0, 1], 47 | [1, 1], 48 | [0, 0] ]) 49 | 50 | p = Polygon(pts, facecolor="pink", edgecolor="gray") 51 | ax.add_patch(p) 52 | 53 | 54 | def draw_bot_triangle(ax, color:str): 55 | # All shapes on a 5x5 square 56 | # ax = plt.gca() PASS IN, SHARED 57 | if color=='0': 58 | return 59 | pts = np.array([[1, 0], 60 | [1, 1], 61 | [0, 0] ]) 62 | 63 | p = Polygon(pts, facecolor="lightblue", edgecolor="gray") 64 | ax.add_patch(p) 65 | 66 | 67 | def draw_stripe(ax, color:bool): 68 | # All shapes on a 5x5 square 69 | # ax = plt.gca() PASS IN, SHARED 70 | if color=='0': 71 | return 72 | pts = np.array([[0, 0.8], 73 | [0, 1], 74 | [.2, 1], 75 | [1, .2], 76 | [1, 0], 77 | [.8, 0] ]) 78 | 79 | p = Polygon(pts, facecolor="lightyellow", edgecolor="gray" ) 80 | ax.add_patch(p) 81 | 82 | def draw_left_dot(ax, appear:str): 83 | if appear == '0': 84 | return 85 | ax.text(0.05, 0.45, '.', fontsize=40, color='black') 86 | 87 | 88 | def draw_right_dot(ax, appear:str): 89 | if appear == '0': 90 | return 91 | ax.text(0.8, 0.45, '.', fontsize=40, color='black') 92 | 93 | def draw_letter(ax, letter): 94 | ax.text(0.4 if len(letter) == 1 else 0.3, 0.33, letter, fontsize=40, color="black", fontdict={'family':'monospace'}) 95 | 96 | #def plot_circle(ax, ) 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | # TODO: Left side or right side letters 105 | # Black or LIME color 106 | class Canvas: 107 | 108 | def __init__(self): 109 | self.lettersize = 6 # triangle, triangle, stripe, corners, leftdot, rightdot 110 | self.allLetters = [] 111 | LETS = list(string.ascii_uppercase) + ['?', "\$", "*","!", "%", "@"] 112 | lets = list(string.ascii_lowercase) + ['??', "\$\$", "**","!!", "%%", "@@"] 113 | for i in range(len(lets)): 114 | self.allLetters.append(LETS[i]) 115 | self.allLetters.append(lets[i]) 116 | self.alphabetsize = len(self.allLetters) * self.lettersize 117 | #print(len(self.allLetters)) 118 | 119 | 120 | def getAlphabet(self, bitarray): 121 | #print("Alphabet size var is", self.alphabetsize, end='\t') 122 | #print("My given alphabetarray is", len(bitarray)) 123 | for i in range(len(bitarray) // self.alphabetsize+1): 124 | alphabet = bitarray[i * self.alphabetsize: min((i + 1) * self.alphabetsize, len(bitarray))] 125 | yield alphabet 126 | 127 | 128 | """ 129 | alphabet is the bit string of length 35*4 = 140, representing all the info of the alphabet 130 | """ 131 | def getLetter(self, alphabet): 132 | for i in range(len(alphabet) // self.lettersize): 133 | letter = alphabet[ i * self.lettersize : min((i + 1) * self.lettersize, len(alphabet))] 134 | yield letter, self.allLetters[i] 135 | 136 | 137 | def interpretAlphabet(self, alphabetarray, allLetters, numletters=8): 138 | try: 139 | n_cols = 8 140 | n_rows = min(8, numletters // n_cols) 141 | fig, axes = plt.subplots(n_rows, n_cols) 142 | 143 | letters = self.getLetter(alphabetarray) 144 | 145 | for row_num in range(n_rows): 146 | for col_num in range(n_cols): 147 | letterbits, lettertext = next(letters) 148 | ax = axes[row_num][col_num] 149 | ax.set_xticks([]) 150 | ax.set_xlabel(None) 151 | ax.set_yticks([]) 152 | ax.set_xlabel(None) 153 | if allLetters or sum((1 for x in letterbits if x=="1")): 154 | draw_top_triangle(ax, letterbits[0]) 155 | draw_bot_triangle(ax, letterbits[1]) 156 | draw_stripe(ax, letterbits[2]) 157 | draw_letter(ax, lettertext) 158 | draw_left_dot(ax, letterbits[4]) 159 | draw_right_dot(ax, letterbits[5]) 160 | draw_circle(ax, letterbits[3]) 161 | except StopIteration: 162 | return 163 | 164 | 165 | 166 | # allLetters is if you want every letter to be drawn, regardless of whether it has bits flipped. 167 | def visualizeByteString(self, bloomstop, title='', allLetters=False): 168 | bitstring = ''.join((format(x, '#034b')[2:] for x in bloomstop.backend.array_)) 169 | alphabets = self.getAlphabet(bitstring) 170 | #print("Total bits is", len(bitstring), end='\t') 171 | for i, alphabet in enumerate(alphabets): 172 | if len(alphabet) < 1: 173 | continue 174 | #print(f"Alphabet {i} is {len(alphabet)}", end='\t') 175 | #plt.figure(i + 1) 176 | self.interpretAlphabet(alphabet, allLetters, len(bitstring)//self.lettersize) 177 | plt.suptitle(title) 178 | figManager = plt.get_current_fig_manager() 179 | figManager.window.showMaximized() 180 | plt.show() 181 | 182 | -------------------------------------------------------------------------------- /deutschebahn.py: -------------------------------------------------------------------------------- 1 | import random 2 | from bloom_filter2 import BloomFilter 3 | import statistics 4 | import paint 5 | VISUALIZE = True 6 | NUM_PROBES_K = 3 7 | 8 | """ 9 | I Want 2 things here: 10 | 11 | -- 1> fingerprints on every train 12 | 13 | -- 2> fingerprints on every station platform 14 | 15 | ---- 1.5> metrics on how small to make the trains fingerprints before more than 1% FP 16 | 17 | ---- 2.5> probability to get FP for standing at the wrong platform 18 | 19 | 20 | Only 1) and 2) can be engineered for in this class. 21 | 22 | """ 23 | 24 | 25 | class System: 26 | 27 | def __init__(self, alltrains, allstations, alltimetables, max_bloom_elements=30, failure_rate=0.05): 28 | self.alltrains = alltrains 29 | self.allstations = allstations 30 | self.alltimetables = alltimetables 31 | self.max_bloom_elements = max_bloom_elements 32 | self.failure_rate = failure_rate 33 | # Will be train name : bloom filter 34 | self.trains = dict() 35 | # Will be station, track : bloom filter 36 | self.stationtracks = dict() 37 | self.canvas = paint.Canvas() 38 | 39 | print(f"City:\nNum routes = {len(alltrains)}\nnum_stops_each = {statistics.mean( (len(x) for x in alltrains.values()))}\ntotal stops = {len(allstations)}") 40 | 41 | # Every Bloom filter must have the same instantiation params 42 | # But each track on each platform has its own Bloom Filter 43 | # Along with each train. 44 | # Each track is the collection of each train. 45 | # Each train is the collection of each of its stops. 46 | # Use BF.union(otherBF) 47 | 48 | #{'ICE 618': [8000013, 8000050, 8000080, 8000085, 8000105, 8096021, 8070003, 8000281, 8004158, 8000271, 8000294, 8000096, 8005556, 8000170], 49 | # 'ICE 616': [8000013, 8004158, 8000294, 50 | for name, stops in alltrains.items(): 51 | self.trains[name] = self.Train(name, stops, max_bloom_elements=max_bloom_elements, failure_rate=failure_rate) 52 | 53 | #Just grab one for array info 54 | arraysize, probe_rate = self.trains[name].bloomstops.num_bits_m, self.trains[name].bloomstops.num_probes_k 55 | print(f"Blooms have {max_bloom_elements} max elements of size {arraysize}, \nand a failure rate of {failure_rate} and {probe_rate} probes\n") 56 | # {8000013: {'1': ['ICE 618', 'ICE 616', 'ICE 614', 'IC 2366', 'ICE 692', 'TGV 9576', 'ICE 802', 'ICE 612', 57 | # 'IC 1296', 'ICE 690', 'IC 1298', 'ICE 610'], '4': ['ICE 619', 'NJ 40491', 'NJ 421', 'RJX 63', 'ICE 511'], 58 | for station, tracks in alltimetables.items(): 59 | self.stationtracks[station] = dict() 60 | 61 | for track, trains in tracks.items(): 62 | self.stationtracks[station][track] = self.Train(f"{allstations[station]} {track}", [], max_bloom_elements=max_bloom_elements, failure_rate=failure_rate) 63 | 64 | # These are all trains arriving to this track 65 | for trainname in trains: 66 | self.stationtracks[station][track].bloomstops.union( self.trains[trainname].bloomstops ) 67 | 68 | 69 | def bf_has(self, key, bloomfilter): 70 | for bitno in bloomfilter.probe_bitnoer(bloomfilter, key): 71 | if not bloomfilter.backend.is_set(bitno): 72 | return False 73 | return True 74 | 75 | 76 | def test_track_coherence(self, trials): 77 | passengers = self.passenger(trials, alltrains=self.alltrains) 78 | passstops = passengers.next() 79 | 80 | for startstation, endstation in passstops: 81 | waitingtrack = None 82 | 83 | if VISUALIZE: 84 | passbf = BloomFilter(max_elements=self.max_bloom_elements, error_rate=self.failure_rate) 85 | passbf.num_probes_k = NUM_PROBES_K 86 | passbf.add(endstation) 87 | print("\nPASSENGER TICKET") 88 | self.canvas.visualizeByteString(passbf, title=f"Passenger Ticket \nfrom \"{self.allstations[startstation]}\" to \"{self.allstations[endstation]}\"") 89 | showtrack = True 90 | # Passenger starts at station 91 | for trackname, trackbloom in random.sample(self.stationtracks[startstation].items(), len(self.stationtracks[startstation])): #jenky shuffle 92 | if self.bf_has(endstation, trackbloom.bloomstops): 93 | #passenger chooses this track to wait 94 | waitingtrack = trackname 95 | if VISUALIZE: 96 | print("\tCORRECT TRACK FILTER") 97 | self.canvas.visualizeByteString(trackbloom.bloomstops, title=f"Correct track, which will have the correct train") 98 | break 99 | else: 100 | if VISUALIZE and showtrack: 101 | print("\tWRONG TRACK FILTER") 102 | self.canvas.visualizeByteString(trackbloom.bloomstops, title=f"Wrong track, which will not have the right train") 103 | showtrack = False 104 | 105 | if waitingtrack: 106 | showtrain = True 107 | for arrivingtrain in self.alltimetables[startstation][waitingtrack]: 108 | 109 | #if the train has the right fingerprint 110 | if self.bf_has(endstation, self.trains[arrivingtrain].bloomstops): 111 | if endstation in self.trains[arrivingtrain].liststops: 112 | passengers.success() 113 | if VISUALIZE: 114 | print("\t\tCORRECT TRAIN FILTER") 115 | self.canvas.visualizeByteString(self.trains[arrivingtrain].bloomstops, title=f"Correct train, which is going to {self.allstations[endstation]}. Stoplist:\n{', '.join([self.allstations[x] for x in self.trains[arrivingtrain].liststops])}") 116 | break 117 | else: 118 | # WRong train 119 | if VISUALIZE and showtrain: 120 | print("\t\tWRONG TRAIN FILTER") 121 | self.canvas.visualizeByteString(self.trains[arrivingtrain].bloomstops, title=f"Wrong train, which is NOT going to {self.allstations[endstation]}. Stoplist:\n{', '.join([self.allstations[x] for x in self.trains[arrivingtrain].liststops])}") 122 | showtrain = False 123 | 124 | print(passengers.accuracy()) 125 | 126 | 127 | 128 | 129 | # Passenger finds correct track 130 | 131 | # Passenger finds correct train 132 | 133 | # Passenger sees if train actually takes them to their destination 134 | 135 | 136 | 137 | def test_train_coherence(self, trials): 138 | pass #TODO 139 | 140 | def test_trains(self, num_people): 141 | successes = 0 142 | 143 | for person in range(num_people): 144 | #success = self.catch_correct_bus() 145 | successes += 1#success 146 | 147 | return successes / num_people 148 | 149 | 150 | class passenger: 151 | def __init__(self, count, alltrains): 152 | self.count = count 153 | self.trials = 0 154 | self.successes = 0 155 | self.alltrains = alltrains 156 | self.trainnames = list(self.alltrains.keys()) 157 | 158 | 159 | def next(self): 160 | for p in range(self.count): 161 | self.trials += 1 162 | train = random.choice(self.trainnames) 163 | 164 | while len(self.alltrains[train]) < 2: 165 | train = random.choice(self.trainnames) 166 | 167 | stops = random.sample(self.alltrains[train], 2) 168 | yield (stops[0], stops[1]) 169 | 170 | def __next__(self): 171 | return self.next() 172 | 173 | def success(self): 174 | self.successes += 1 175 | 176 | def accuracy(self): 177 | print(f"Trials: {self.trials}, Successes: {self.successes}, Accuracy: {self.successes/self.trials}") 178 | return self.successes / self.trials 179 | 180 | 181 | class Train: 182 | def __init__(self, name, route:list, max_bloom_elements: int, failure_rate: float): 183 | # adds are deterministic, so two arrays of same definitions will add items to same indices. 184 | self.liststops = list(route) 185 | self.name = name 186 | self.bloomstops = BloomFilter(max_elements = max_bloom_elements, error_rate=failure_rate) 187 | self.bloomstops.num_probes_k = NUM_PROBES_K 188 | for r in route: 189 | self.bloomstops.add(r) 190 | 191 | def bloomhas(self, stop): 192 | return stop in self.bloomstops 193 | 194 | def listhas(self, stop): 195 | return stop in self.liststops 196 | 197 | def getarrayinfo(self): 198 | return self.bloomstops.num_bits_m 199 | 200 | def paintbus(self): 201 | return 0 --------------------------------------------------------------------------------