├── testDB.py
├── main.py
├── cityplan.py
├── scrapeDB.py
├── readpdf.py
├── README.md
├── paint.py
└── deutschebahn.py


/testDB.py:
--------------------------------------------------------------------------------
 1 | import deutschebahn
 2 | import pickle
 3 | 
 4 | with open('./pickles/allstations.pkl', 'rb') as F:
 5 |     allstations = pickle.load(F)
 6 | 
 7 | with open('./pickles/alltimetables.pkl', 'rb') as F:
 8 |     alltimetables = pickle.load(F)
 9 | 
10 | with open('./pickles/alltrains.pkl', 'rb') as F:
11 |     alltrains = pickle.load(F)
12 | 
13 | germany = deutschebahn.System(alltrains, allstations, alltimetables, max_bloom_elements=30, failure_rate=0.01)
14 | germany.test_track_coherence(1000)
15 | 


--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
 1 | # This is a sample Python script.
 2 | 
 3 | # Press Shift+F10 to execute it or replace it with your code.
 4 | # Press Double Shift to search everywhere for classes, files, tool windows, actions, and settings.
 5 | 
 6 | 
 7 | # Chicago has almost 2000 buses
 8 | # 150 bus routes
 9 | # 12,000 bus stops
10 | # 10 train lines
11 | # max 50 stops / bus
12 | # 150 train stations
13 | import sys
14 | import cityplan
15 | import paint
16 | # city = cityplan.City(num_bus_routes=150, num_stops_each=50, stops_in_city=12000, max_stops=104)
17 | # print(f"{city.test_city(100000)} success rate")
18 | 
def _bloom_words(bloom):
    """Return the filter's backing array as a list of 32-character bit strings.

    Each element of ``bloom.backend.array_`` is formatted with '#034b' and the
    leading '0b' is dropped, leaving a zero-padded 32-bit binary word.
    """
    return [format(word, '#034b')[2:] for word in bloom.backend.array_]


# Set to True to run the bit-array dump and the paint.Canvas figures below.
# While False the script exits right after printing the success rate, which
# is what the unconditional sys.exit() used to do.
SHOW_BLOOM_DEBUG = False

city = cityplan.City(num_bus_routes=2000, num_stops_each=15, stops_in_city=2000, max_bloom_elements=50)
print(f"{city.test_city(10000)} success rate")


if not SHOW_BLOOM_DEBUG:
    sys.exit()

# Show bit array total for a bus.
# The last line has 27 leading 0's (final 5 good)
# 997 bytes
lastbus = city.mybusses[-1]
words = _bloom_words(lastbus.bloomstops)
print('\n'.join(words))
bitstring = ''.join(words)
#ending = bitstring[-5:]
#bitstring = bitstring[:-32] + ending
print(bitstring)
print(len(bitstring))

# NOTE(review): this is simply the last valid stop of the city; nothing here
# checks that the last bus does not serve it.
not_existing_route = list(city.validstops)[-1]
mynotstop = cityplan.Bus(route=[not_existing_route], max_bloom_elements=40)
print("Not Looking for stop: ", mynotstop.liststops)
#num_stops_each=30, stops_in_city=500, max_bloom_elements=40)

existing_route = list(lastbus.liststops)[-1]
mystop = cityplan.Bus(route=[existing_route], max_bloom_elements=40)
print("Looking for stop: ", mystop.liststops)
print("in", lastbus.liststops)


# Canvas.visualizeByteString reads `.backend.array_` from its argument, so it
# must be given the Bloom filter itself, not a pre-joined bit string.
c = paint.Canvas()
c.visualizeByteString(mynotstop.bloomstops)
c.visualizeByteString(mystop.bloomstops)
c.visualizeByteString(lastbus.bloomstops, allLetters=True)


--------------------------------------------------------------------------------
/cityplan.py:
--------------------------------------------------------------------------------
 1 | import random
 2 | from bloom_filter2 import BloomFilter
 3 | 
 4 | 
 5 | class City:
 6 | 
 7 |     def __init__(self, num_bus_routes=100, num_stops_each = 30, stops_in_city=500, max_bloom_elements=100):
 8 |         self.mybusses = []
 9 |         self.validstops = set()
10 |         print(f"City:\nNum routes = {num_bus_routes}\nnum_stops_each = {num_stops_each}\ntotal stops = {stops_in_city}")
11 | 
12 |         for i in range(num_bus_routes):
13 |             stops = random.sample(range(stops_in_city), num_stops_each)
14 |             self.mybusses.append(Bus(stops, max_bloom_elements, 0.01))
15 |             self.validstops.update(stops)
16 | 
17 |         self.validstops = list(self.validstops)
18 |         arraysize = self.mybusses[-1].getarrayinfo()
19 |         print(f"Busses have {max_bloom_elements} max elements of size {arraysize}, and a failure rate of 0.01\n")
20 | 
21 | 
22 |     # Try catching a random bus, return TRUE if correct caught, FALSE if on wrong bus
23 |     def catch_correct_bus(self):
24 |         mydestination = random.choice(self.validstops)
25 | 
26 |         while True:
27 |             nextbus = random.choice(self.mybusses)
28 | 
29 |             if nextbus.bloomhas(mydestination):
30 |                 return nextbus.listhas(mydestination)
31 |             else:
32 |                 # wait for next bus
33 |                 pass
34 | 
35 | 
36 |     def test_city(self, num_people):
37 |         successes = 0
38 | 
39 |         for person in range(num_people):
40 |             success = self.catch_correct_bus()
41 |             successes += success
42 | 
43 |         return successes / num_people
44 | 
45 | 
46 | 
class Bus:
    """One bus route, kept both as an exact stop list and as a Bloom filter."""

    def __init__(self, route:list, max_bloom_elements:int = 12000, failure_rate:float = 0.01):
        """Store *route* and add each of its stops to a new Bloom filter."""
        self.liststops = list(route)
        # Adds are deterministic, so two filters created with the same
        # definitions put the same item at the same indices.
        self.bloomstops = BloomFilter(max_elements=max_bloom_elements, error_rate=failure_rate)
        self.bloomstops.num_probes_k = 4
        for stop in route:
            self.bloomstops.add(stop)

    def bloomhas(self, stop):
        """Return whether the Bloom filter reports *stop* as present."""
        return stop in self.bloomstops

    def listhas(self, stop):
        """Return whether *stop* is in the exact stop list."""
        return stop in self.liststops

    def getarrayinfo(self):
        """Return the filter's ``num_bits_m`` attribute."""
        return self.bloomstops.num_bits_m

    def paintbus(self):
        """Placeholder; always returns 0."""
        return 0


--------------------------------------------------------------------------------
/scrapeDB.py:
--------------------------------------------------------------------------------
 1 | 'curl -X GET --header "Accept: application/json" --header "Authorization: Bearer  8dfc4d27587b1addbd6aae85884501e2" "https://api.deutschebahn.com/freeplan/v1/location/b"'
 2 | 
 3 | """
 4 | Can curl every location by going through German alphabet
 5 | https://api.deutschebahn.com/freeplan/v1/location/b
 6 | """
 7 | 
 8 | import requests
 9 | dealpha = "A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P, Q, R, S, T, U, V, W, X, Y, Z, Ä, Ö, Ü, ß".lower().split(', ')
10 | 
11 | """
12 | GET ALL STOPS
13 | """
14 | allstations = dict()
15 | for letter in dealpha:
16 |     try:
17 |         r = requests.get(f'https://api.deutschebahn.com/freeplan/v1/location/{letter}', params={"Accept": "application/json", "Authorization": "Bearer  8dfc4d27587b1addbd6aae85884501e2"})
18 |         rj = r.json()
19 |         print(rj[0])
20 |         # [{'name': 'Berlin Hbf', 'lon': 13.369549, 'lat': 52.525589, 'id': 8011160},
21 |         #  {'name': 'Bielefeld Hbf', 'lon': 8.532723, 'lat': 52.029259, 'id': 8000036},
22 |         #   ... ]
23 |         for stop in rj:
24 |             allstations[stop['id']] = stop['name']
25 |     except Exception as e:
26 |         print(e, e.__doc__)
27 | 
28 | 
29 | 
30 | """
31 | GET ALL TIME TABLES
32 | """
33 | alltimetables = dict()
34 | alltrains = dict()
35 | for station_id in allstations.keys():
36 |     try:
37 |         alltimetables[station_id] = dict()
38 |         r = requests.get(f'https://api.deutschebahn.com/freeplan/v1/departureBoard/{station_id}?date=2022-01-15', params={"Accept": "application/json", "Authorization": "Bearer  8dfc4d27587b1addbd6aae85884501e2"})
39 |         rj = r.json()
40 | 
41 |         for train in rj:
42 |             n = train['name']
43 |             if train['track'] not in alltimetables[station_id]:
44 |                 alltimetables[station_id][train['track']] = []
45 | 
46 |             alltimetables[station_id][train['track']].append(n)
47 |             if n not in alltrains:
48 |                 alltrains[n] = []
49 | 
50 |             alltrains[n].append(station_id)
51 | 
52 |     except Exception as e:
53 |         print(e, e.__doc__)
54 | 
55 | """  
56 |   {
57 |     "name": "ICE 618",
58 |     "type": "ICE",
59 |     "boardId": 8000105,
60 |     "stopId": 8000105,
61 |     "stopName": "Frankfurt&#x0028;Main&#x0029;Hbf",
62 |     "dateTime": "2022-01-15T04:46",
63 |     "track": "19",
64 |     "detailsId": "594015%2F200777%2F989772%2F296881%2F80%3fstation_evaId%3D8000105"
65 |   },
66 |   {
67 |     "name": "ICE 827",
68 |     "type": "ICE",
69 |     "boardId": 8000105,
70 |     "stopId": 8000105,
71 |     "stopName": "Frankfurt&#x0028;Main&#x0029;Hbf",
72 |     "dateTime": "2022-01-15T04:54",
73 |     "track": "7",
74 |     "detailsId": "982503%2F330968%2F696966%2F20982%2F80%3fstation_evaId%3D8000105"
75 |   },
76 | """
77 | 
78 | 
79 | 
80 | import pickle
81 | print('dumping')
82 | with open("./pickles/allstations.pkl", 'wb') as F:
83 |     pickle.dump(allstations, F)
84 | 
85 | with open("./pickles/alltimetables.pkl", 'wb') as F:
86 |     pickle.dump(alltimetables, F)
87 | 
88 | with open("./pickles/alltrains.pkl", 'wb') as F:
89 |     pickle.dump(alltrains, F)
90 | print('dumped')
91 | 
92 | 
93 | print(alltrains)
94 | input()
95 | print(alltimetables)
96 | input()
97 | print(allstations)
98 | 


--------------------------------------------------------------------------------
/readpdf.py:
--------------------------------------------------------------------------------
  1 | import re
  2 | import pdfplumber
  3 | import string
  4 | from collections import defaultdict
  5 | r = re.compile("\n([\w-]+ \d+).*?\n.*?(\(\d\d:\d\d\)[\w\W\n]+?\(\d\d:\d\d\)).*?\n")
  6 | 
  7 | 
  8 | def alphafilter(s):
  9 |     r = []
 10 |     for i in s:
 11 |         if i.isalpha() or i == ' ':
 12 |             r.append(i)
 13 |     return ''.join(r)
 14 | 
 15 | 
 16 | results = []
 17 | pleaseadd = True
 18 | with pdfplumber.open('../Downloads/db.pdf') as pdf:
 19 |     for x in pdf.pages:
 20 |         foundroutes = r.findall(x.extract_text())
 21 |         for route in foundroutes:
 22 |             trainname = route[0]
 23 |             stops = [alphafilter(s).strip() for s in route[1].split('-')]
 24 |             for stop in stops:
 25 |                 if len(stop) > 50:
 26 |                     pleaseadd = False
 27 |             if pleaseadd:
 28 |                 results.append((trainname, stops))
 29 |             pleaseadd = True
 30 | 
 31 | allstops = defaultdict(list)
 32 | for train in results:
 33 |     name = train[0]
 34 |     for stop in train[1]:
 35 |         allstops[stop].append(name)
 36 | # [('EC 86', '(13:50)- Verona Porta Nuova - Bolzano / Bozen - (Brennero/Brenner (17:48/18:00)) -\nInnsbruck (18:36/18:40) - (Kufstein (19:24/19:26)) - München (20:25)')]
 37 | 
 38 | 
 39 | 
 40 | 
 41 | 
 42 | """
 43 | 672 Druckzeitraum : 01.04.2019 - 10.06.2019
 44 | ICE-T 1630
 45 | ICE-T 1630 (Ber
 46 | (Berlin-Rummelsburg (Triebzuganlage)) - Berlin-Gesundbrunnen (12:54)- Berlin - Berlin Südkreuz - Halle
 47 | (Saale) - Erfurt - Frankfurt (Main) (16:56)
 48 | Bln-Rummels Tanl - Frankfurt(M), Mi+Fr+So 24.V.-07.VI., auch 05., 07., 30.IV., 10.VI., nicht 31.V., 05.VI.
 49 | Bln-Rummels Tanl - Frankfurt(M), Fr+So 12.IV.-19.V., auch 18., 22.IV., nicht 19., 21.IV.
 50 | Tfz1:411 Hg230 0t BrH193 318m EB a (WC); )p(
 51 | + Tfz1:411 Hg230 0t BrH193 185m EB a (WC); )p(
 52 | Œ; ­
 53 | 7
 54 | Apmzf 28 F7 310 1690 BRGBT FF 1635
 55 | ABpmz 27 78430 78679
 56 | WRmz 26
 57 | 06 Bpmdz 24
 58 | Bpmz 23
 59 | Bpmbz 22
 60 | Bpmzf 21
 61 | 70
 62 | 06) Fahrradstellplätze buchbar ab 1. April
 63 | ICE-T 1631
 64 | ICE-T 1631 <b>
 65 | Frankfurt (Main) (15:02)- Erfurt - Halle (Saale) - Berlin Südkreuz - Berlin - Berlin-Gesundbrunnen (19:10)-
 66 | (Berlin-Rummelsburg (Triebzuganlage))
 67 | Frankfurt(M) - Bln-Rummels Tanl, tgl. 01.-07.IV., 20.V.-10.VI.
 68 | Frankfurt(M) - Bln-Rummels Tanl, tgl. 08.IV.-19.V.
 69 | FF Tfz1:411 Hg230 0t BrH193 318m EB a (WC); )p(
 70 | Reisendensicherung gemäß Richtlinie 419.3312 (siehe Anhang Xb)
 71 | Œ; ­
 72 | 7
 73 | Bpmzf 31 F5 311 a) 78674 FF BRGBT 78434
 74 | Bpmbz 32 1627 1695
 75 | Bpmz 33 1697
 76 | Bpmkz 37
 77 | Apmzf 38
 78 | Bpmzf 21 F7 310 1636 FF BRGBT 1701
 79 | Bpmbz 22 1634 1197
 80 | Bpmz 23 78471
 81 | 06 Bpmdz 24 1691
 82 | WRmz 26 78444
 83 | ABpmz 27 92736
 84 | Apmzf 28
 85 | 71
 86 | 06) Fahrradstellplätze buchbar ab 1. April a) Fr auch 18., 30.IV., 29.V., nicht 19.IV.
 87 | 2019_ZpAR Wi_B2
 88 | 
 89 | 
 90 | 
 91 | 
 92 | 
 93 | 
 94 | Output for each page:
 95 | [('ICE-T 1630', '(12:54)- Berlin - Berlin Südkreuz - Halle\n(Saale) - Erfurt - Frankfurt (Main) (16:56)'), ('ICE-T 1631', '(15:02)- Erfurt - Halle (Saale) - Berlin Südkreuz - Berlin - Berlin-Gesundbrunnen (19:10)')]
 96 | RRRR[0]
 97 | ('ICE-T 1630', '(12:54)- Berlin - Berlin Südkreuz - Halle\n(Saale) - Erfurt - Frankfurt (Main) (16:56)')
 98 | RRRR[1]
 99 | ('ICE-T 1631', '(15:02)- Erfurt - Halle (Saale) - Berlin Südkreuz - Berlin - Berlin-Gesundbrunnen (19:10)')
100 | """
101 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # BusStopBloomfilter
  2 | Proof of Concept:
  3 | 
  4 | Navigating arbitrarily complex bus systems can be tricky, especially as bus listings and routes are listed in O(N) style wall-posters. Can we use bloom filters to make routing decisions O(1) for the passenger? 
  5 | 
  6 | Here is a short look at two problems: 
  7 | 1) how can we visualize a bit array?
  8 | 2) how few bits can we get away with to avoid *too many* false positives? 
  9 | 
 10 | My technique can visualize 192 bits, and achieve a < 0.5% false positive rate on my Deutsche Bahn system simulation. 
 11 | 
 12 | The following video presentation covers the same information as this README, just in video format. Honestly, you shouldn't even watch it unless you really like sitting through presentations:
 13 | 
 14 | https://www.youtube.com/watch?v=rGu2NkZPEzo
 15 | 
 16 | Population will expand and public transportation will become more complex. How can we rethink how we do public transportation signage to keep finding your bus (and/or your bus stop) independent of how many buses are arriving or how complex their routes are? 
 17 | 
 18 | 
 19 | 
 20 | 
 21 | Example:
 22 | 
 23 | **You will need to know the "fingerprint" of your destination. This is the fingerprint for Stendal Hbf (three bits of information).** 
 24 | > ![image](https://user-images.githubusercontent.com/9337973/182030849-b8a04c14-814e-4075-ae7a-ba321cb38768.png) 
 25 | 
 26 | 
 27 | 
 28 | 
 29 | **Every bus stop's destination placard will show the accumulated fingerprints of all the buses coming through. This is the fingerprint of a matching stop. There is at least one bus passing through this stop which will also go to your destination. 
 30 | Notice the 'e', 'G', and 'g' all have their symbols present.** 
 31 | > ![image](https://user-images.githubusercontent.com/9337973/182030864-cec24c37-6703-48cf-8b2a-f1063a3fd06d.png) 
 32 | 
 33 | 
 34 | 
 35 | **Here we have a bus/train coming. This is its fingerprint. Your destination fingerprint does not match, so you do not get on.** 
 36 | > ![image](https://user-images.githubusercontent.com/9337973/182030880-0c80d261-aad8-44a1-8eef-9039ab5cf026.png) 
 37 | 
 38 | 
 39 | 
 40 | **This bus coming is correct, your fingerprint matches. Get on!** 
 41 | > ![image](https://user-images.githubusercontent.com/9337973/182030893-d9b97e27-671d-45c2-b200-73b69e2123de.png) 
 42 | 
 43 | 
 44 | 
 45 | Benefits are that 
 46 | * scaling your bus system does not lead to more complicated fingerprints
 47 | * riders do not need to read signage in any particular language or alphabet
 48 | * very quick to see where you need to be and which bus you should board
 49 | 
 50 | 
 51 | Another visualization technique I've thought of is a 2D array of emojis. If we could get 15x15 emojis we would have 225 bits of info. We could subdivide that into nine 5x5 square tiles like a tic-tac-toe board, give each tile a unique background colour, and tell people "if you have a crocodile in the upper-left blue tile and...", etc. Emojis may look tacky, but they are distinct images, compact in size, free, prevalent, and people are already largely familiar with that particular set of pictures, so it would be more intuitive. With any system I can think of, high-res screens or printed pieces of paper would be required to display the level of detail the symbols need. 
 52 | 
 53 | 
 54 | <!---
 55 | <table>
 56 |   <tr>
 57 |     <td style="background:lightblue; border: 1px solid #333;">
 58 |       ⛺️ 💚 👀 🖊 ☝️<br>
 59 |       🗳 🏃 🔵 🙆 ☀️<br>
 60 |       🏫 📙 💯 ✏️ 🍺<br>
 61 |       🍂 〽️ 🚎 🌽 🔱<br>
 62 |       🔼 🕓 😼 💳 🐏
 63 |     </td>
 64 |     <td style="background:lightgreen; border: 1px solid #333;">  
 65 |       🐮 🐬 🚹 ⏏ 🌁<br>
 66 |       😴 🚧 🏑 ♨️ 🎪<br>
 67 |       😸 🅰️ 🙋 ♿️ ✊<br>
 68 |       🐊 🕟 🐨 🏚 🚬<br>
 69 |       ™️ 💰 😰 🏡 🍿
 70 |     </td>
 71 |     <td style="background:yellow;  border: 1px solid #333;">
 72 |       💜 ☮ 🛣 ☢ 🐙<br>
 73 |       💉 ⛈ 🎞 🎍 🕊<br>
 74 |       🍭 🙁 👻 👎 📌<br>
 75 |       🚀 📘 🐛 🌖 🍖<br>
 76 |       🔥 ✡ 💗 🔯 ➿
 77 |     </td>
 78 |   </tr>
 79 |   <tr>
 80 |     <td style="background:lightpink;  border: 1px solid #333;">
 81 |       🦄 👷 👵 🌵 😌<br>
 82 |       😖 🏐 ✴️ 🔙 🗃<br>
 83 |       🚅 🏟 🃏 🐜 ✍<br>
 84 |       🎑 👰 🎻 👐 🎵<br>
 85 |       🤔 🔒 🌧 🚺 💈
 86 |     </td>
 87 |     <td style=" border: 1px solid #333;">  
 88 |       🍐 🌪 😄 🎓 🌃<br>
 89 |       ⏳ 🔨 🚊 ◽️ ↖️<br>
 90 |       ⚙ 😡 🔊 🎙 ➰<br>
 91 |       🔋 ♈️ 😚 ⚔ ⏪<br>
 92 |       🕒 ☺️ 🍳 #️⃣ 🗼
 93 |     </td>
 94 |     <td style="background:lightgray;  border: 1px solid #333;">
 95 |       🎫 🕉 ☹ 🔏 💹<br>
 96 |       📕 🎿 🌾 🐻 🏞<br>
 97 |       ❎ 💣 🐩 🔝 🚞<br>
 98 |       👚 🍎 🚖 😽 ☑️<br>
 99 |       🗿 🌛 🈹 📞 🏓
100 |     </td>
101 |   </tr>
102 |   <tr>
103 |     <td style="background:orange; border: 1px solid #333;">
104 |       🐰 🎮 🚲 🚛 😹<br>
105 |       🎚 🚣 🍨 🚂 🍋<br>
106 |       🚭 ⛑ 📝 🚋 🗝<br>
107 |       🙅 🕵 🗯 🕡 🛁<br>
108 |       🌇 👶 🏖 ❣ 🐿
109 |     </td>
110 |     <td style="background:violet; border: 1px solid #333;">  
111 |       🕞 🍊 🈸 9️⃣ 😁<br>
112 |       😝 🏤 ⬇️ 🏕 ⏸<br>
113 |       😲 🎹 🍵 🚁 🌠<br>
114 |       🤒 🔂 🌶 ✂️ 🐃<br>
115 |       🐦 ♐️ 🦂 🐇 2️⃣
116 |     </td>
117 |     <td style="background:black;  border: 1px solid #333;">
118 |       ⚓️ 🍝 💬 🍜 👑<br>
119 |       😤 🎄 🚒 👧 🌕<br>
120 |       🌈 🏀 💦 👄 🏰<br>
121 |       🚉 💐 🍁 👳 🙂<br>
122 |       🔎 ⭐️ 🚢 👪 😪
123 |     </td>
124 |   </tr>
125 |   </table>
126 | -->
127 | 
128 | The full emoji table could look like (randomly generated):
129 | 
130 | ![image](https://user-images.githubusercontent.com/9337973/227025986-f332118c-8043-4f30-9855-298d28b5f93f.png)
131 | 
132 | but in practice it would more resemble:
133 | 
134 | ![image](https://user-images.githubusercontent.com/9337973/227028035-7e947ca1-7555-4362-b72d-e0107bf3afa6.png)
135 | 
136 | 
137 | With a fingerprint looking like:
138 | 
139 | ![image](https://user-images.githubusercontent.com/9337973/227028595-7bd83d05-4c4a-49a1-964c-dda2080d7243.png)
140 | 
141 | 


--------------------------------------------------------------------------------
/paint.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import matplotlib.pyplot as plt
  3 | from matplotlib.patches import Polygon
  4 | import string
  5 | 
  6 | 
  7 | def plot_masked(ax):
  8 |     """Plots the image masked outside of a circle using masked arrays"""
  9 |     # Calculate the distance from the center of the circle
 10 |     ix, iy = np.meshgrid(np.arange(100), np.arange(100))
 11 |     distance = np.sqrt((ix - 0)**2 + (iy - 0)**2)
 12 | 
 13 |     # Mask portions of the data array outside of the circle
 14 | 
 15 | 
 16 | def draw_circle(ax, color):
 17 |     if color=='0':
 18 |         return
 19 |     pts = np.array([[0, 0],
 20 |                     [0, 0.25],
 21 |                     [0.25, 0] ])
 22 |     p = Polygon(pts, color="black")
 23 |     ax.add_patch(p)
 24 |     pts = np.array([[1, 0],
 25 |                     [1, 0.25],
 26 |                     [0.75, 0] ])
 27 |     p = Polygon(pts, color="black")
 28 |     ax.add_patch(p)
 29 |     pts = np.array([[1, 1],
 30 |                     [1, 0.75],
 31 |                     [0.75, 1] ])
 32 |     p = Polygon(pts, color="black")
 33 |     ax.add_patch(p)
 34 |     pts = np.array([[0, 1],
 35 |                     [0, 0.75],
 36 |                     [0.25, 1] ])
 37 |     p = Polygon(pts, color='black')
 38 |     ax.add_patch(p)
 39 | 
 40 | 
 41 | def draw_top_triangle(ax, color:str):
 42 |     # All shapes on a 5x5 square
 43 |     # ax = plt.gca() PASS IN, SHARED
 44 |     if color=='0':
 45 |         return
 46 |     pts = np.array([[0, 1],
 47 |                     [1, 1],
 48 |                     [0, 0] ])
 49 | 
 50 |     p = Polygon(pts, facecolor="pink", edgecolor="gray")
 51 |     ax.add_patch(p)
 52 | 
 53 | 
 54 | def draw_bot_triangle(ax, color:str):
 55 |     # All shapes on a 5x5 square
 56 |     # ax = plt.gca() PASS IN, SHARED
 57 |     if color=='0':
 58 |         return
 59 |     pts = np.array([[1, 0],
 60 |                     [1, 1],
 61 |                     [0, 0] ])
 62 | 
 63 |     p = Polygon(pts, facecolor="lightblue", edgecolor="gray")
 64 |     ax.add_patch(p)
 65 | 
 66 | 
 67 | def draw_stripe(ax, color:bool):
 68 |     # All shapes on a 5x5 square
 69 |     # ax = plt.gca() PASS IN, SHARED
 70 |     if color=='0':
 71 |         return
 72 |     pts = np.array([[0, 0.8],
 73 |                     [0, 1],
 74 |                     [.2, 1],
 75 |                     [1, .2],
 76 |                     [1, 0],
 77 |                     [.8, 0] ])
 78 | 
 79 |     p = Polygon(pts, facecolor="lightyellow", edgecolor="gray" )
 80 |     ax.add_patch(p)
 81 | 
 82 | def draw_left_dot(ax, appear:str):
 83 |     if appear == '0':
 84 |         return
 85 |     ax.text(0.05, 0.45, '.', fontsize=40, color='black')
 86 | 
 87 | 
 88 | def draw_right_dot(ax, appear:str):
 89 |     if appear == '0':
 90 |         return
 91 |     ax.text(0.8, 0.45, '.', fontsize=40, color='black')
 92 | 
 93 | def draw_letter(ax, letter):
 94 |     ax.text(0.4 if len(letter) == 1 else 0.3, 0.33, letter, fontsize=40, color="black", fontdict={'family':'monospace'})
 95 | 
 96 | #def plot_circle(ax, )
 97 | 
 98 | 
 99 | 
100 | 
101 | 
102 | 
103 | 
104 | # TODO: Left side or right side letters
105 | # Black or LIME color
class Canvas:
    """Draws a Bloom filter's bit array as grids of decorated symbols.

    Every symbol encodes ``lettersize`` (6) consecutive bits; a set bit turns
    one decoration on:

        bit 0  top triangle      bit 3  corners
        bit 1  bottom triangle   bit 4  left dot
        bit 2  stripe            bit 5  right dot

    ``allLetters`` holds the 64 symbol labels in drawing order, so one
    "alphabet" covers ``alphabetsize`` = 64 * 6 = 384 bits.
    """

    def __init__(self):
        self.lettersize = 6  # triangle, triangle, stripe, corners, leftdot, rightdot
        # Raw strings: "\$" is an invalid escape in a normal literal. The value
        # (backslash + dollar) is unchanged; matplotlib renders it as a
        # literal dollar sign.
        LETS = list(string.ascii_uppercase) + ['?', r"\$", "*", "!", "%", "@"]
        lets = list(string.ascii_lowercase) + ['??', r"\$\$", "**", "!!", "%%", "@@"]
        # Interleave upper and lower labels: 'A', 'a', 'B', 'b', ...
        self.allLetters = [label for pair in zip(LETS, lets) for label in pair]
        self.alphabetsize = len(self.allLetters) * self.lettersize


    def getAlphabet(self, bitarray):
        """Yield consecutive ``alphabetsize``-bit slices of *bitarray*.

        The last slice may be shorter, and is empty when the length is an
        exact multiple of ``alphabetsize``; callers are expected to skip
        empty slices.
        """
        for i in range(len(bitarray) // self.alphabetsize + 1):
            yield bitarray[i * self.alphabetsize: min((i + 1) * self.alphabetsize, len(bitarray))]


    def getLetter(self, alphabet):
        """Yield ``(bits, label)`` for every complete symbol in *alphabet*.

        *alphabet* is a bit string of at most ``alphabetsize`` characters;
        ``bits`` is a ``lettersize``-character slice and ``label`` the
        matching entry of ``allLetters``. Trailing bits that do not fill a
        whole symbol are ignored.
        """
        for i in range(len(alphabet) // self.lettersize):
            letter = alphabet[i * self.lettersize: (i + 1) * self.lettersize]
            yield letter, self.allLetters[i]


    def interpretAlphabet(self, alphabetarray, allLetters, numletters=8):
        """Draw one alphabet's symbols on a new figure of 8-column subplots.

        Args:
            alphabetarray: bit string for this alphabet (see ``getLetter``).
            allLetters: when true, draw every symbol; otherwise only symbols
                with at least one set bit.
            numletters: symbol count used to size the grid; the grid has
                ``numletters // 8`` rows, clamped to the range 1..8.
        """
        n_cols = 8
        # At least one row, so plt.subplots never receives 0 rows.
        n_rows = max(1, min(8, numletters // n_cols))
        # squeeze=False keeps `axes` two-dimensional even for a single row.
        _, axes = plt.subplots(n_rows, n_cols, squeeze=False)

        cells = (axes[row_num][col_num] for row_num in range(n_rows) for col_num in range(n_cols))
        # zip stops as soon as either the grid or the symbols run out; cells
        # left over are not touched.
        for ax, (letterbits, lettertext) in zip(cells, self.getLetter(alphabetarray)):
            ax.set_xticks([])
            ax.set_xlabel(None)
            ax.set_yticks([])
            ax.set_ylabel(None)
            if allLetters or "1" in letterbits:
                draw_top_triangle(ax, letterbits[0])
                draw_bot_triangle(ax, letterbits[1])
                draw_stripe(ax, letterbits[2])
                draw_letter(ax, lettertext)
                draw_left_dot(ax, letterbits[4])
                draw_right_dot(ax, letterbits[5])
                draw_circle(ax, letterbits[3])


    # allLetters is if you want every letter to be drawn, regardless of whether it has bits flipped.
    def visualizeByteString(self, bloomstop, title='', allLetters=False):
        """Show the bits of the Bloom filter *bloomstop* as symbol figures.

        Despite the name, *bloomstop* must be the filter object itself: its
        ``backend.array_`` words are formatted as 32-bit binary strings and
        joined. One figure is drawn per non-empty alphabet; *title* is set on
        the current figure before everything is shown.
        """
        bitstring = ''.join(format(x, '#034b')[2:] for x in bloomstop.backend.array_)
        total_letters = len(bitstring) // self.lettersize
        for alphabet in self.getAlphabet(bitstring):
            if not alphabet:
                continue
            self.interpretAlphabet(alphabet, allLetters, total_letters)
        plt.suptitle(title)
        figManager = plt.get_current_fig_manager()
        try:
            figManager.window.showMaximized()
        except AttributeError:
            # showMaximized() is a Qt window method; on other backends just
            # show the figure at its default size.
            pass
        plt.show()
181 | 
182 | 


--------------------------------------------------------------------------------
/deutschebahn.py:
--------------------------------------------------------------------------------
  1 | import random
  2 | from bloom_filter2 import BloomFilter
  3 | import statistics
  4 | import paint
  5 | VISUALIZE = True
  6 | NUM_PROBES_K = 3
  7 | 
  8 | """
  9 | I Want 2 things here:
 10 | 
 11 | -- 1> fingerprints on every train
 12 | 
 13 | -- 2> fingerprints on every station platform
 14 | 
 15 | ---- 1.5> metrics on how small to make the trains fingerprints before more than 1% FP
 16 | 
 17 | ---- 2.5> probability to get FP for standing at the wrong platform
 18 | 
 19 | 
 20 | Only 1) and 2) can be engineered for in this class. 
 21 | 
 22 | """
 23 | 
 24 | 
 25 | class System:
 26 | 
 27 |     def __init__(self, alltrains, allstations, alltimetables, max_bloom_elements=30, failure_rate=0.05):
 28 |         self.alltrains = alltrains
 29 |         self.allstations = allstations
 30 |         self.alltimetables = alltimetables
 31 |         self.max_bloom_elements = max_bloom_elements
 32 |         self.failure_rate = failure_rate
 33 |         # Will be train name : bloom filter
 34 |         self.trains = dict()
 35 |         # Will be station, track : bloom filter
 36 |         self.stationtracks = dict()
 37 |         self.canvas = paint.Canvas()
 38 | 
 39 |         print(f"City:\nNum routes = {len(alltrains)}\nnum_stops_each = {statistics.mean( (len(x) for x in alltrains.values()))}\ntotal stops = {len(allstations)}")
 40 | 
 41 |         # Every Bloom filter must have the same instantiation params
 42 |         # But each track on each platform has its own Bloom Filter
 43 |         # Along with each train.
 44 |         # Each track is the collection of each train.
 45 |         # Each train is the collection of each of its stops.
 46 |         # Use BF.union(otherBF)
 47 | 
 48 |         #{'ICE 618': [8000013, 8000050, 8000080, 8000085, 8000105, 8096021, 8070003, 8000281, 8004158, 8000271, 8000294, 8000096, 8005556, 8000170],
 49 |         # 'ICE 616': [8000013, 8004158, 8000294,
 50 |         for name, stops in alltrains.items():
 51 |             self.trains[name] = self.Train(name, stops, max_bloom_elements=max_bloom_elements, failure_rate=failure_rate)
 52 | 
 53 |         #Just grab one for array info
 54 |         arraysize, probe_rate = self.trains[name].bloomstops.num_bits_m, self.trains[name].bloomstops.num_probes_k
 55 |         print(f"Blooms have {max_bloom_elements} max elements of size {arraysize}, \nand a failure rate of {failure_rate} and {probe_rate} probes\n")
 56 |         # {8000013: {'1': ['ICE 618', 'ICE 616', 'ICE 614', 'IC 2366', 'ICE 692', 'TGV 9576', 'ICE 802', 'ICE 612',
 57 |         # 'IC 1296', 'ICE 690', 'IC 1298', 'ICE 610'], '4': ['ICE 619', 'NJ 40491', 'NJ 421', 'RJX 63', 'ICE 511'],
 58 |         for station, tracks in alltimetables.items():
 59 |             self.stationtracks[station] = dict()
 60 | 
 61 |             for track, trains in tracks.items():
 62 |                 self.stationtracks[station][track] = self.Train(f"{allstations[station]} {track}", [], max_bloom_elements=max_bloom_elements, failure_rate=failure_rate)
 63 | 
 64 |                 # These are all trains arriving to this track
 65 |                 for trainname in trains:
 66 |                     self.stationtracks[station][track].bloomstops.union( self.trains[trainname].bloomstops )
 67 | 
 68 | 
 69 |     def bf_has(self, key, bloomfilter):
 70 |         for bitno in bloomfilter.probe_bitnoer(bloomfilter, key):
 71 |             if not bloomfilter.backend.is_set(bitno):
 72 |                 return False
 73 |         return True
 74 | 
 75 | 
 76 |     def test_track_coherence(self, trials):
 77 |         passengers = self.passenger(trials, alltrains=self.alltrains)
 78 |         passstops = passengers.next()
 79 | 
 80 |         for startstation, endstation in passstops:
 81 |             waitingtrack = None
 82 | 
 83 |             if VISUALIZE:
 84 |                 passbf = BloomFilter(max_elements=self.max_bloom_elements, error_rate=self.failure_rate)
 85 |                 passbf.num_probes_k = NUM_PROBES_K
 86 |                 passbf.add(endstation)
 87 |                 print("\nPASSENGER TICKET")
 88 |                 self.canvas.visualizeByteString(passbf, title=f"Passenger Ticket \nfrom \"{self.allstations[startstation]}\" to \"{self.allstations[endstation]}\"")
 89 |             showtrack = True
 90 |             # Passenger starts at station
 91 |             for trackname, trackbloom in random.sample(self.stationtracks[startstation].items(), len(self.stationtracks[startstation])): #jenky shuffle
 92 |                 if self.bf_has(endstation, trackbloom.bloomstops):
 93 |                     #passenger chooses this track to wait
 94 |                     waitingtrack = trackname
 95 |                     if VISUALIZE:
 96 |                         print("\tCORRECT TRACK FILTER")
 97 |                         self.canvas.visualizeByteString(trackbloom.bloomstops, title=f"Correct track, which will have the correct train")
 98 |                     break
 99 |                 else:
100 |                     if VISUALIZE and showtrack:
101 |                         print("\tWRONG TRACK FILTER")
102 |                         self.canvas.visualizeByteString(trackbloom.bloomstops, title=f"Wrong track, which will not have the right train")
103 |                         showtrack = False
104 | 
105 |             if waitingtrack:
106 |                 showtrain = True
107 |                 for arrivingtrain in self.alltimetables[startstation][waitingtrack]:
108 | 
109 |                     #if the train has the right fingerprint
110 |                     if self.bf_has(endstation, self.trains[arrivingtrain].bloomstops):
111 |                         if endstation in self.trains[arrivingtrain].liststops:
112 |                             passengers.success()
113 |                             if VISUALIZE:
114 |                                 print("\t\tCORRECT TRAIN FILTER")
115 |                                 self.canvas.visualizeByteString(self.trains[arrivingtrain].bloomstops, title=f"Correct train, which is going to {self.allstations[endstation]}. Stoplist:\n{', '.join([self.allstations[x] for x in self.trains[arrivingtrain].liststops])}")
116 |                         break
117 |                     else:
118 |                         # WRong train
119 |                         if VISUALIZE and showtrain:
120 |                             print("\t\tWRONG TRAIN FILTER")
121 |                             self.canvas.visualizeByteString(self.trains[arrivingtrain].bloomstops, title=f"Wrong train, which is NOT going to {self.allstations[endstation]}. Stoplist:\n{', '.join([self.allstations[x] for x in self.trains[arrivingtrain].liststops])}")
122 |                             showtrain = False
123 | 
124 |         print(passengers.accuracy())
125 | 
126 | 
127 | 
128 | 
129 |         # Passenger finds correct track
130 | 
131 |         # Passenger finds correct train
132 | 
133 |         # Passenger sees if train actually takes them to their destination
134 | 
135 | 
136 | 
137 |     def test_train_coherence(self, trials):
138 |         pass #TODO
139 | 
140 |     def test_trains(self, num_people):
141 |         successes = 0
142 | 
143 |         for person in range(num_people):
144 |             #success = self.catch_correct_bus()
145 |             successes += 1#success
146 | 
147 |         return successes / num_people
148 | 
149 | 
150 |     class passenger:
151 |         def __init__(self, count, alltrains):
152 |             self.count = count
153 |             self.trials = 0
154 |             self.successes = 0
155 |             self.alltrains = alltrains
156 |             self.trainnames = list(self.alltrains.keys())
157 | 
158 | 
159 |         def next(self):
160 |             for p in range(self.count):
161 |                 self.trials += 1
162 |                 train = random.choice(self.trainnames)
163 | 
164 |                 while len(self.alltrains[train]) < 2:
165 |                     train = random.choice(self.trainnames)
166 | 
167 |                 stops = random.sample(self.alltrains[train], 2)
168 |                 yield (stops[0], stops[1])
169 | 
170 |         def __next__(self):
171 |             return self.next()
172 | 
173 |         def success(self):
174 |             self.successes += 1
175 | 
176 |         def accuracy(self):
177 |             print(f"Trials: {self.trials}, Successes: {self.successes}, Accuracy: {self.successes/self.trials}")
178 |             return self.successes / self.trials
179 | 
180 | 
181 |     class Train:
182 |         def __init__(self, name, route:list, max_bloom_elements: int, failure_rate: float):
183 |             # adds are deterministic, so two arrays of same definitions will add items to same indices.
184 |             self.liststops = list(route)
185 |             self.name = name
186 |             self.bloomstops = BloomFilter(max_elements = max_bloom_elements, error_rate=failure_rate)
187 |             self.bloomstops.num_probes_k = NUM_PROBES_K
188 |             for r in route:
189 |                 self.bloomstops.add(r)
190 | 
191 |         def bloomhas(self, stop):
192 |             return stop in self.bloomstops
193 | 
194 |         def listhas(self, stop):
195 |             return stop in self.liststops
196 | 
197 |         def getarrayinfo(self):
198 |             return self.bloomstops.num_bits_m
199 | 
200 |         def paintbus(self):
201 |             return 0


--------------------------------------------------------------------------------