""".format(
122 | file_name.split("\\")[-1], current_path + "\\csv")
123 |
124 |
125 |
126 | if __name__ == "__main__":
127 | Waze_Police_Scraper.start_script()
128 | try:
129 | load()
130 | app.run()
131 | except KeyboardInterrupt:
132 | exit()
--------------------------------------------------------------------------------
/templates/map.html:
--------------------------------------------------------------------------------
(map.html is auto-generated by folium's m.save() in Waze_Police_Scraper.py; its contents were not captured in this dump)
--------------------------------------------------------------------------------
/Waze_Police_Scraper.py:
--------------------------------------------------------------------------------
import json
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from browsermobproxy import Server
from requests.exceptions import MissingSchema
import requests
import time
import folium
import subprocess
import platform
import os
from datetime import datetime

15 | print("""Waze Police Scraper
16 |
17 | Waze Police Scraper will open the Mozilla Firefox browser, onto Waze's live map website.\n
18 | It'll scrape all the police's locations from your preferred location,\n
19 | by moving the mouse to that location. It also scrapes police traps.\n
20 | Every cop that's scraped also has its own dataset. It has all the geographic location,\n
21 | but also the number of up votes on its existence by Waze's users, confidence, and reliability, by Waze itself.\n
22 | Sometimes speed can also be included.\n
23 | You can view all that in the map that will be generated after the program finished scraping.\n
24 |
25 | Instructions:
26 |
27 | Choose how much seconds do you want the program to scrape police. That time is for the user to move to another location,
28 | to scrape police vehicles / Traps reported by Waze's users.\n
29 | If you press Enter, without entering any number, the number will be the default, 5 seconds, which is also the recommended value.
30 | After the Firebox browser launched and you're done scraping, just close the browser, and wait 5 seconds.
31 | After that, go to localhost:5000 or 127.0.0.1:5000 and you'll be presented with a map showing all the scraped police,
32 | and by clicking on them you'll be able to see more information on them like coordinates,
33 | type of police (vehicle or trap), speed, number of up votes by Waze's users, and confidence and reliability estimated by Waze.\n
34 | Also, you can download the scraped data as JSON, XLS (Excel), and CSV.
35 | """)
36 |
def directory_exist(file_type):
    return os.path.isdir('./{}'.format(file_type))


def clear_screen():
    if platform.system() == "Windows":
        subprocess.Popen("cls", shell=True).communicate()
    else:  # Linux and Mac
        print("\033c", end="")


def save_config():
    date = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    file_name = "config.config"
    # No need to check whether the file exists: mode "w" creates it anyway.
    with open(file_name, "w") as f:
        f.write(date)


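# A matching reader for the timestamp written by save_config(). This is an
# illustrative sketch only; load_config is a hypothetical helper that is not
# called anywhere in this project.
def load_config():
    with open("config.config") as f:
        return datetime.strptime(f.read().strip(), "%Y-%m-%d %H:%M:%S")

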
def personalised_info():
    auto_or_manual = input(
        "Do you want the software to scrape the Waze map automatically? (Answer with 'a') ")  # Pretty much the only option, since there is no manual mode

    sec = input(
        "How many seconds should pass between scrapes? Maximum is 30 seconds, minimum is 5 seconds. "
        "Press Enter without typing anything to use the recommended value of 5 seconds. ")
    save_config()
    return auto_or_manual, sec


def start_server():
    global server, proxy, driver
    server = Server(
        "C:\\Users\\Yahav Bahat\\Downloads\\browsermob-proxy-2.1.4-bin\\browsermob-proxy-2.1.4\\bin\\browsermob-proxy",
        options={'port': 8080})

    server.start()
    proxy = server.create_proxy()

    # proxy.wait_for_traffic_to_stop(6000, 9000)

    profile = webdriver.FirefoxProfile()
    profile.set_proxy(proxy.selenium_proxy())
    driver = webdriver.Firefox(
        executable_path="C:\\Users\\Yahav Bahat\\Downloads\\geckodriver-v0.26.0-win64\\geckodriver.exe",
        firefox_profile=profile)
    # Navigate to the application home page
    driver.get("https://www.waze.com/livemap?utm_source=waze_website&utm_campaign=waze_website")

    return driver


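# The two absolute paths above are machine-specific. A hedged sketch of reading
# them from environment variables instead; BMP_PATH and GECKODRIVER_PATH are
# hypothetical variable names, not something this project defines:
#
#   bmp_path = os.environ.get("BMP_PATH", "browsermob-proxy")
#   gecko_path = os.environ.get("GECKODRIVER_PATH", "geckodriver.exe")
#   server = Server(bmp_path, options={'port': 8080})
#   driver = webdriver.Firefox(executable_path=gecko_path, firefox_profile=profile)

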
urls = []
count = 1  # 1-based key into data_parsed; also doubles as the scraped-report counter
data_parsed = {}  # all scraped reports, keyed by count
inner_nested_data_parsed = {}  # fields of the report currently being scraped
data_list = []  # holds data_parsed, in the wrapping that build_map() expects


def get_data(sec):
    global count, inner_nested_data_parsed
    start = time.perf_counter()  # Measure time
    # Tag the HAR (network log) with a name; new_har also returns what was
    # captured since the previous call
    har = proxy.new_har("waze_{}".format(count))
    # Find the URL of the request where the data is stored in JSON format
    har = str(har)
    str_1 = "https://www.waze.com/il-rtserver/web/TGeoRSS?"
    str_2 = "&types=alerts%2Ctraffic%2Cusers"
    indx_1 = har.find(str_1)
    indx_2 = har.find(str_2)
    url = har[indx_1:indx_2] + str_2
    urls.append(url)
    # Loading data (iterate over a copy, since entries are removed as we go)
    for d in list(urls):
        if d == str_2:  # The user did not move, so no TGeoRSS request was captured
            print("Please move to your preferred location.")
            urls.remove(d)
        else:
            data = requests.get(d).text
            if "DOCTYPE" not in data:
                data = json.loads(data)
                end = time.perf_counter()  # Measure time
                print("Time taken to fetch the data: {} seconds".format(end - start))
                urls.remove(d)
                # Finding indexes to scrape
                good_index = [x for x in range(len(data["alerts"]))
                              if data["alerts"][x]["type"] == "POLICE"]
                # Scraping data
                for x in good_index:
                    alert = data["alerts"][x]
                    inner_nested_data_parsed["type_"] = alert["type"]
                    if alert.get("subtype"):
                        inner_nested_data_parsed["subtype"] = alert["subtype"]
                    inner_nested_data_parsed["country"] = alert["country"]
                    inner_nested_data_parsed["nThumbsUp"] = alert["nThumbsUp"]
                    inner_nested_data_parsed["confidence"] = alert["confidence"]
                    inner_nested_data_parsed["reliability"] = alert["reliability"]
                    inner_nested_data_parsed["speed"] = alert["speed"]
                    inner_nested_data_parsed["location_x"] = alert["location"]["x"]
                    inner_nested_data_parsed["location_y"] = alert["location"]["y"]
                    data_parsed[count] = inner_nested_data_parsed
                    inner_nested_data_parsed = {}
                    count += 1
                # data_parsed is a single accumulating dict, so add it to
                # data_list only once to avoid duplicate map markers.
                if data_parsed not in data_list:
                    data_list.append(data_parsed)
            else:
                print("Data is inaccessible, wait {} seconds to try again.".format(sec))
    print("Scraped {} policemen / police cars.".format(count - 1))  # count starts at one, so subtract
    return data_list


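# The parser above assumes each entry of data["alerts"] is shaped roughly like
# the sketch below. The field names come from the accesses in get_data(); the
# values (and the subtype string) are purely illustrative:
#
#   {
#       "type": "POLICE",
#       "subtype": "POLICE_VISIBLE",            # may be empty or absent
#       "country": "IL",
#       "nThumbsUp": 3,
#       "confidence": 2,
#       "reliability": 7,
#       "speed": 0,
#       "location": {"x": 34.78, "y": 32.08}    # x = longitude, y = latitude
#   }

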
def build_map(data_list):  # Renamed from map() to stop shadowing the built-in
    global data_parsed

    location_x_start = data_list[0][1]["location_x"]
    location_y_start = data_list[0][1]["location_y"]

    m = folium.Map(  # Map configuration
        location=[location_y_start, location_x_start],
        zoom_start=12, smooth_factor=2
    )

    tooltip = 'POLICE'  # Could be refined to show "subtype", falling back to "type_" when it is empty

    for data_parsed in data_list:
        for key_count in range(1, len(data_parsed) + 1):
            entry = data_parsed[key_count]
            country = entry["country"]
            nThumbsUp = entry["nThumbsUp"]
            confidence = entry["confidence"]
            reliability = entry["reliability"]
            speed = entry["speed"]
            location_x = entry["location_x"]
            location_y = entry["location_y"]
            # Label with "subtype" when present, otherwise fall back to "type_"
            if entry.get("subtype"):
                string = 'subtype: {0}\ncountry: {1}\nnThumbsUp: {2}\nconfidence: {3}\nreliability: {4}\nspeed: {5}\nlocation x: {6}\nlocation y: {7}'.format(
                    entry["subtype"], country, nThumbsUp, confidence, reliability, speed, location_x, location_y)
            else:
                string = 'type: {0}\ncountry: {1}\nnThumbsUp: {2}\nconfidence: {3}\nreliability: {4}\nspeed: {5}\nlocation x: {6}\nlocation y: {7}'.format(
                    entry["type_"], country, nThumbsUp, confidence, reliability, speed, location_x, location_y)

            folium.Marker([location_y, location_x], popup=folium.Popup(string, max_width=450), tooltip=tooltip).add_to(m)

    m.save('templates/map.html')


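# The saved templates/map.html is what the Flask app elsewhere in this project
# serves on localhost:5000. A minimal sketch of such a route, assuming app and
# render_template are defined in that other file:
#
#   @app.route("/")
#   def index():
#       return render_template("map.html")

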
def start_script():
    if not directory_exist("json"):
        os.mkdir('json')
    auto_or_manual, sec = personalised_info()
    if auto_or_manual.lower() != "a":
        print("What was that? Try again.")
        exit()
    sec = int(sec) if sec.isdigit() else 5  # Default to 5 seconds
    sec = max(5, min(sec, 30))  # Keep within the advertised 5-30 second range
    driver = start_server()
    err = False
    data_list = []
    while not err:
        try:
            driver.title  # Raises WebDriverException once the user closes Firefox
            time.sleep(sec)
            try:
                data_list = get_data(sec)
            except KeyboardInterrupt:
                exit()  # Suppress the traceback when the user wants to quit
            with open("data - its not the json data.txt", "w", encoding="utf-8") as f:
                f.write(str(data_list))
        except (WebDriverException, MissingSchema):
            err = True
    if data_list:
        print("Done scraping... Generating map..")  # TODO: Remember to wait 5 seconds after closing the browser
        build_map(data_list)
        clear_screen()
    else:
        print(
            "You didn't scrape anything. One possible explanation is that you didn't move the mouse at all, "
            "or you closed the browser before the site completely loaded, so the program never began to scrape.")
        exit()

--------------------------------------------------------------------------------