125 |
126 |
127 |
128 |
Our Lovely Team
129 |
Lorem Ipsum is simply dummy text ever since the 1500s, when an unknown printer
130 | took a galley of type and scrambled it to make a type specimen book. It has survived not
131 | only five centuries.
132 |
133 |
134 |
135 |
136 |
137 |
138 |
139 |
140 |
141 |
CEO, Qwilo
142 |
146 |
147 |
148 |
Simply dummy text ever since the 1500s, when an unknown printer took a galley of
149 | type and scrambled it to make a type specimen book.
150 |
151 |
152 |
153 |
154 |
155 |
156 |
157 |
CEO, Qwilo
158 |
162 |
163 |
164 |
Simply dummy text ever since the 1500s, when an unknown printer took a galley of
165 | type and scrambled it to make a type specimen book.
166 |
167 |
168 |
169 |
170 |
171 |
172 |
173 |
204 |
205 |
206 |
209 |
210 |
211 |
212 |
213 |
214 |
215 |
216 |
217 |
218 |
219 |
220 |
221 |
222 |
223 |
224 |
225 |
226 |
227 |
228 |
229 |
230 |
231 |
232 |
233 |
234 |
235 |
--------------------------------------------------------------------------------
/static/revolution/fonts/revicons/revicons90c6.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Copyright (C) 2013 by original authors @ fontello.com
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
--------------------------------------------------------------------------------
/feature_extraction.py:
--------------------------------------------------------------------------------
1 | import ipaddress
2 | import re
3 | import urllib.request
4 | from bs4 import BeautifulSoup
5 | import socket
6 | import requests
7 | from googlesearch import search
8 | import whois
9 | from datetime import datetime
10 | import time
11 | from dateutil.parser import parse as date_parse
12 |
def diff_month(d1, d2):
    """Return the signed number of calendar months between *d1* and *d2*.

    Only the year and month attributes of the two date-like objects are
    considered; day-of-month is ignored.  The result is positive when *d1*
    falls in a later month than *d2*.
    """
    years_apart = d1.year - d2.year
    months_apart = d1.month - d2.month
    return years_apart * 12 + months_apart
16 |
17 | # Generate data set by extracting the features from the URL
18 | def generate_data_set(url):
19 |
20 | data_set = []
21 |
22 | # Converts the given URL into standard format
23 | if not re.match(r"^https?", url):
24 | url = "http://" + url
25 |
26 |
27 | # Stores the response of the given URL
28 | try:
29 | response = requests.get(url)
30 | soup = BeautifulSoup(response.text, 'html.parser')
31 | except:
32 | response = ""
33 | soup = -999
34 |
35 |
36 | # Extracts domain from the given URL
37 | domain = re.findall(r"://([^/]+)/?", url)[0]
38 | if re.match(r"^www.",domain):
39 | domain = domain.replace("www.","")
40 |
41 | # Requests all the information about the domain
42 | whois_response = whois.whois(domain)
43 |
44 | rank_checker_response = requests.post("https://www.checkpagerank.net/index.php", {
45 | "name": domain
46 | })
47 |
48 | # Extracts global rank of the website
49 | try:
50 | global_rank = int(re.findall(r"Global Rank: ([0-9]+)", rank_checker_response.text)[0])
51 | except:
52 | global_rank = -1
53 |
54 | # 1.having_IP_Address
55 | try:
56 | ipaddress.ip_address(url)
57 | data_set.append(-1)
58 | except:
59 | data_set.append(1)
60 |
61 | # 2.URL_Length
62 | if len(url) < 54:
63 | data_set.append(1)
64 | elif len(url) >= 54 and len(url) <= 75:
65 | data_set.append(0)
66 | else:
67 | data_set.append(-1)
68 |
69 | # 3.Shortining_Service
70 | match=re.search('bit\.ly|goo\.gl|shorte\.st|go2l\.ink|x\.co|ow\.ly|t\.co|tinyurl|tr\.im|is\.gd|cli\.gs|'
71 | 'yfrog\.com|migre\.me|ff\.im|tiny\.cc|url4\.eu|twit\.ac|su\.pr|twurl\.nl|snipurl\.com|'
72 | 'short\.to|BudURL\.com|ping\.fm|post\.ly|Just\.as|bkite\.com|snipr\.com|fic\.kr|loopt\.us|'
73 | 'doiop\.com|short\.ie|kl\.am|wp\.me|rubyurl\.com|om\.ly|to\.ly|bit\.do|t\.co|lnkd\.in|'
74 | 'db\.tt|qr\.ae|adf\.ly|goo\.gl|bitly\.com|cur\.lv|tinyurl\.com|ow\.ly|bit\.ly|ity\.im|'
75 | 'q\.gs|is\.gd|po\.st|bc\.vc|twitthis\.com|u\.to|j\.mp|buzurl\.com|cutt\.us|u\.bb|yourls\.org|'
76 | 'x\.co|prettylinkpro\.com|scrnch\.me|filoops\.info|vzturl\.com|qr\.net|1url\.com|tweez\.me|v\.gd|tr\.im|link\.zip\.net',url)
77 | if match:
78 | data_set.append(-1)
79 | else:
80 | data_set.append(1)
81 |
82 | # 4.having_At_Symbol
83 | if re.findall("@", url):
84 | data_set.append(-1)
85 | else:
86 | data_set.append(1)
87 |
88 | # 5.double_slash_redirecting
89 | list=[x.start(0) for x in re.finditer('//', url)]
90 | if list[len(list)-1]>6:
91 | data_set.append(-1)
92 | else:
93 | data_set.append(1)
94 |
95 | # 6.Prefix_Suffix
96 | if re.findall(r"https?://[^\-]+-[^\-]+/", url):
97 | data_set.append(-1)
98 | else:
99 | data_set.append(1)
100 |
101 | # 7.having_Sub_Domain
102 | if len(re.findall("\.", url)) == 1:
103 | data_set.append(1)
104 | elif len(re.findall("\.", url)) == 2:
105 | data_set.append(0)
106 | else:
107 | data_set.append(-1)
108 |
109 | # 8.SSLfinal_State
110 | try:
111 | if response.text:
112 | data_set.append(1)
113 | except:
114 | data_set.append(-1)
115 |
116 | # 9.Domain_registeration_length
117 | expiration_date = whois_response.expiration_date
118 | registration_length = 0
119 | try:
120 | expiration_date = min(expiration_date)
121 | today = time.strftime('%Y-%m-%d')
122 | today = datetime.strptime(today, '%Y-%m-%d')
123 | registration_length = abs((expiration_date - today).days)
124 |
125 | if registration_length / 365 <= 1:
126 | data_set.append(-1)
127 | else:
128 | data_set.append(1)
129 | except:
130 | data_set.append(-1)
131 |
132 | # 10.Favicon
133 | if soup == -999:
134 | data_set.append(-1)
135 | else:
136 | try:
137 | for head in soup.find_all('head'):
138 | for head.link in soup.find_all('link', href=True):
139 | dots = [x.start(0) for x in re.finditer('\.', head.link['href'])]
140 | if url in head.link['href'] or len(dots) == 1 or domain in head.link['href']:
141 | data_set.append(1)
142 | raise StopIteration
143 | else:
144 | data_set.append(-1)
145 | raise StopIteration
146 | except StopIteration:
147 | pass
148 |
149 | #11. port
150 | try:
151 | port = domain.split(":")[1]
152 | if port:
153 | data_set.append(-1)
154 | else:
155 | data_set.append(1)
156 | except:
157 | data_set.append(1)
158 |
159 | #12. HTTPS_token
160 | if re.findall(r"^https://", url):
161 | data_set.append(1)
162 | else:
163 | data_set.append(-1)
164 |
165 | #13. Request_URL
166 | i = 0
167 | success = 0
168 | if soup == -999:
169 | data_set.append(-1)
170 | else:
171 | for img in soup.find_all('img', src= True):
172 | dots= [x.start(0) for x in re.finditer('\.', img['src'])]
173 | if url in img['src'] or domain in img['src'] or len(dots)==1:
174 | success = success + 1
175 | i=i+1
176 |
177 | for audio in soup.find_all('audio', src= True):
178 | dots = [x.start(0) for x in re.finditer('\.', audio['src'])]
179 | if url in audio['src'] or domain in audio['src'] or len(dots)==1:
180 | success = success + 1
181 | i=i+1
182 |
183 | for embed in soup.find_all('embed', src= True):
184 | dots=[x.start(0) for x in re.finditer('\.',embed['src'])]
185 | if url in embed['src'] or domain in embed['src'] or len(dots)==1:
186 | success = success + 1
187 | i=i+1
188 |
189 | for iframe in soup.find_all('iframe', src= True):
190 | dots=[x.start(0) for x in re.finditer('\.',iframe['src'])]
191 | if url in iframe['src'] or domain in iframe['src'] or len(dots)==1:
192 | success = success + 1
193 | i=i+1
194 |
195 | try:
196 | percentage = success/float(i) * 100
197 | if percentage < 22.0 :
198 | dataset.append(1)
199 | elif((percentage >= 22.0) and (percentage < 61.0)) :
200 | data_set.append(0)
201 | else :
202 | data_set.append(-1)
203 | except:
204 | data_set.append(1)
205 |
206 |
207 |
208 | #14. URL_of_Anchor
209 | percentage = 0
210 | i = 0
211 | unsafe=0
212 | if soup == -999:
213 | data_set.append(-1)
214 | else:
215 | for a in soup.find_all('a', href=True):
216 | # 2nd condition was 'JavaScript ::void(0)' but we put JavaScript because the space between javascript and :: might not be
217 | # there in the actual a['href']
218 | if "#" in a['href'] or "javascript" in a['href'].lower() or "mailto" in a['href'].lower() or not (url in a['href'] or domain in a['href']):
219 | unsafe = unsafe + 1
220 | i = i + 1
221 |
222 |
223 | try:
224 | percentage = unsafe / float(i) * 100
225 | except:
226 | data_set.append(1)
227 |
228 | if percentage < 31.0:
229 | data_set.append(1)
230 | elif ((percentage >= 31.0) and (percentage < 67.0)):
231 | data_set.append(0)
232 | else:
233 | data_set.append(-1)
234 |
235 | #15. Links_in_tags
236 | i=0
237 | success =0
238 | if soup == -999:
239 | data_set.append(-1)
240 | else:
241 | for link in soup.find_all('link', href= True):
242 | dots=[x.start(0) for x in re.finditer('\.',link['href'])]
243 | if url in link['href'] or domain in link['href'] or len(dots)==1:
244 | success = success + 1
245 | i=i+1
246 |
247 | for script in soup.find_all('script', src= True):
248 | dots=[x.start(0) for x in re.finditer('\.',script['src'])]
249 | if url in script['src'] or domain in script['src'] or len(dots)==1 :
250 | success = success + 1
251 | i=i+1
252 | try:
253 | percentage = success / float(i) * 100
254 | except:
255 | data_set.append(1)
256 |
257 | if percentage < 17.0 :
258 | data_set.append(1)
259 | elif((percentage >= 17.0) and (percentage < 81.0)) :
260 | data_set.append(0)
261 | else :
262 | data_set.append(-1)
263 |
264 | #16. SFH
265 | for form in soup.find_all('form', action= True):
266 | if form['action'] =="" or form['action'] == "about:blank" :
267 | data_set.append(-1)
268 | break
269 | elif url not in form['action'] and domain not in form['action']:
270 | data_set.append(0)
271 | break
272 | else:
273 | data_set.append(1)
274 | break
275 |
276 | #17. Submitting_to_email
277 | if response == "":
278 | data_set.append(-1)
279 | else:
280 | if re.findall(r"[mail\(\)|mailto:?]", response.text):
281 | data_set.append(1)
282 | else:
283 | data_set.append(-1)
284 |
285 | #18. Abnormal_URL
286 | if response == "":
287 | data_set.append(-1)
288 | else:
289 | if response.text == "":
290 | data_set.append(1)
291 | else:
292 | data_set.append(-1)
293 |
294 | #19. Redirect
295 | if response == "":
296 | data_set.append(-1)
297 | else:
298 | if len(response.history) <= 1:
299 | data_set.append(-1)
300 | elif len(response.history) <= 4:
301 | data_set.append(0)
302 | else:
303 | data_set.append(1)
304 |
305 | #20. on_mouseover
306 | if response == "" :
307 | data_set.append(-1)
308 | else:
309 | if re.findall("", response.text):
310 | data_set.append(1)
311 | else:
312 | data_set.append(-1)
313 |
314 | #21. RightClick
315 | if response == "":
316 | data_set.append(-1)
317 | else:
318 | if re.findall(r"event.button ?== ?2", response.text):
319 | data_set.append(1)
320 | else:
321 | data_set.append(-1)
322 |
323 | #22. popUpWidnow
324 | if response == "":
325 | data_set.append(-1)
326 | else:
327 | if re.findall(r"alert\(", response.text):
328 | data_set.append(1)
329 | else:
330 | data_set.append(-1)
331 |
332 | #23. Iframe
333 | if response == "":
334 | data_set.append(-1)
335 | else:
336 | if re.findall(r"[