├── README.md
├── badmalweb.py
├── malc0de.py
├── urlquery
└── vxvault.py


/README.md:
--------------------------------------------------------------------------------
 1 | Malware
 2 | =======
 3 | 
 4 | A collection of Python web-scraping scripts
 5 | that gather information for analysis.
 6 | 
 7 | 
 8 | README.md
 9 | 
10 | 


--------------------------------------------------------------------------------
/badmalweb.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | import urllib
 3 | 
 4 | url= "http://badmalweb.com/?p=live"
 5 | htmltext = urllib.urlopen(url).read()
 6 | 
 7 | ips = re.findall('<td class="ip">(.+?)</td>',htmltext)
 8 | dates = re.findall('<td class="date">(.+?)</td>', htmltext)
 9 | hosts = re.findall('<td class="url">(.+?)</td>', htmltext)
10 | 
11 | ips = [re.findall('">(.+?)</a>', x) for x in ips]
12 | 
13 | f = open('badmalweb.txt', 'a')
14 | 
15 | newIpList = []
16 | 
17 | f = open('badmalweb.txt', 'a')
18 | 
19 | for x in ips:
20 |     for x in x:
21 |       newIpList.append(x)
22 | 
23 | for index, value in enumerate(newIpList):
24 |     final = "%s  -  %s  -  %s" % (value, dates[index], hosts[index])
25 |     print final
26 |     f.write(final + "\n")
27 | f.close() 
28 | 
29 | #URL: http://badmalweb.com/?p=live
30 | 


--------------------------------------------------------------------------------
/malc0de.py:
--------------------------------------------------------------------------------
 1 | 
 2 | import requests
 3 | import os.path
 4 | from bs4 import BeautifulSoup
 5 | 
 6 | url="http://malc0de.com/bl/IP_Blacklist.txt"
 7 | r=requests.get(url)
 8 | e = BeautifulSoup(r.content)
 9 | links = e.find_all()[0]
10 | 
11 | if os.path.isfile("/usr/lib/python2.7/scriptpath/malc0de.txt"):
12 | 	print "File exists new lines added!"
13 | 	f=open("/usr/lib/python2.7/scriptpath/malc0de.txt", 'a').write(str(links))
14 | 	
15 | else:
16 | 	f = open("/usr/lib/python2.7/scriptpath/malc0de.txt", "w").write(str(links))
17 | 	print"File doesn't exist creating one!"
18 | 
19 | #URLs:
20 | #http://malc0de.com/bl/
21 | #http://malc0de.com/bl/IP_Blacklist.txt
22 | 


--------------------------------------------------------------------------------
/urlquery:
--------------------------------------------------------------------------------
 1 | import re
 2 | import urllib
 3 | from HTMLParser import HTMLParser
 4 | 
 5 | url="http://urlquery.net/index.php"
 6 | htmlfile = urllib.urlopen(url)
 7 | htmltext = htmlfile.read()
 8 | ip = '<td style=(.+?)</td>'
 9 | pattern = re.compile(ip)
10 | l = re.findall(pattern,htmltext)
11 | 
12 | class HTMLPrase(HTMLParser):
13 |     container = ""
14 |     def handle_data(self,data):
15 |       self.container+=data
16 |       return self.container
17 | h=HTMLPrase()
18 | h.feed(str(l))
19 | e=h.container
20 | 
21 | sp=e.split(",")
22 | 
23 | f = open('ips.txt', 'a')
24 | 
25 | for spl in sp:
26 |     spl = spl.replace("'", '').replace('[', '').replace(']', '').replace('"text-align:center;vertical-align=middle;>', '').replace('"', '').strip()
27 |     print spl
28 |     f.write(spl + "\n")
29 | f.close() 
30 | 
31 | #URLs:
32 | #http://urlquery.net/index.php
33 | 


--------------------------------------------------------------------------------
/vxvault.py:
--------------------------------------------------------------------------------
 1 | 
 2 | import urllib2
 3 | import os.path
 4 | from html2text import html2text
 5 | 
 6 | e= html2text(urllib2.urlopen("http://vxvault.siri-urz.net/URL_List.php").read())
 7 | 
 8 | if os.path.isfile("/usr/lib/python2.7/scriptpath/vxvault.siri-urz.net.txt"):
 9 | 	print "File exists new lines added!"
10 | 	f=open("//usr/lib/python2.7/scriptpath/vxvault.siri-urz.net.txt", 'a').write(str(e))
11 | 	
12 | else:
13 | 	f = open("/usr/lib/python2.7/scriptpath/vxvault.siri-urz.net.txt", "w").write(str(e))
14 | 	print"File doesn't exist creating one!"
15 | 
16 | #URLs:
17 | #http://vxvault.siri-urz.net/URL_List.php
18 | #http://vxvault.siri-urz.net/ViriList.php
19 | 
20 |  
21 | 


--------------------------------------------------------------------------------