├── README.md ├── badmalweb.py ├── malc0de.py ├── urlquery └── vxvault.py /README.md: -------------------------------------------------------------------------------- 1 | Malware 2 | ======= 3 | 4 | Python Web Scraping 5 | collection of information for analysis. 6 | 7 | 8 | README.md 9 | 10 | -------------------------------------------------------------------------------- /badmalweb.py: -------------------------------------------------------------------------------- 1 | import re 2 | import urllib 3 | 4 | url= "http://badmalweb.com/?p=live" 5 | htmltext = urllib.urlopen(url).read() 6 | 7 | ips = re.findall('(.+?)',htmltext) 8 | dates = re.findall('(.+?)', htmltext) 9 | hosts = re.findall('(.+?)', htmltext) 10 | 11 | ips = [re.findall('">(.+?)', x) for x in ips] 12 | 13 | f = open('badmalweb.txt', 'a') 14 | 15 | newIpList = [] 16 | 17 | f = open('badmalweb.txt', 'a') 18 | 19 | for x in ips: 20 | for x in x: 21 | newIpList.append(x) 22 | 23 | for index, value in enumerate(newIpList): 24 | final = "%s - %s - %s" % (value, dates[index], hosts[index]) 25 | print final 26 | f.write(final + "\n") 27 | f.close() 28 | 29 | #URL: http://badmalweb.com/?p=live 30 | -------------------------------------------------------------------------------- /malc0de.py: -------------------------------------------------------------------------------- 1 | 2 | import requests 3 | import os.path 4 | from bs4 import BeautifulSoup 5 | 6 | url="http://malc0de.com/bl/IP_Blacklist.txt" 7 | r=requests.get(url) 8 | e = BeautifulSoup(r.content) 9 | links = e.find_all()[0] 10 | 11 | if os.path.isfile("/usr/lib/python2.7/scriptpath/malc0de.txt"): 12 | print "File exists new lines added!" 13 | f=open("/usr/lib/python2.7/scriptpath/malc0de.txt", 'a').write(str(links)) 14 | 15 | else: 16 | f = open("/usr/lib/python2.7/scriptpath/malc0de.txt", "w").write(str(links)) 17 | print"File doesn't exist creating one!" 18 | 19 | #URLs: 20 | #http://malc0de.com/bl/ 21 | #http://malc0de.com/bl/IP_Blacklist.txt 22 | -------------------------------------------------------------------------------- /urlquery: -------------------------------------------------------------------------------- 1 | import re 2 | import urllib 3 | from HTMLParser import HTMLParser 4 | 5 | url="http://urlquery.net/index.php" 6 | htmlfile = urllib.urlopen(url) 7 | htmltext = htmlfile.read() 8 | ip = '' 9 | pattern = re.compile(ip) 10 | l = re.findall(pattern,htmltext) 11 | 12 | class HTMLPrase(HTMLParser): 13 | container = "" 14 | def handle_data(self,data): 15 | self.container+=data 16 | return self.container 17 | h=HTMLPrase() 18 | h.feed(str(l)) 19 | e=h.container 20 | 21 | sp=e.split(",") 22 | 23 | f = open('ips.txt', 'a') 24 | 25 | for spl in sp: 26 | spl = spl.replace("'", '').replace('[', '').replace(']', '').replace('"text-align:center;vertical-align=middle;>', '').replace('"', '').strip() 27 | print spl 28 | f.write(spl + "\n") 29 | f.close() 30 | 31 | #URLs: 32 | #http://urlquery.net/index.php 33 | -------------------------------------------------------------------------------- /vxvault.py: -------------------------------------------------------------------------------- 1 | 2 | import urllib2 3 | import os.path 4 | from html2text import html2text 5 | 6 | e= html2text(urllib2.urlopen("http://vxvault.siri-urz.net/URL_List.php").read()) 7 | 8 | if os.path.isfile("/usr/lib/python2.7/scriptpath/vxvault.siri-urz.net.txt"): 9 | print "File exists new lines added!" 10 | f=open("//usr/lib/python2.7/scriptpath/vxvault.siri-urz.net.txt", 'a').write(str(e)) 11 | 12 | else: 13 | f = open("/usr/lib/python2.7/scriptpath/vxvault.siri-urz.net.txt", "w").write(str(e)) 14 | print"File doesn't exist creating one!" 15 | 16 | #URLs: 17 | #http://vxvault.siri-urz.net/URL_List.php 18 | #http://vxvault.siri-urz.net/ViriList.php 19 | 20 | 21 | --------------------------------------------------------------------------------