├── README.md
├── badmalweb.py
├── malc0de.py
├── urlquery
└── vxvault.py
/README.md:
--------------------------------------------------------------------------------
1 | Malware
2 | =======
3 |
4 | Python web-scraping scripts
5 | that collect malware-related information (IP addresses, URLs, hosts) from public tracker sites for analysis.
6 |
7 |
8 | README.md
9 |
10 |
--------------------------------------------------------------------------------
/badmalweb.py:
--------------------------------------------------------------------------------
1 | import re
2 | import urllib
3 | # Download the badmalweb.com live page and extract `ips`, `dates` and `hosts` from it with regular expressions.
4 | url= "http://badmalweb.com/?p=live"
5 | htmltext = urllib.urlopen(url).read()
6 | # NOTE(review): the three patterns below look damaged in this copy (the first spans a line break, the `dates` and `hosts` patterns are identical) -- presumably HTML tags were lost; restore them from the upstream file before relying on this script.
7 | ips = re.findall('
(.+?) | ',htmltext)
8 | dates = re.findall('(.+?) | ', htmltext)
9 | hosts = re.findall('(.+?) | ', htmltext)
10 | # Second pass: run another findall over each first-pass match, capturing text after a `">` sequence, so every entry of `ips` becomes a list of matches (this pattern may be truncated too -- TODO confirm).
11 | ips = [re.findall('">(.+?)', x) for x in ips]
12 |
# Flatten the per-row match lists held in `ips` into one flat list of
# addresses, preserving their order.
newIpList = [address for row in ips for address in row]

# Append one "ip - date - host" line per address to badmalweb.txt and echo it
# to stdout.  The file is opened exactly once, and the with-block closes it
# even if a write fails part-way through.
with open('badmalweb.txt', 'a') as f:
    # zip() pairs the three lists positionally and stops at the shortest one,
    # so a page that yields fewer dates or hosts than addresses ends the
    # output early instead of raising IndexError in the middle of the file.
    for value, date, host in zip(newIpList, dates, hosts):
        final = "%s - %s - %s" % (value, date, host)
        print(final)
        f.write(final + "\n")
28 |
29 | #URL: http://badmalweb.com/?p=live
30 |
--------------------------------------------------------------------------------
/malc0de.py:
--------------------------------------------------------------------------------
1 |
# Fetch the malc0de IP blacklist and store it in a local text file.
#
# The response body is parsed with BeautifulSoup and the first element found
# is written out as a string: appended when the output file already exists,
# written to a new file otherwise.

import requests
import os.path
from bs4 import BeautifulSoup

url = "http://malc0de.com/bl/IP_Blacklist.txt"
# Single definition of the output location, shared by the existence check
# and the write below.
output_path = "/usr/lib/python2.7/scriptpath/malc0de.txt"

r = requests.get(url)
e = BeautifulSoup(r.content)
# First element of the parsed document.
# NOTE(review): this raises IndexError when the parser finds no tags at all;
# whether that can happen depends on the parser BeautifulSoup picks -- confirm.
links = e.find_all()[0]

# Decide once whether we are appending or creating, so the message and the
# open mode cannot disagree.
file_exists = os.path.isfile(output_path)
if file_exists:
    print("File exists new lines added!")

# The with-block guarantees the handle is flushed and closed, even if the
# write fails.
with open(output_path, 'a' if file_exists else 'w') as f:
    f.write(str(links))

if not file_exists:
    print("File doesn't exist creating one!")

#URLs:
#http://malc0de.com/bl/
#http://malc0de.com/bl/IP_Blacklist.txt
22 |
--------------------------------------------------------------------------------
/urlquery:
--------------------------------------------------------------------------------
# Download the urlquery.net index page, extract regex matches from it, strip
# any markup from the matches and append the cleaned values to ips.txt.

import re
import urllib
from HTMLParser import HTMLParser

url = "http://urlquery.net/index.php"
htmlfile = urllib.urlopen(url)
try:
    htmltext = htmlfile.read()
finally:
    # Release the connection whether or not the read succeeded.
    htmlfile.close()

# NOTE(review): the pattern is an empty string in this copy, so findall()
# returns nothing but empty strings.  The intended HTML pattern appears to be
# missing -- restore it from the upstream file before relying on the output.
ip = ''
pattern = re.compile(ip)
l = re.findall(pattern, htmltext)


class HTMLPrase(HTMLParser):
    """Parser that concatenates every text node it is fed into `container`."""

    # Accumulated text; `+=` on the instance rebinds it per instance.
    container = ""

    def handle_data(self, data):
        """Append `data` to the accumulated text and return the new total."""
        self.container += data
        return self.container


h = HTMLPrase()
# The whole match list is stringified and parsed in one go, which is why the
# list punctuation has to be stripped again further down.
h.feed(str(l))
e = h.container

sp = e.split(",")

# The with-block closes ips.txt even if a write fails part-way through.
with open('ips.txt', 'a') as f:
    for spl in sp:
        # Remove the quotes and brackets introduced by str(list) plus a
        # leftover style-attribute fragment, then trim whitespace.
        spl = spl.replace("'", '').replace('[', '').replace(']', '').replace('"text-align:center;vertical-align=middle;>', '').replace('"', '').strip()
        print(spl)
        f.write(spl + "\n")

#URLs:
#http://urlquery.net/index.php
33 |
--------------------------------------------------------------------------------
/vxvault.py:
--------------------------------------------------------------------------------
1 |
# Download the VX Vault URL list, convert the HTML to plain text with
# html2text and store it in a local text file: appended when the output file
# already exists, written to a new file otherwise.

import urllib2
import os.path
from html2text import html2text

# One spelling of the output location for the existence check and for both
# write modes, so they can never point at differently written paths.
output_path = "/usr/lib/python2.7/scriptpath/vxvault.siri-urz.net.txt"

e = html2text(urllib2.urlopen("http://vxvault.siri-urz.net/URL_List.php").read())

# Decide once whether we are appending or creating, so the message and the
# open mode cannot disagree.
file_exists = os.path.isfile(output_path)
if file_exists:
    print("File exists new lines added!")

# The with-block guarantees the handle is flushed and closed, even if the
# write fails.
with open(output_path, 'a' if file_exists else 'w') as f:
    f.write(str(e))

if not file_exists:
    print("File doesn't exist creating one!")

#URLs:
#http://vxvault.siri-urz.net/URL_List.php
#http://vxvault.siri-urz.net/ViriList.php
19 |
20 |
21 |
--------------------------------------------------------------------------------
|