]\w+)\<\/b\>")
34 |     LEAK_REGEX = re.compile(r"leak\-label\"\>\n\s*(?P<leak>\S.+)\<\/b\>")
35 |     DATE_REGEX = re.compile(r"Created\n\s*(?P<date>\d{4}\-\d{2}\-\d{2})")
36 |     time.sleep(1)  # rate-limit requests to the search endpoint
37 |     wiki_df = pd.DataFrame(columns=['Date', 'Sender', 'Subject', 'URL', 'Leak'])
38 |     soup_var = BeautifulSoup(REQ_VAR.content, "lxml")
39 |     divtag_var = soup_var.find_all('div', {'class': 'result'})
40 |     for a in divtag_var:
41 |         url_var = URL_REGEX.findall(str(a))
42 |         date_var = DATE_REGEX.findall(str(a))
43 |         subj_var = SUBJ_REGEX.findall(str(a))
44 |         sendr1_var = SENDR1_REGEX.findall(str(a))
45 |         sendrx_var = SENDR2_REGEX.findall(str(a))
46 |         leak_var = LEAK_REGEX.findall(str(a))
47 |         sendr2_var = re.sub(r'\\', '', str(sendrx_var))  # strip stray backslashes from the stringified match
48 |         if sendr1_var:
49 |             sendr_var = sendr1_var
50 |         elif sendr2_var:
51 |             sendr_var = sendr2_var
52 |         wiki_df = pd.concat([wiki_df, pd.DataFrame([{'Date': date_var, 'Sender': sendr_var, 'Subject': subj_var, 'URL': url_var, 'Leak': leak_var}])], ignore_index=True, sort=True)
53 |     for index, r in wiki_df.iterrows():
54 |         date_var = r['Date']
55 |         sendr_var = r['Sender']
56 |         subj_var = r['Subject']
57 |         url_var = r['URL']
58 |         leak_var = r['Leak']
59 |         print('************************************************************')
60 |         print(f'Date: {date_var}')
61 |         print(f'Sender: {sendr_var}')
62 |         print(f'Subject: {subj_var}')
63 |         print(f'URL: {url_var}')
64 |         print(f'Leak: {leak_var}')
65 |         print('************************************************************')
66 |     return wiki_df  # hand the per-page results back to the caller
67 |
68 | def continuer(domain):
69 |     r_epetite = input('Repeat? (Y/N)')
70 |     if r_epetite.lower() == 'y':
71 |         s_domain = input('Same domain? (Y/N)')
72 |         if s_domain.lower() == 'y':
73 |             n_domain = domain
74 |         else:
75 |             n_domain = input('Enter a new domain...\t')
76 |         VALIDATION_REGEX = re.compile(r"(?P<domain>\w+\.\w{2,6})")
77 |         validation_check = VALIDATION_REGEX.findall(str(n_domain))
78 |         if validation_check:
79 |             wiki_df = pd.DataFrame(columns=['Date', 'Sender', 'Subject', 'URL', 'Leak'])
80 |             page_count = 1
81 |             while True:
82 |                 URL = 'https://search.wikileaks.org/?query=&exact_phrase=' + n_domain + \
83 |                     '&include_external_sources=True&order_by=newest_document_date&page=' + str(page_count)
84 |                 print(URL)
85 |                 REQ_VAR = requests.get(URL)
86 |                 page_df = wikileaks(URL, REQ_VAR)
87 |                 if page_df.empty:  # no more results; stop paging
88 |                     break
89 |                 wiki_df = pd.concat([wiki_df, page_df], ignore_index=True, sort=True)
90 |                 page_count += 1
91 |             dframeName = f'{n_domain}.csv'
92 |             wiki_df.to_csv(dframeName)
93 |             continuer(n_domain)
94 |     else:
95 |         sys.exit()
96 | 
97 | def run():
98 |     try:
99 |         DOMAIN = sys.argv[1]
100 |         if DOMAIN == '-h':
101 |             usage()
102 |         elif DOMAIN == '--help':
103 |             usage()
104 |         else:
105 |             VALIDATION_REGEX = re.compile(r"(?P<domain>\w+\.\w{2,6})")
106 |             validation_check = VALIDATION_REGEX.findall(DOMAIN)
107 |             if validation_check:
108 |                 wiki_df = pd.DataFrame(columns=['Date', 'Sender', 'Subject', 'URL', 'Leak'])
109 |                 page_count = 1
110 |                 while True:
111 |                     URL = 'https://search.wikileaks.org/?query=&exact_phrase=' + DOMAIN + \
112 |                         '&include_external_sources=True&order_by=newest_document_date&page=' + str(page_count)
113 |                     print(URL)
114 |                     REQ_VAR = requests.get(URL)
115 |                     page_df = wikileaks(URL, REQ_VAR)
116 |                     if page_df.empty:  # no more results; stop paging
117 |                         break
118 |                     wiki_df = pd.concat([wiki_df, page_df], ignore_index=True, sort=True)
119 |                     page_count += 1
120 |                 dframeName = f'{DOMAIN}.csv'
121 |                 wiki_df.to_csv(dframeName)
122 |                 continuer(DOMAIN)
123 |     except Exception as error_code:
124 |         usage()
125 |         # print(error_code)
126 | 
127 | 
128 | banner()
129 | run()
130 | 
--------------------------------------------------------------------------------
/recon-ng_module/wikileaker.py:
--------------------------------------------------------------------------------
1 | # module specific imports
2 | import re
3 | import time
4 |
5 | import lxml.html
6 | from recon.core.module import BaseModule
7 |
8 |
9 | class Module(BaseModule):
10 |
11 |     meta = {
12 |         'name': 'WikiLeaker',
13 |         'author': 'Joe Gray (@C_3PJoe)',
14 |         'version': '1.0',
15 |         'description': 'A WikiLeaks scraper inspired by the Datasploit module previously written in Python2. It '
16 |                        'searches WikiLeaks for leaks containing the subject domain. If anything is found, this module '
17 |                        'will seek to parse out the URL, Sender Email, Date, Leak, and Subject of the email. This will '
18 |                        'update the \'Contacts\' table with the results.',
19 |         'query': 'SELECT DISTINCT domain FROM domains WHERE domain IS NOT NULL',
20 |     }
21 |
22 |     def module_run(self, domains):
23 |         for domain in domains:
24 |             page_count = 1
25 |             while True:
26 |                 URL = 'https://search.wikileaks.org/?query=&exact_phrase=' + domain + \
27 |                       '&include_external_sources=True&order_by=newest_document_date&page=' + str(page_count)
28 |                 self.verbose(URL)
29 |                 resp = self.request('GET', URL)
30 |                 time.sleep(1)
31 |                 if resp.status_code != 200:
32 |                     self.alert('An error occurred: ' + str(resp.status_code))
33 |                     break
34 |                 else:
35 |                     root = lxml.html.fromstring(resp.text)
36 |                     search_data = root.xpath('//div[@class="result"]')
37 | 
38 |                     if len(search_data) > 0:
39 |                         for i in search_data:
40 |                             link = i.xpath("concat(div/h4/a[contains(@href, '/emails/emailid/')]/@href, '')").strip()
41 | 
42 |                             if link:
43 |                                 subject = i.xpath("concat(div/h4/a, '')").strip()
44 |                                 leak = i.xpath("concat(div/div[@class='leak-label'], '')").strip()
45 |                                 created = i.xpath("concat(div/div[@class='dates']/div[@class='date' and "
46 |                                                   "contains(text(), 'Created')]/span, '')").strip()
47 |                                 excerpt = i.xpath("concat(div[@class='info']/div[@class='excerpt'], '')").strip()
48 | 
49 |                                 emails = re.findall("email:\\xa0([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+)",
50 |                                                     excerpt)
51 | 
52 |                                 for email in emails:
53 |                                     self.alert(f'Leak: {leak}')
54 |                                     self.output(f'URL: {link}')
55 |                                     self.verbose(f'Date: {created}')
56 |                                     self.verbose(f'Sender: {email.strip()}')
57 |                                     self.verbose(f'Subject: {subject}')
58 |                                     self.insert_contacts(email=email.strip(), notes=link)
59 |                     else:
60 |                         break
61 | 
62 |                 page_count += 1
63 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | bs4
2 | pandas
3 | requests
4 | lxml
5 | 
--------------------------------------------------------------------------------
/setup.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | sudo apt-get install python3 python3-pip python3-lxml git libre-dev;
4 | pip3 install -r requirements.txt;
5 | chmod 755 ./WikiLeaker.py;
6 |
--------------------------------------------------------------------------------