├── .build.sh
├── .coverage.rc
├── .env
├── .github
│   └── ISSUE_TEMPLATE.md
├── .gitignore
├── .landscape.yml
├── .release.sh
├── .travis.yml
├── CHANGELOG.md
├── Common
│   ├── SimplyEmail.ini
│   ├── SimplyEmailAggressive.ini
│   ├── SimplyEmailTest.ini
│   ├── TaskController.py
│   └── __init__.py
├── Dockerfile
├── Helpers
│   ├── CanarioAPI.py
│   ├── Connect6.py
│   ├── Converter.py
│   ├── Download.py
│   ├── EmailFormat.py
│   ├── HtmlBootStrapTheme.py
│   ├── LinkedinNames.py
│   ├── Parser.py
│   ├── VerifyEmails.py
│   ├── VersionCheck.py
│   ├── __init__.py
│   ├── bootstrap-3.3.5
│   │   ├── LICENSE
│   │   ├── Screen Shot 2015-11-11 at 5.27.15 PM.png
│   │   ├── Screen Shot 2015-11-11 at 5.27.31 PM.png
│   │   └── SimplyEmailTemplate.html
│   ├── helpers.py
│   └── messages.py
├── LICENSE
├── Modules
│   ├── AskSearch.py
│   ├── CanarioAPI.py.old
│   ├── CanaryBinSearch.py.old
│   ├── ExaleadDOCSearch.py
│   ├── ExaleadDOCXSearch.py
│   ├── ExaleadPDFSearch.py
│   ├── ExaleadPPTXSearch.py
│   ├── ExaleadSearch.py
│   ├── ExaleadXLSXSearch.py
│   ├── FlickrSearch.py
│   ├── GitHubCodeSearch.py
│   ├── GitHubGistSearch.py
│   ├── GitHubUserSearch.py
│   ├── GoogleCsvSearch.py
│   ├── GoogleDocSearch.py
│   ├── GoogleDocxSearch.py
│   ├── GooglePDFSearch.py
│   ├── GooglePPTXSearch.py
│   ├── GoogleSearch.py
│   ├── GoogleXLSXSearch.py
│   ├── HtmlScrape.py
│   ├── Hunter.py
│   ├── OnInstagram.py.old
│   ├── PasteBinSearch.py
│   ├── RedditPostSearch.py
│   ├── SearchPGP.py
│   ├── WhoisAPISearch.py
│   ├── Whoisolgy.py
│   ├── YahooSearch.py
│   └── __init__.py
├── README.md
├── SimplyEmail.py
├── VERSION
├── docs
│   ├── _config.yml
│   └── index.md
├── setup
│   ├── License
│   │   ├── LICENSE-BootStrap-Twitter
│   │   ├── LICENSE-SimplyEmail
│   │   ├── LICENSE-Veil
│   │   ├── LICENSE-Version
│   │   └── LICENSE-theHarvester
│   ├── oneline-setup.sh
│   ├── requirments.txt
│   └── setup.sh
└── tests
    ├── Test-DOC.doc
    ├── Test-DOCX.docx
    ├── Test-PDF.pdf
    ├── Test-PPTX.pptx
    ├── __init__.py
    └── test_simplyemail_list.py
/.build.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | set -ex
4 | # SET THE FOLLOWING VARIABLES
5 | # docker hub username
6 | USERNAME=simplysecurity
7 | # image name
8 | IMAGE=simplyemail
9 | # version
10 | VERSION="$(cat VERSION)"
11 |
12 | docker build -t $USERNAME/$IMAGE:latest .
13 |
--------------------------------------------------------------------------------
/.coverage.rc:
--------------------------------------------------------------------------------
1 | [report]
2 | omit =
3 | */python?.?/*
4 | */site-packages/nose/*
5 | *__init__*
6 | */SE/*
7 |
--------------------------------------------------------------------------------
/.env:
--------------------------------------------------------------------------------
1 | source SE/bin/activate
2 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE.md:
--------------------------------------------------------------------------------
1 | ## SimplyEmail Version
2 |
3 |
4 | ## OS Information (Linux flavor, Python version)
5 |
6 |
7 | ## Expected behavior and description of the error, including any actions taken immediately prior to the error. The more detail the better.
8 |
9 |
10 | ## Screenshot of error, embedded text output, or Pastebin link to the error
11 |
12 |
13 | ## For complicated errors, please provide (.SimplyEmail.log) via Pastebin/Gist or a code text block for the scrape.
14 | ### To increase logging, go to Helpers/helpers.py (line 144) and change it to "logger.setLevel(logging.DEBUG)"
15 |
16 |
17 | ## Any additional information
--------------------------------------------------------------------------------
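For reference, the logging change mentioned in the template is a one-line switch on the module logger. A minimal sketch only; helpers.py is not reproduced in this dump, so the logger name and surrounding code are assumed:

    import logging

    logger = logging.getLogger("SimplyEmail")   # assumed logger name; see Helpers/helpers.py
    logger.setLevel(logging.DEBUG)              # default level is less verbose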
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 |
5 | # C extensions
6 | *.so
7 |
8 |
9 | # Distribution / packaging
10 | .Python
11 | env/
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 |
27 | # PyInstaller
28 | # Usually these files are written by a python script from a template
29 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
30 | *.manifest
31 | *.spec
32 |
33 | # Installer logs
34 | pip-log.txt
35 | pip-delete-this-directory.txt
36 |
37 | # Unit test / coverage reports
38 | htmlcov/
39 | .tox/
40 | .coverage
41 | .coverage.*
42 | .cache
43 | nosetests.xml
44 | coverage.xml
45 | *,cover
46 |
47 | # Translations
48 | *.mo
49 | *.pot
50 |
51 | # Django stuff:
52 | *.log
53 |
54 | # Sphinx documentation
55 | docs/_build/
56 |
57 | # PyBuilder
58 | target/
59 |
60 | # db
61 | *.db
62 |
63 | # certs
64 | *.pem
65 |
--------------------------------------------------------------------------------
/.landscape.yml:
--------------------------------------------------------------------------------
1 | doc-warnings: no
2 | test-warnings: no
3 | max-line-length: 120
4 | autodetect: yes
5 | python-targets:
6 | - 2
7 |
8 |
--------------------------------------------------------------------------------
/.release.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | set -ex
3 | # Requires the following packages: git, hub, docker
4 | # SET THE FOLLOWING VARIABLES
5 | USERNAME=simplysecurity
6 | IMAGE=simplyemail
7 | VERSION="$(cat VERSION)"
8 |
9 | # UPDATE THE SOURCE CODE
10 | git pull
11 |
12 | # bump version
13 | docker run --rm -v "$PWD":/app treeder/bump patch
14 | VERSION=`cat VERSION`
15 | echo "version: $VERSION"
16 |
17 |
18 | # TAG, PULL, MERGE DEV
19 | git checkout -b "Version-$VERSION"
20 | git add --all
21 | git commit -m "SimplyEmail $VERSION Release"
22 | #git tag -a "$VERSION" -m "SimplyEmail $VERSION Release"
23 | git push origin "Version-$VERSION"
24 | #git push origin "Version-$VERSION" --tags
25 | git checkout master
26 | git merge "Version-$VERSION"
27 | git push
28 | hub release create Version-$VERSION -m "SimplyEmail $VERSION Release"
29 | # DEL BRANCH
30 | #git branch -d "dev"
31 | #git branch -D "dev"
32 |
33 | # ALERT VERSION
34 | echo "Building Version: $VERSION"
35 |
36 | # START BUILD
37 | ./.build.sh
38 |
39 | # DOCKER TAG/VERSIONING
40 | docker tag $USERNAME/$IMAGE:latest $USERNAME/$IMAGE:$VERSION
41 |
42 | # PUSH TO DOCKER HUB
43 | docker push $USERNAME/$IMAGE:latest
44 | echo "Docker image pushed: $USERNAME/$IMAGE:latest"
45 | docker push $USERNAME/$IMAGE:$VERSION
46 | echo "Docker image pushed: $USERNAME/$IMAGE:$VERSION"
47 |
48 | # GENERATE CHANGELOG FOR TAG AND PUSH
49 | github_changelog_generator
50 | git add --all
51 | git commit -m "Update CHANGLOG.md from version bump"
52 | git push
53 |
54 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: python
2 | python:
3 | - "2.7"
4 | install:
5 | - pip install coveralls
6 | - pip install coverage
7 | - pip install nose
8 | script:
9 | - "git clone --branch master https://github.com/killswitch-GUI/SimplyEmail.git"
10 | - "sudo sh setup/setup.sh"
11 | - "cd SimplyEmail"
12 | - "./setup/setup.sh"
13 | - "source SE/bin/activate"
14 | - "sudo ./SimplyEmail.py -l"
15 | - "sudo ./SimplyEmail.py -h"
16 | - "sudo ./SimplyEmail.py -n"
17 | - "sudo ./SimplyEmail.py -verify"
18 | - "sudo ./SimplyEmail.py -s"
19 | - "sudo ./SimplyEmail.py -s -e cybersyndicates.com"
20 | - nosetests --with-coverage
21 |
22 | after_success:
23 | - coveralls
24 |
25 |
--------------------------------------------------------------------------------
/Common/SimplyEmail.ini:
--------------------------------------------------------------------------------
1 | [GlobalSettings]
2 | UserAgent: Mozilla/5.0 (Windows; U; Windows NT 6.0;en-US; rv:1.9.2) Gecko/20100115 Firefox/3.6
3 | SaveFile: Email_List.txt
4 | HtmlFile: Email_List.html
5 | Version: v1.5.0
6 | VersionRepoCheck: Yes
7 | VersionRepoCheckLocation: https://raw.githubusercontent.com/killswitch-GUI/SimplyEmail/master/setup/License/LICENSE-Version
8 |
9 | [ProcessConfig]
10 | TotalProcs: 8
11 |
12 | # Sets the default sleep time for all search
13 | # engines; helps reduce Captcha issues.
14 | [SleepConfig]
15 | QuerySleep = 30
16 | QueryJitter = 10
17 |
18 | # API keys will be labeled
19 | # by the service
20 | [APIKeys]
21 | Hunter:
22 |
23 | # Settings for HTML Scraping module
24 | # Save can take a path - default is the SimplyEmail folder with the domain name
25 | [HtmlScrape]
26 | Depth: 1
27 | Wait: 0
28 | LimitRate: 100000k
29 | Timeout: 2
30 | Maxfilesize:
31 | Save:
32 | RemoveHTML: Yes
33 |
34 | # Settings for Hunter API module
35 | # RequestLimit: sets the maximum number of requests SimplyEmail will make to the Hunter API each time it is run. Every
36 | # 10 emails counts as 1 request. Set to 0 for no limit.
37 | # QuotaLimit: Hunter gives you 100 or 150 searches on the free plan, depending on whether you registered with a work email.
38 | # This setting stops requests once you hit the specified limit.
39 | # EmailType: choose whether to return only generic (e.g. info@example.com) emails, only personal (e.g. johndoe@example.com) emails, or both.
40 | [Hunter]
41 | RequestLimit: 10
42 | QuotaLimit: 100
43 | EmailType: Personal
44 |
45 | # You can use a few different key servers, so a config entry may be a good idea for this
46 | [SearchPGP]
47 | KeyServer: pgp.rediris.es:11371
48 | Hostname: pgp.rediris.es
49 |
50 | # Settings for Google Search
51 | [GoogleSearch]
52 | StartQuantity: 100
53 | QueryLimit: 500
54 | QueryStart: 0
55 |
56 | #Flickr Settings
57 | [FlickrSearch]
58 | Hostname: flickr.com
59 |
60 | #GitHub Code Scraping settings
61 | #Page Depth: WARNING every page can contain up to 30 users and multiple links to scrape; this can slow down results very quickly
62 | [GitHubSearch]
63 | PageDepth: 3
64 | QueryStart: 1
65 |
66 | #StartPage Search engine settings
67 | [StartPageSearch]
68 | StartQuantity: 100
69 | QueryLimit: 1000
70 | QueryStart: 0
71 |
72 | #YahooSearch engine settings
73 | [YahooSearch]
74 | StartQuantity: 100
75 | QueryLimit: 600
76 | QueryStart: 0
77 |
78 | #Canary PasteBin Search NON-API
79 | [CanaryPasteBin]
80 | PageDepth: 2
81 | QueryStart: 1
82 | MaxPastesToSearch: 50
83 |
84 | # Search GitHub Gist code
85 | # Page Depth: WARNING every page can contain up to 30 users and multiple links to scrape; this can slow down results very quickly
86 | [GitHubGistSearch]
87 | PageDepth: 3
88 | QueryStart: 1
89 |
90 | # Ask Search Engine Search
91 | [AskSearch]
92 | QueryPageLimit: 10
93 | QueryStart: 0
94 |
95 | # Search GitHub users
96 | # This can return a TON of users, but results depend on the parser
97 | [GitHubUserSearch]
98 | PageDepth: 15
99 | QueryStart: 1
100 |
101 | # Settings for Reddit post search
102 | [RedditPostSearch]
103 | QueryLimit: 200
104 | QueryStart: 0
105 |
106 | # Settings for Google PDF Search
107 | [GooglePDFSearch]
108 | StartQuantity: 0
109 | QueryLimit: 50
110 | QueryStart: 0
111 |
112 | # Settings for Google doc Search
113 | [GoogleDocSearch]
114 | StartQuantity: 0
115 | QueryLimit: 20
116 | QueryStart: 0
117 |
118 | # Settings for Google xlsx Search
119 | [GoogleXlsxSearch]
120 | StartQuantity: 0
121 | QueryLimit: 20
122 | QueryStart: 0
123 |
124 | # Settings for Google pptx Search
125 | [GooglePptxSearch]
126 | StartQuantity: 0
127 | QueryLimit: 20
128 | QueryStart: 0
129 |
130 | # Settings for Google docx Search
131 | [GoogleDocxSearch]
132 | StartQuantity: 0
133 | QueryLimit: 20
134 | QueryStart: 0
135 |
136 | # Settings for Google csv Search
137 | [GoogleCsvSearch]
138 | StartQuantity: 0
139 | QueryLimit: 20
140 | QueryStart: 0
141 |
142 | # Settings for PasteBin Search on Google
143 | [GooglePasteBinSearch]
144 | StartQuantity: 100
145 | QueryLimit: 500
146 | QueryStart: 0
147 |
148 | # Settings for Exalead Search
149 | [ExaleadSearch]
150 | StartQuantity: 30
151 | QueryLimit: 100
152 | QueryStart: 0
153 |
154 | # Settings for Exalead PPTX Search
155 | [ExaleadPPTXSearch]
156 | StartQuantity: 30
157 | QueryLimit: 100
158 | QueryStart: 0
159 |
160 | # Settings for Exalead PDF Search
161 | [ExaleadPDFSearch]
162 | StartQuantity: 30
163 | QueryLimit: 20
164 | QueryStart: 0
165 |
166 | # Settings for Exalead DOC Search
167 | [ExaleadDOCSearch]
168 | StartQuantity: 30
169 | QueryLimit: 20
170 | QueryStart: 0
171 |
172 | # Settings for Exalead DOCX Search
173 | [ExaleadDOCXSearch]
174 | StartQuantity: 30
175 | QueryLimit: 20
176 | QueryStart: 0
177 |
178 | # Settings for Exalead XLSX Search
179 | [ExaleadXLSXSearch]
180 | StartQuantity: 30
181 | QueryLimit: 20
182 | QueryStart: 0
183 |
--------------------------------------------------------------------------------
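The helper classes below read this file with the configparser package from the repository root. A minimal sketch of that access pattern (section and option names are taken from the file above; the printed value is illustrative):

    import configparser

    config = configparser.ConfigParser()
    config.read('Common/SimplyEmail.ini')

    useragent = config['GlobalSettings']['UserAgent']
    query_limit = int(config['GoogleSearch']['QueryLimit'])
    hunter_key = config['APIKeys']['Hunter']   # empty until a key is added
    print(query_limit)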
/Common/SimplyEmailAggressive.ini:
--------------------------------------------------------------------------------
1 | [GlobalSettings]
2 | UserAgent: Mozilla/5.0 (Windows; U; Windows NT 6.0;en-US; rv:1.9.2) Gecko/20100115 Firefox/3.6
3 | SaveFile: Email_List.txt
4 | HtmlFile: Email_List.html
5 | Version: v1.5.0
6 | VersionRepoCheck: Yes
7 | VersionRepoCheckLocation: https://raw.githubusercontent.com/killswitch-GUI/SimplyEmail/master/License/LICENSE-Version
8 |
9 | [ProcessConfig]
10 | TotalProcs: 8
11 |
12 | # API keys will be labeled
13 | # by the service
14 | [APIKeys]
15 | Canario:
16 |
17 | # Settings for HTML Scraping module
18 | # Save can take a path - default is the SimplyEmail folder with the domain name
19 | [HtmlScrape]
20 | Depth: 10
21 | Wait: 0
22 | LimitRate: 10000k
23 | Timeout: 2
24 | Save:
25 | RemoveHTML: Yes
26 |
27 | # You can use a few different key servers, so a config entry may be a good idea for this
28 | [SearchPGP]
29 | KeyServer: pgp.rediris.es:11371
30 | Hostname: pgp.rediris.es
31 |
32 | # Settings for Google Search
33 | [GoogleSearch]
34 | StartQuantity: 100
35 | QueryLimit: 800
36 | QueryStart: 0
37 |
38 | #Flickr Settings
39 | [FlickrSearch]
40 | Hostname: flickr.com
41 |
42 | #GitHub Code Scraping settings
43 | #Page Depth: WARNING every page can contain up to 30 users and multiple links to scrape; this can slow down results very quickly
44 | [GitHubSearch]
45 | PageDepth: 10
46 | QueryStart: 1
47 |
48 | #StartPage Search engine settings
49 | [StartPageSearch]
50 | StartQuantity: 100
51 | QueryLimit: 2000
52 | QueryStart: 0
53 |
54 | #YahooSearch engine settings
55 | [YahooSearch]
56 | StartQuantity: 100
57 | QueryLimit: 900
58 | QueryStart: 0
59 |
60 | #Canary PasteBin Search NON-API
61 | [CanaryPasteBin]
62 | PageDepth: 4
63 | QueryStart: 1
64 | MaxPastesToSearch: 50
65 |
66 | # Search GitHub Gist code
67 | # Page Depth: WARNING every page can contain up to 30 users and multiple links to scrape; this can slow down results very quickly
68 | [GitHubGistSearch]
69 | PageDepth: 10
70 | QueryStart: 1
71 |
72 | # Ask Search Engine Search
73 | [AskSearch]
74 | QueryPageLimit: 50
75 | QueryStart: 0
76 |
77 | # Search GitHub users
78 | # This can return a TON of users, but results depend on the parser
79 | [GitHubUserSearch]
80 | PageDepth: 30
81 | QueryStart: 1
82 |
83 | # Settings for Reddit post search
84 | [RedditPostSearch]
85 | QueryLimit: 500
86 | QueryStart: 0
87 |
88 | # Settings for Google PDF Search
89 | [GooglePDFSearch]
90 | StartQuantity: 0
91 | QueryLimit: 500
92 | QueryStart: 0
93 |
94 | # Settings for Google doc Search
95 | [GoogleDocSearch]
96 | StartQuantity: 0
97 | QueryLimit: 500
98 | QueryStart: 0
99 |
100 | # Settings for Google xlsx Search
101 | [GoogleXlsxSearch]
102 | StartQuantity: 0
103 | QueryLimit: 500
104 | QueryStart: 0
105 |
106 | # Settings for Google pptx Search
107 | [GooglePptxSearch]
108 | StartQuantity: 0
109 | QueryLimit: 20
110 | QueryStart: 0
111 |
112 | # Settings for Google docx Search
113 | [GoogleDocxSearch]
114 | StartQuantity: 0
115 | QueryLimit: 500
116 | QueryStart: 0
117 |
118 | # Settings for PasteBin Search on Google
119 | [GooglePasteBinSearch]
120 | StartQuantity: 100
121 | QueryLimit: 800
122 | QueryStart: 0
123 |
124 | # Settings for Exalead Search
125 | [ExaleadSearch]
126 | StartQuantity: 30
127 | QueryLimit: 400
128 | QueryStart: 0
129 |
130 | # Settings for Exalead PPTX Search
131 | [ExaleadPPTXSearch]
132 | StartQuantity: 30
133 | QueryLimit: 400
134 | QueryStart: 0
135 |
136 | # Settings for Exalead PDF Search
137 | [ExaleadPDFSearch]
138 | StartQuantity: 30
139 | QueryLimit: 250
140 | QueryStart: 0
141 |
142 | # Settings for Exalead DOC Search
143 | [ExaleadDOCSearch]
144 | StartQuantity: 30
145 | QueryLimit: 250
146 | QueryStart: 0
147 |
148 | # Settings for Exalead DOCX Search
149 | [ExaleadDOCXSearch]
150 | StartQuantity: 30
151 | QueryLimit: 250
152 | QueryStart: 0
153 |
154 | # Settings for Exalead XLSX Search
155 | [ExaleadXLSXSearch]
156 | StartQuantity: 30
157 | QueryLimit: 250
158 | QueryStart: 0
159 |
160 |
--------------------------------------------------------------------------------
/Common/SimplyEmailTest.ini:
--------------------------------------------------------------------------------
1 | [GlobalSettings]
2 | UserAgent: Mozilla/5.0 (Windows; U; Windows NT 6.0;en-US; rv:1.9.2) Gecko/20100115 Firefox/3.6
3 | SaveFile: Email_List.txt
4 | HtmlFile: Email_List.html
5 | Version: v1.5.0
6 | VersionRepoCheck: Yes
7 | VersionRepoCheckLocation: https://raw.githubusercontent.com/killswitch-GUI/SimplyEmail/master/License/LICENSE-Version
8 |
9 | [ProcessConfig]
10 | TotalProcs: 8
11 |
12 | # API keys will be labeled
13 | # by the service
14 | [APIKeys]
15 | Canario:
16 |
17 | # Settings for HTML Scraping module
18 | # Save can take a path - default is the SimplyEmail folder with the domain name
19 | [HtmlScrape]
20 | Depth: 1
21 | Wait: 0
22 | LimitRate: 10000k
23 | Timeout: 2
24 | Maxfilesize:
25 | Save:
26 | RemoveHTML: Yes
27 |
28 | # You can use a few different key servers, so a config entry may be a good idea for this
29 | [SearchPGP]
30 | KeyServer: pgp.rediris.es:11371
31 | Hostname: pgp.rediris.es
32 |
33 | # Settings for Google Search
34 | [GoogleSearch]
35 | StartQuantity: 100
36 | QueryLimit: 100
37 | QueryStart: 0
38 |
39 | #Flickr Settings
40 | [FlickrSearch]
41 | Hostname: flickr.com
42 |
43 | #GitHub Code Scraping settings
44 | #Page Depth: WARNING every page can contain up to 30 users and multiple links to scrape; this can slow down results very quickly
45 | [GitHubSearch]
46 | PageDepth: 1
47 | QueryStart: 1
48 |
49 | #StartPage Search engine settings
50 | [StartPageSearch]
51 | StartQuantity: 100
52 | QueryLimit: 100
53 | QueryStart: 0
54 |
55 | #YahooSearch engine settings
56 | [YahooSearch]
57 | StartQuantity: 100
58 | QueryLimit: 100
59 | QueryStart: 0
60 |
61 | #Canary PasteBin Search NON-API
62 | [CanaryPasteBin]
63 | PageDepth: 1
64 | QueryStart: 1
65 | MaxPastesToSearch: 5
66 |
67 | # Search GitHub Gist code
68 | # Page Depth: WARNING every page can contain up to 30 users and multiple links to scrape; this can slow down results very quickly
69 | [GitHubGistSearch]
70 | PageDepth: 1
71 | QueryStart: 1
72 |
73 | # Ask Search Engine Search
74 | [AskSearch]
75 | QueryPageLimit: 1
76 | QueryStart: 0
77 |
78 | # Search GitHub users
79 | # This can return a TON of users, but results depend on the parser
80 | [GitHubUserSearch]
81 | PageDepth: 1
82 | QueryStart: 1
83 |
84 | # Settings for Reddit post search
85 | [RedditPostSearch]
86 | QueryLimit: 100
87 | QueryStart: 0
88 |
89 | # Settings for Google PDF Search
90 | [GooglePDFSearch]
91 | StartQuantity: 0
92 | QueryLimit: 10
93 | QueryStart: 0
94 |
95 | # Settings for Google doc Search
96 | [GoogleDocSearch]
97 | StartQuantity: 0
98 | QueryLimit: 10
99 | QueryStart: 0
100 |
101 | # Settings for Google csv Search
102 | [GoogleCsvSearch]
103 | StartQuantity: 0
104 | QueryLimit: 20
105 | QueryStart: 0
106 |
107 | # Settings for Google pptx Search
108 | [GooglePptxSearch]
109 | StartQuantity: 0
110 | QueryLimit: 20
111 | QueryStart: 0
112 |
118 |
119 | # Settings for Google xlsx Search
120 | [GoogleXlsxSearch]
121 | StartQuantity: 0
122 | QueryLimit: 10
123 | QueryStart: 0
124 |
125 | # Settings for Google docx Search
126 | [GoogleDocxSearch]
127 | StartQuantity: 0
128 | QueryLimit: 10
129 | QueryStart: 0
130 |
131 | # Settings for PasteBin Search on Google
132 | [GooglePasteBinSearch]
133 | StartQuantity: 100
134 | QueryLimit: 100
135 | QueryStart: 0
136 |
137 | # Settings for Exalead Search
138 | [ExaleadSearch]
139 | StartQuantity: 30
140 | QueryLimit: 100
141 | QueryStart: 0
142 |
143 | # Settings for Exalead PPTX Search
144 | [ExaleadPPTXSearch]
145 | StartQuantity: 30
146 | QueryLimit: 100
147 | QueryStart: 0
148 |
149 | # Settings for Exalead PDF Search
150 | [ExaleadPDFSearch]
151 | StartQuantity: 30
152 | QueryLimit: 20
153 | QueryStart: 0
154 |
155 | # Settings for Exalead DOC Search
156 | [ExaleadDOCSearch]
157 | StartQuantity: 30
158 | QueryLimit: 20
159 | QueryStart: 0
160 |
161 | # Settings for Exalead DOCX Search
162 | [ExaleadDOCXSearch]
163 | StartQuantity: 30
164 | QueryLimit: 20
165 | QueryStart: 0
166 |
167 | # Settings for Exalead XLSX Search
168 | [ExaleadXLSXSearch]
169 | StartQuantity: 30
170 | QueryLimit: 20
171 | QueryStart: 0
172 |
--------------------------------------------------------------------------------
/Common/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SimplySecurity/SimplyEmail/6a42d373a13b258e90d61efc82c527c5b754a9b8/Common/__init__.py
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | # image base
2 | FROM ubuntu:16.04
3 |
4 | # extra metadata
5 | LABEL maintainer="SimplySecurity"
6 | LABEL description="Dockerfile base for SimplyEmail."
7 |
8 | # env setup
9 | ENV DEBIAN_FRONTEND=noninteractive
10 |
11 | # set the def shell for ENV
12 | SHELL ["/bin/bash", "-c"]
13 |
14 | # install basic build items
15 | RUN apt-get update && apt-get install -qy \
16 | wget \
17 | curl \
18 | git \
19 | sudo \
20 | apt-utils \
21 | lsb-core \
22 | python2.7
23 |
24 | # cleanup image
25 | RUN apt-get -qy clean \
26 | autoremove
27 |
28 |
29 | RUN git clone -b master https://github.com/SimplySecurity/SimplyEmail.git /opt/SimplyEmail && \
30 | cd /opt/SimplyEmail/ && \
31 | ./setup/setup.sh
32 |
33 | WORKDIR "/opt/SimplyEmail"
34 | ENTRYPOINT ["./SimplyEmail.py"]
35 |
--------------------------------------------------------------------------------
/Helpers/CanarioAPI.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | # -*- coding: utf-8 -*-
3 | import json
4 | import requests
5 |
6 | # https://github.com/CanaryPW/Canary-Python
7 |
8 | # Canary-Python - A framework for the Canary API
9 | # Copyright (C) 2014 Colin Keigher (colin@keigher.ca)
10 |
11 | # This program is free software; you can redistribute it and/or modify
12 | # it under the terms of the GNU General Public License as published by
13 | # the Free Software Foundation; either version 2 of the License, or
14 | # (at your option) any later version.
15 |
16 | # This program is distributed in the hope that it will be useful,
17 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
18 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 | # GNU General Public License for more details.
20 |
21 | # You should have received a copy of the GNU General Public License along
22 | # with this program; if not, write to the Free Software Foundation, Inc.,
23 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 |
25 |
26 | class canary(object):
27 |
28 | def __init__(s, api_key, host=None, debug=False):
29 | s.api_key = api_key
30 | if debug: # This is really for me and nothing else.
31 | s.url = 'http://%s/_api/?key=%s' % (host, api_key)
32 | else:
33 | s.url = 'https://canar.io/_api/?key=%s' % api_key
34 | s.data = None
35 |
36 | # Simple request made
37 | def retrieve(s, url, data=None, post=False):
38 | if post:
39 | r = requests.post(url, data=data)
40 | else:
41 | r = requests.get(url)
42 | if r.status_code == 200:
43 | s.data = json.loads(r.text)
44 |
45 | # 'data' must be in the form of a dictionary
46 | def build_url(s, data):
47 | d = ['%s=%s' % (x, y) for x, y in data.iteritems()]
48 | return '%s&%s' % (s.url, '&'.join(d))
49 |
50 | # Does a search--whee. Bangs can be specified via separate argument. This is due to plan to make changes to the search for API users
51 | # in the future.
52 | def search(s, query, bang=None):
53 | if bang is not None:
54 | query = '!%s %s' % (bang, query)
55 | url = s.build_url({'action': 'search', 'query': query})
56 | s.retrieve(url=url)
57 | return s.data
58 |
59 | # Views a reference ID. Nothing special.
60 | def view(s, item):
61 | url = s.build_url({'action': 'view', 'item': item})
62 | s.retrieve(url=url)
63 | return s.data
64 |
65 | # Users with the ability to submit data can use this to send. This is not
66 | # documented.
67 | def store(s, title, text, source, source_url):
68 | if title is None:
69 | title = 'Untitled'
70 | data = {'title': title, 'text': text,
71 | 'source': source, 'source_url': source_url}
72 | url = s.build_url({'action': 'store'})
73 | s.retrieve(url=url, data=data, post=True)
74 |
--------------------------------------------------------------------------------
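A minimal usage sketch for the canary wrapper above, assuming a valid canar.io API key (the key string and the query are placeholders):

    from Helpers.CanarioAPI import canary

    c = canary(api_key='YOUR_API_KEY')        # placeholder key
    results = c.search('example.com')         # optionally pass bang='...' to scope the search
    if results:
        print(results)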
/Helpers/Connect6.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import helpers
3 | import requests
4 | import configparser
5 | import urlparse
6 | import logging
7 | from BeautifulSoup import BeautifulSoup
8 |
9 |
10 | class Connect6Scraper(object):
11 |
12 | '''
13 | A simple class to scrape names from connect6.com
14 | '''
15 |
16 | def __init__(self, domain, Verbose=False):
17 | config = configparser.ConfigParser()
18 | try:
19 | self.logger = logging.getLogger("SimplyEmail.Connect6")
20 | config.read('Common/SimplyEmail.ini')
21 | self.UserAgent = {
22 | 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
23 | self.domain = domain
24 | self.FinalAnswer = ''
25 | self.verbose = Verbose
26 | except Exception as e:
27 | print e
28 |
29 | '''
30 | Try to find the connect6 url for the domain
31 |     you are targeting.
32 | '''
33 |
34 | def Connect6AutoUrl(self):
35 | # Using startpage to attempt to get the URL
36 | # https://www.google.com/search?q=site:connect6.com+domain.com
37 | try:
38 | # This returns a JSON object
39 | urllist = []
40 | domain = self.domain.split('.')
41 | url = "https://www.google.com/search?q=site:connect6.com+%22" + \
42 | domain[0] + '%22'
43 | r = requests.get(url, headers=self.UserAgent)
44 | except Exception as e:
45 |             error = " [!] Major issue with Google Search for Connect6 URL: " + \
46 |                 str(e)
47 | print helpers.color(error, warning=True)
48 | try:
49 | rawhtml = r.content
50 | soup = BeautifulSoup(rawhtml)
51 | for a in soup.findAll('a', href=True):
52 | try:
53 | l = urlparse.parse_qs(
54 | urlparse.urlparse(a['href']).query)['q']
55 | if 'site:connect6.com' not in l[0]:
56 | l = l[0].split(":")
57 | urllist.append(l[2])
58 | except:
59 | pass
60 | if urllist:
61 | y = 0
62 | s = 0
63 | for x in urllist:
64 | if "/c" in x:
65 | urllist.insert(s, urllist.pop(y))
66 | s += 1
67 | y += 1
68 | return urllist
69 | except Exception as e:
70 | print e
71 | return urllist
72 |
73 | def Connect6Download(self, url):
74 | '''
75 | Downloads raw source of Connect6 page.
76 | '''
77 | NameList = []
78 | try:
79 | if url.startswith('http') or url.startswith('https'):
80 | r = requests.get(url, headers=self.UserAgent)
81 | else:
82 | url = 'http://' + str(url)
83 | if self.verbose:
84 | p = " [*] Now downloading Connect6 Source: " + str(url)
85 | print helpers.color(p, firewall=True)
86 | r = requests.get(url, headers=self.UserAgent)
87 | except Exception as e:
88 | error = " [!] Major issue with Downloading Connect6 source:" + \
89 | str(e)
90 | print helpers.color(error, warning=True)
91 | try:
92 | if r:
93 | rawhtml = r.content
94 | soup = BeautifulSoup(rawhtml)
95 | try:
96 | for utag in soup.findAll("ul", {"class": "directoryList"}):
97 | for litag in utag.findAll('li'):
98 | NameList.append(litag.text)
99 | if self.verbose:
100 | p = " [*] Connect6 Name Found: " + \
101 | str(litag.text)
102 | print helpers.color(p, firewall=True)
103 | except:
104 | pass
105 | return NameList
106 | # for a in soup.findAll('a', href=True):
107 | except Exception as e:
108 | print e
109 |
110 | def Connect6ParseName(self, raw):
111 | '''
112 | Takes a raw non parsed name from connect 6.
113 | Returns a list of the Name [first, last]
114 | '''
115 | # Adapted by:
116 | # Author: @Harmj0y
117 | # Author Blog: http://t.co/ZYPKvkeayX
118 |         # helper to try to parse all the types of naming conventions
119 | try:
120 | if raw.strip() != "":
121 | if "(" in raw:
122 | raw = raw.split("(")[0]
123 |
124 | if "," in raw:
125 | raw = raw.split(",")[0]
126 |
127 | if "/" in raw:
128 | raw = raw.split("/")[0]
129 |
130 | raw = raw.strip()
131 |
132 | if raw.endswith("."):
133 | return None
134 |
135 | if len(raw) == 1:
136 | return None
137 |
138 | if "LinkedIn" in raw:
139 | return None
140 |
141 | if "\"" in raw:
142 | return None
143 |
144 | parts = raw.split()
145 |
146 | firstName = ""
147 | lastName = ""
148 |
149 | if len(parts) > 2:
150 | if "(" in parts[1]:
151 | # assume nickname in middle
152 | firstName = parts[0].strip()
153 | lastName = parts[2].strip()
154 | elif len(parts[2]) < 4:
155 | # assume certification
156 | firstName = parts[0].strip()
157 | lastName = parts[1].strip()
158 | else:
159 | # assume FIRST MIDDLE LASTNAME
160 | firstName = parts[0].strip()
161 | lastName = parts[2].strip()
162 |
163 | elif len(parts) == 2:
164 | # assume FIRST LASTNAME
165 | firstName = parts[0].strip()
166 | lastName = parts[1].strip()
167 |
168 | if "." in lastName:
169 | return None
170 |
171 | if len(lastName) < 2:
172 | return None
173 |
174 | if "\"" in lastName:
175 | lastName = lastName.replace("\"", "")
176 |
177 | if "'" in lastName:
178 | lastName = lastName.replace("'", "")
179 |
180 |                 return [firstName, lastName]
181 |
182 | except Exception as e:
183 |             print helpers.color(' [!] Failed to parse name: ' + str(e), warning=True)
184 |
--------------------------------------------------------------------------------
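A minimal usage sketch for the Connect6Scraper above, assuming it is run from the repository root (so Common/SimplyEmail.ini resolves) with the repo's Python 2 dependencies installed; the domain is illustrative:

    from Helpers.Connect6 import Connect6Scraper

    scraper = Connect6Scraper('example.com', Verbose=True)
    urls = scraper.Connect6AutoUrl()                  # Google for the matching connect6.com page
    if urls:
        raw_names = scraper.Connect6Download(urls[0]) or []
        for raw in raw_names:
            parsed = scraper.Connect6ParseName(raw)
            if parsed:
                print(parsed)                         # [firstName, lastName]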
/Helpers/Converter.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import logging
3 | import docx2txt
4 | from zipfile import ZipFile
5 | # from pptx import Presentation
6 | from subprocess import Popen, PIPE, STDOUT
7 | from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
8 | from pdfminer.converter import TextConverter
9 | from pdfminer.layout import LAParams
10 | from pdfminer.pdfpage import PDFPage
11 | from cStringIO import StringIO
12 |
13 |
14 | class Converter(object):
15 |
16 | def __init__(self, verbose=False):
17 | try:
18 | self.logger = logging.getLogger("SimplyEmail.Converter")
19 | self.verbose = verbose
20 | except Exception as e:
21 | print e
22 |
23 | def convert_docx_to_txt(self, path):
24 | """
25 | A very simple conversion function
26 | which returns unicode text for
27 | parsing.
28 |
29 | path = The path to the file
30 | """
31 | # https://github.com/ankushshah89/python-docx2txt
32 | try:
33 | text = docx2txt.process(path)
34 | self.logger.debug("Converted docx to text: " + str(path))
35 | return unicode(text)
36 |         except Exception as e:
37 |             self.logger.error(
38 |                 "Failed to convert DOCX to text: " + str(e))
39 |             text = ""
40 |             return text
41 |
42 | def convert_doc_to_txt(self, path):
43 | """
44 | A very simple conversion function
45 | which returns text for parsing.
46 |
47 | path = The path to the file
48 | """
49 | try:
50 | cmd = ['antiword', path]
51 | p = Popen(cmd, stdout=PIPE, stderr=STDOUT)
52 | stdout, stderr = p.communicate()
53 | return stdout.decode('ascii', 'ignore')
54 |         except Exception as e:
55 |             self.logger.error(
56 |                 "Failed to convert DOC to text: " + str(e))
57 |             text = ""
58 |             return text
59 |
60 | # def convert_pptx_to_txt(self, path):
61 | # prs = Presentation(path)
62 | # # text_runs will be populated with a list of strings,
63 | # # one for each text run in presentation
64 | # text_runs = ""
65 | # try:
66 | # for slide in prs.slides:
67 | # try:
68 | # for shape in slide.shapes:
69 | # if not shape.has_text_frame:
70 | # continue
71 | # for paragraph in shape.text_frame.paragraphs:
72 | # for run in paragraph.runs:
73 | # text_runs += str(run.text) + ' '
74 | # except:
75 | # pass
76 | # return text_runs
77 | # except Exception as e:
78 | # if text_runs:
79 | # return text_runs
80 | # else:
81 | # text_runs = ""
82 | # return text_runs
83 | # self.logger.error("Failed to convert pptx: " + str(e))
84 |
85 | def convert_pdf_to_txt(self, path):
86 | """
87 | A very simple conversion function
88 | which returns text for parsing from PDF.
89 |
90 | path = The path to the file
91 | """
92 | try:
93 | rsrcmgr = PDFResourceManager()
94 | retstr = StringIO()
95 | codec = 'utf-8'
96 | laparams = LAParams()
97 | device = TextConverter(
98 | rsrcmgr, retstr, codec=codec, laparams=laparams)
99 | fp = file(path, 'rb')
100 | interpreter = PDFPageInterpreter(rsrcmgr, device)
101 | password = ""
102 | maxpages = 0
103 | caching = True
104 | pagenos = set()
105 | for page in PDFPage.get_pages(fp, pagenos, maxpages=maxpages, password=password, caching=caching,
106 | check_extractable=True):
107 | interpreter.process_page(page)
108 | text = retstr.getvalue()
109 | fp.close()
110 | device.close()
111 | retstr.close()
112 | return text
113 |         except Exception as e:
114 |             self.logger.error(
115 |                 "Failed to convert PDF to text: " + str(e))
116 |             text = ""
117 |             return text
118 |
119 | def convert_Xlsx_to_Csv(self, path):
120 | # Using the Xlsx2csv tool seemed easy and was in python anyhow
121 | # it also supported custom delim :)
122 | self.logger.debug("convert_Xlsx_to_Csv on file: " + str(path))
123 | try:
124 | cmd = ['xlsx2csv', path]
125 | p = Popen(cmd, stdout=PIPE, stderr=STDOUT)
126 | stdout, stderr = p.communicate()
127 | text = stdout.decode('ascii', 'ignore')
128 | return text
129 |         except Exception as e:
130 |             self.logger.error(
131 |                 "Failed to convert_Xlsx_to_Csv to text: " + str(e))
132 |             text = ""
133 |             return text
134 |
135 | def convert_zip_to_text(self, path, rawtext=True):
136 | # http://stackoverflow.com/questions/10908877/extracting-a-zipfile-to-memory
137 | try:
138 | self.logger.debug("Attempting to unzip file: " + str(path))
139 | input_zip = ZipFile(path)
140 | if rawtext:
141 | text = ""
142 | a = {name: input_zip.read(name) for name in input_zip.namelist()}
143 | for x in a:
144 | try:
145 | text += str(a[x])
146 | except Exception as e:
147 | print e
148 | # pass
149 |                 self.logger.debug("Unzip of file completed (raw text): " + str(path))
150 | return text
151 | else:
152 | return {name: input_zip.read(name) for name in input_zip.namelist()}
153 |         except Exception as e:
154 |             print e
155 |             self.logger.error(
156 |                 "Failed to unzip file: " + str(e))
157 |             text = ""
158 |             return text
--------------------------------------------------------------------------------
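A minimal usage sketch for the Converter above, run from the repository root; the sample documents referenced ship in the tests/ directory, and antiword/xlsx2csv must be installed for the DOC/XLSX paths:

    from Helpers.Converter import Converter

    conv = Converter(verbose=True)
    docx_text = conv.convert_docx_to_txt('tests/Test-DOCX.docx')
    pdf_text = conv.convert_pdf_to_txt('tests/Test-PDF.pdf')
    print(len(docx_text))
    print(len(pdf_text))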
/Helpers/Download.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import requests
3 | import os
4 | import configparser
5 | import helpers
6 | import logging
7 | import urllib2, httplib
8 | import time
9 | from bs4 import BeautifulSoup
10 | from random import randint
11 |
12 |
13 | class Download(object):
14 |
15 | def __init__(self, verbose=False):
16 | config = configparser.ConfigParser()
17 | try:
18 | self.logger = logging.getLogger("SimplyEmail.Download")
19 | self.verbose = verbose
20 | config.read('Common/SimplyEmail.ini')
21 | self.UserAgent = {
22 | 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
23 | except Exception as e:
24 | print e
25 |
26 | def download_file(self, url, filetype, maxfile=100, verify=True):
27 | """
28 | Downloads a file using requests,
29 |
30 | maxfile=100 in MegaBytes
31 | chunk_size=1024 the bytes to write from mem
32 | """
33 | # using the filename is dangerous, could have UTF8 chars etc.
34 | local_filename = randint(10000, 999999999)
35 | # set name
36 | local_filename = str(local_filename) + str(filetype)
37 | # local_filename = url.split('/')[-1]
38 | # NOTE the stream=True parameter
39 | if url.startswith('http') or url.startswith('https'):
40 | pass
41 | else:
42 | url = 'http://' + str(url)
43 | try:
44 | time.sleep(2)
45 | self.logger.debug("Download started download: " + str(url))
46 | r = requests.get(url, stream=True, headers=self.UserAgent, verify=verify)
47 | with open(local_filename, 'wb+') as f:
48 | for chunk in r.iter_content(chunk_size=1024):
49 | if chunk:
50 | # filter out keep-alive new chunks
51 | f.write(chunk)
52 | # f.flush() commented by recommendation from
53 | # J.F.Sebastian
54 | download = os.path.isfile(local_filename)
55 | return local_filename, download
56 | except Exception as e:
57 | if self.verbose:
58 |                 p = ' [*] Download of file failed: ' + str(e)
59 | print helpers.color(p, firewall=True)
60 | self.logger.error("Failed to download file: " + str(url) + ' error: ' + str(e))
61 | download = os.path.isfile(local_filename)
62 | return local_filename, download
63 |
64 | def download_file2(self, url, filetype, timeout=10):
65 | # using the filename is dangerous, could have UTF8 chars etc.
66 | local_filename = randint(10000, 999999999)
67 | # set name
68 | local_filename = str(local_filename) + str(filetype)
69 | # local_filename = url.split('/')[-1]
70 | # NOTE the stream=True parameter
71 | if url.startswith('http') or url.startswith('https'):
72 | pass
73 | else:
74 | # small hack till I figure out google cache errors
75 | url = 'http://' + str(url)
76 | try:
77 | self.logger.debug("Download2 started download: " + str(url))
78 | response = urllib2.urlopen(url, timeout=timeout)
79 | data = response.read()
80 | download = os.path.isfile(local_filename)
81 |         except urllib2.HTTPError as e:
82 |             self.logger.debug('urllib2 HTTPError: ' + str(e))
83 |         except urllib2.URLError as e:
84 |             self.logger.debug('urllib2 URLError: ' + str(e))
85 |         except httplib.HTTPException as e:
86 |             self.logger.debug('httplib HTTPException: ' + str(e))
87 | except Exception as e:
88 | if self.verbose:
89 |                 p = ' [*] Download2 of file failed: ' + str(e)
90 | print helpers.color(p, firewall=True)
91 | self.logger.error("Failed to download2 file: " + str(e))
92 | try:
93 | with open(local_filename, 'wb+') as f:
94 | f.write(data)
95 | download = os.path.isfile(local_filename)
96 | self.logger.debug("Download2 completed fully: " + str(url))
97 | return local_filename, download
98 | except:
99 | download = os.path.isfile(local_filename)
100 | return local_filename, download
101 |
102 | def delete_file(self, local_filename):
103 | # Deletes a file from local path
104 | try:
105 | if os.path.isfile(local_filename):
106 | os.remove(local_filename)
107 | self.logger.debug("File deleted: " + str(local_filename))
108 | else:
109 | if self.verbose:
110 | p = ' [*] File not found to remove : ' + local_filename
111 | print helpers.color(p, firewall=True)
112 | except Exception as e:
113 | self.logger.error("Failed to delete file: " + str(e))
114 | if self.verbose:
115 | print e
116 |
117 | def GoogleCaptchaDetection(self, RawHtml):
118 | soup = BeautifulSoup(RawHtml, "lxml")
119 | if "Our systems have detected unusual traffic" in soup.text:
120 | p = " [!] Google Captcha was detected! (For best results resolve/restart -- Increase sleep/jitter in SimplyEmail.ini)"
121 | self.logger.warning("Google Captcha was detected!")
122 | print helpers.color(p, warning=True)
123 | return True
124 | else:
125 | return False
126 |
127 | def requesturl(self, url, useragent, timeout=10, retrytime=5, statuscode=False, raw=False, verify=True):
128 | """
129 | A very simple request function
130 | This is setup to handle the following parms:
131 |
132 | url = the passed in url to request
133 | useragent = the useragent to use
134 | timeout = how long to wait if no "BYTES" rec
135 |
136 | Exception handling will also retry on the event of
137 | a timeout and warn the user.
138 | """
139 | rawhtml = ""
140 | try:
141 | r = requests.get(url, headers=self.UserAgent, timeout=timeout, verify=verify)
142 | rawhtml = r.content
143 | self.logger.debug(
144 | 'Request completed: code = ' + str(r.status_code) + ' size = ' + str(len(rawhtml)) + ' url = ' + str(url))
145 | except requests.exceptions.Timeout:
146 | # set up for a retry
147 | if self.verbose:
148 | p = ' [!] Request for url timed out, retrying: ' + url
149 | self.logger.info('Request timed out, retrying: ' + url)
150 | print helpers.color(p, firewall=True)
151 | r = requests.get(url, headers=self.UserAgent, timeout=retrytime, verify=verify)
152 | rawhtml = r.content
153 | except requests.exceptions.TooManyRedirects:
154 | # fail and move on, alert user
155 | if self.verbose:
156 | p = ' [!] Request for url resulted in bad url: ' + url
157 | self.logger.error(
158 | 'Request for url resulted in bad url: ' + url)
159 | print helpers.color(p, warning=True)
160 | except requests.exceptions.RequestException as e:
161 | # catastrophic error. bail.
162 | if self.verbose:
163 | p = ' [!] Request for url resulted in major error: ' + str(e)
164 | self.logger.critical(
165 | 'Request for url resulted in major error: ' + str(e))
166 | print helpers.color(p, warning=True)
167 | except Exception as e:
168 | p = ' [!] Request for url resulted in unhandled error: ' + str(e)
169 | self.logger.critical(
170 | 'Request for url resulted in unhandled error: ' + str(e))
171 | # just return blank data if failed
172 | # to prevent bails
173 | if statuscode:
174 | # return status code and html
175 | status = r.status_code
176 | return rawhtml, status
177 | elif raw:
178 | # return raw request object
179 | return r
180 | else:
181 | return rawhtml
182 |
--------------------------------------------------------------------------------
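A minimal usage sketch for the Download helper above (illustrative URLs; run from the repository root so the config and logger resolve):

    from Helpers.Download import Download

    dl = Download(verbose=True)
    html = dl.requesturl('http://example.com', useragent=dl.UserAgent, timeout=10)
    print(len(html))

    # download_file returns (local_filename, downloaded_bool)
    path, ok = dl.download_file('http://example.com/sample.pdf', '.pdf')
    if ok:
        dl.delete_file(path)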
/Helpers/LinkedinNames.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import helpers
3 | import configparser
4 | import mechanize
5 | from bs4 import BeautifulSoup
6 |
7 |
8 | # This class has been adapted from (@pan0pt1c0n):
9 | # https://github.com/pan0pt1c0n/PhishBait/blob/master/Bing_Scraper.py
10 |
11 | class LinkedinScraper(object):
12 |
13 | '''
14 | A simple class to scrape names from bing.com for
15 | LinkedIn names.
16 | '''
17 |
18 | def __init__(self, domain, Verbose=False):
19 | config = configparser.ConfigParser()
20 | try:
21 | config.read('Common/SimplyEmail.ini')
22 | self.UserAgent = {
23 | 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
24 | self.domain = domain
25 | self.FinalAnswer = ''
26 | self.verbose = Verbose
27 | except Exception as e:
28 | print e
29 |
30 | def LinkedInNames(self):
31 | # This function simply uses
32 | # Bing to scrape for names and
33 | # returns a list of list names.
34 | try:
35 | br = mechanize.Browser()
36 | br.set_handle_robots(False)
37 | self.domain = self.domain.split('.')
38 | self.domain = self.domain[0]
39 | r = br.open('http://www.bing.com/search?q=(site%3A%22www.linkedin.com%2Fin%2F%22%20OR%20site%3A%22www.linkedin.com%2Fpub%2F%22)%20%26%26%20(NOT%20site%3A%22www.linkedin.com%2Fpub%2Fdir%2F%22)%20%26%26%20%22' +
40 | self.domain + '%22&qs=n&form=QBRE&pq=(site%3A%22www.linkedin.com%2Fin%2F%22%20or%20site%3A%22www.linkedin.com%2Fpub%2F%22)%20%26%26%20(not%20site%3A%22www.linkedin.com%2Fpub%2Fdir%2F%22)%20%26%26%20%22'+self.domain+'%22')
41 | soup = BeautifulSoup(r, 'lxml')
42 | if soup:
43 | link_list = []
44 | namelist = []
45 | more_records = True
46 | Round = False
47 | while more_records:
48 | if Round:
49 | response = br.follow_link(text="Next")
50 | soup = BeautifulSoup(response)
51 | # enter this loop to parse all results
52 |                 # also follow any secondary links
53 | for definition in soup.findAll('h2'):
54 | definition = definition.renderContents()
55 | if "LinkedIn" in definition:
56 |                         name = (((((definition.replace('<strong>', '')).replace(
57 |                             '</strong>', '')).split('>')[1]).split('|')[0]).rstrip()).split(',')[0]
58 | name = name.split(' ')
59 | if self.verbose:
60 | e = ' [*] LinkedIn Name Found: ' + str(name)
61 | print helpers.color(e, firewall=True)
62 | namelist.append(name)
63 | for link in br.links():
64 | link_list.append(link.text)
65 | if "Next" in link_list:
66 | more_records = True
67 | Round = True
68 | else:
69 | more_records = False
70 | if namelist:
71 | return namelist
72 | except Exception as e:
73 | error = " [!] Major issue with Downloading LinkedIn source:" + \
74 | str(e)
75 | print helpers.color(error, warning=True)
76 | if namelist:
77 | return namelist
78 |
79 | def LinkedInClean(self, raw):
80 | '''
81 | This function simply uses clean names.
82 | '''
83 | try:
84 | if raw:
85 | firstname = raw[0]
86 | lastname = raw[1]
87 | try:
88 | if "'" in firstname:
89 | firstname = firstname.replace("'", "")
90 | if "-" in firstname:
91 | firstname = firstname.replace("-", "")
92 | if " " in firstname:
93 | firstname = firstname.replace(" ", "")
94 | if "," in firstname:
95 | firstname = firstname.replace(",", "")
96 | if "(" in firstname:
97 | firstname = firstname.replace("(", "")
98 | if ")" in firstname:
99 | firstname = firstname.replace(")", "")
100 | if "'" in lastname:
101 | lastname = lastname.replace("'", "")
102 | if "-" in lastname:
103 | lastname = lastname.replace("-", "")
104 | if " " in lastname:
105 | lastname = lastname.replace(" ", "")
106 | if "," in lastname:
107 | lastname = lastname.replace(",", "")
108 | if "(" in lastname:
109 | lastname = lastname.replace("(", "")
110 | if ")" in lastname:
111 | lastname = lastname.replace(")", "")
112 | if ("@" in firstname) or ("@" in lastname):
113 | return None
114 | except Exception as e:
115 | pass
116 | try:
117 | if raw[3]:
118 | firstname = raw[0]
119 | lastname = raw[3]
120 | return [firstname, lastname]
121 | except Exception as e:
122 | pass
123 | if self.verbose:
124 | e = ' [*] Name Cleaned: ' + str([firstname, lastname])
125 | print helpers.color(e, firewall=True)
126 | return [firstname, lastname]
127 | except Exception as e:
128 | if self.verbose:
129 | h = " [!] Error during name building: " + str(e)
130 | print helpers.color(h, warning=True)
131 | return None
132 |
--------------------------------------------------------------------------------
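A minimal usage sketch for the LinkedinScraper above (illustrative domain; mechanize and bs4 come from the setup script's dependencies):

    from Helpers.LinkedinNames import LinkedinScraper

    scraper = LinkedinScraper('example.com', Verbose=True)
    raw_names = scraper.LinkedInNames() or []
    for raw in raw_names:
        cleaned = scraper.LinkedInClean(raw)
        if cleaned:
            print(cleaned)        # [firstname, lastname]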
/Helpers/Parser.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # encoding=utf8
3 |
4 | import os
5 | import re
6 | import logging
7 | import string
8 | import subprocess
9 | import time
10 | from random import randint
11 | import helpers
12 |
13 | # Simple Parser Options for email enumeration.
14 |
15 | # Taken from theHarvester
16 |
17 |
18 | class Parser(object):
19 |
20 | def __init__(self, InputData):
21 | self.InputData = InputData
22 | self.logger = logging.getLogger("SimplyEmail.Parser")
23 | #self.domain = domain
24 |
25 | # A really good url clean by theHarvester at :
26 | # https://raw.githubusercontent.com/killswitch-GUI/theHarvester/master/myparser.py
27 | def genericClean(self):
28 |         self.InputData = re.sub('<em>', '', self.InputData)
29 |         self.InputData = re.sub('<b>', '', self.InputData)
30 |         self.InputData = re.sub('</b>', '', self.InputData)
31 |         self.InputData = re.sub('</em>', '', self.InputData)
32 |         self.InputData = re.sub('%2f', ' ', self.InputData)
33 |         self.InputData = re.sub('%3a', ' ', self.InputData)
34 |         self.InputData = re.sub('<strong>', '', self.InputData)
35 |         self.InputData = re.sub('</strong>', '', self.InputData)
36 |         self.InputData = re.sub('<wbr>', ' ', self.InputData)
37 |         self.InputData = re.sub('</wbr>', ' ', self.InputData)
38 |         self.InputData = re.sub('<br>', ' ', self.InputData)
39 |
40 | for e in (',', '>', ':', '=', '<', '/', '\\', ';', '&', '%3A', '%3D', '%3C', '"', '"'):
41 | self.InputData = string.replace(self.InputData, e, ' ')
42 |
43 | # A really good url clean by theHarvester at :
44 | # https://raw.githubusercontent.com/killswitch-GUI/theHarvester/master/myparser.py
45 | def urlClean(self):
46 |         self.InputData = re.sub('<em>', '', self.InputData)
47 |         self.InputData = re.sub('</em>', '', self.InputData)
48 | self.InputData = re.sub('%2f', ' ', self.InputData)
49 | self.InputData = re.sub('%3a', ' ', self.InputData)
50 | for e in ('<', '>', ':', '=', ';', '&', '%3A', '%3D', '%3C'):
51 | self.InputData = string.replace(self.InputData, e, ' ')
52 |
53 | # http://stackoverflow.com/questions/32747648/
54 | # ascii-codec-cant-encode-character-u-u2019-ordinal-out-of-range128
55 | def RemoveUnicode(self):
56 | """ (str|unicode) -> (str|unicode)
57 |
58 | recovers ascii content from string_data
59 | """
60 | try:
61 | string_data = self.InputData
62 | if string_data is None:
63 | return string_data
64 | if isinstance(string_data, str):
65 | string_data = str(string_data.decode('ascii', 'ignore'))
66 | else:
67 | string_data = string_data.encode('ascii', 'ignore')
68 | remove_ctrl_chars_regex = re.compile(r'[^\x20-\x7e]')
69 | self.InputData = remove_ctrl_chars_regex.sub('', string_data)
70 | except Exception as e:
71 | self.logger.error('UTF8 decoding issues' + str(e))
72 | p = '[!] UTF8 decoding issues Matching: ' + str(e)
73 | print helpers.color(p, firewall=True)
74 |
75 | def FindEmails(self):
76 | Result = []
77 | match = re.findall('[\w\.-]+@[\w\.-]+', self.InputData)
78 | for item in match:
79 | Result.append(item)
80 | #emails = self.unique()
81 | return Result
82 |
83 | def GrepFindEmails(self):
84 | # Major hack during testing;
85 |         # I found grep was better at regex than Python's re
86 | FinalOutput = []
87 | StartFileName = randint(1000, 999999)
88 | EndFileName = randint(1000, 999999)
89 | val = ""
90 | try:
91 | with open(str(StartFileName), "w+") as myfile:
92 | myfile.write(self.InputData)
93 | ps = subprocess.Popen(
94 | ('grep', "@", str(StartFileName)), stdout=subprocess.PIPE)
95 | val = subprocess.check_output(("grep", "-i", "-o", '[A-Z0-9._%+-]\+@[A-Z0-9.-]\+\.[A-Z]\{2,4\}'),
96 | stdin=ps.stdout)
97 | # Start Email Evasion Check
98 |             # This will be a separate func to handle the larger sets of data
99 | EvasionVal = self.EmailEvasionCheck(ps)
100 | except Exception as e:
101 | pass
102 | #p = '[!] Pattern Matching Issue: ' + str(e)
103 | # print helpers.color(p, firewall=True)
104 |         # Remove this line for debugging pages
105 | os.remove(str(StartFileName))
106 | if len(val) > 0:
107 | with open(str(EndFileName), "w") as myfile:
108 | myfile.write(str(val))
109 | with open(str(EndFileName), "r") as myfile:
110 | output = myfile.readlines()
111 | os.remove(str(EndFileName))
112 | for item in output:
113 | FinalOutput.append(item.rstrip("\n"))
114 | return FinalOutput
115 |
116 | def EmailEvasionCheck(self, data):
117 | try:
118 | val = subprocess.check_output(("grep", "-i", "-o", '[A-Z0-9._%+-]\+\s+@+\s[A-Z0-9.-]\+\.[A-Z]\{2,4\}'),
119 | stdin=data.stdout)
120 | except:
121 | pass
122 |
123 | def CleanListOutput(self):
124 | FinalOutput = []
125 | for item in self.InputData:
126 | FinalOutput.append(item.rstrip("\n"))
127 | return FinalOutput
128 |
129 | def BuildResults(self, InputList, ModuleName):
130 | # Will use a generator expression to assign
131 | # emails to Keys and place into a list
132 | FinalOutput = []
133 | ModuleName = '"' + str(ModuleName) + '"'
134 | # build dict and append to list
135 | for email in InputList:
136 | email = '"' + str(email) + '"'
137 | ListItem = "{'Email': " + email + ", 'Source': " + ModuleName + "}"
138 | FinalOutput.append(ListItem)
139 | return FinalOutput
140 |
141 | def BuildJson(self, InputList, ModuleName):
142 | FinalOutput = []
143 | currentDate = str(time.strftime("%d/%m/%Y"))
144 | currentTime = str(time.strftime("%H:%M:%S"))
145 | moduleName = str(ModuleName)
146 | for email in InputList:
147 | obj = {
148 | 'email': email,
149 | 'module_name': moduleName,
150 | 'collection_time': currentTime,
151 | 'collection_data': currentDate,
152 | }
153 | FinalOutput.append(obj)
154 | # print FinalOutput
155 | return FinalOutput
156 |
157 |
158 | def extendedclean(self, modulename):
159 | self.genericClean()
160 | self.urlClean()
161 | finaloutput = self.GrepFindEmails()
162 | htmlresults = self.BuildResults(finaloutput, modulename)
163 | return finaloutput, htmlresults
164 |
--------------------------------------------------------------------------------
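A minimal usage sketch for the Parser above; extendedclean() chains the cleaning passes and the grep-based extraction (grep must be on the PATH), and the HTML snippet is illustrative:

    from Helpers.Parser import Parser

    raw_html = '<p>Contact: <b>jane.doe@example.com</b> or sales@example.com</p>'
    parse = Parser(raw_html)
    emails, htmlresults = parse.extendedclean("ExampleModule")
    print(emails)                                 # e.g. ['jane.doe@example.com', 'sales@example.com']
    print(parse.BuildJson(emails, "ExampleModule"))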
/Helpers/VerifyEmails.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import configparser
3 | import helpers
4 | import dns.resolver
5 | import socket
6 | import smtplib
7 |
8 |
9 | class VerifyEmail(object):
10 |
11 | '''
12 | Takes a domain name and an array of emails
13 | '''
14 |
15 | def __init__(self, email, email2, domain, Verbose=False):
16 | config = configparser.ConfigParser()
17 | try:
18 | config.read('Common/SimplyEmail.ini')
19 | self.UserAgent = {
20 | 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
21 | self.domain = domain
22 | self.email = email + email2
23 | self.mxhost = ""
24 | self.FinalList = []
25 |             self.verbose = Verbose
26 | except Exception as e:
27 | print e
28 |
29 |     def VerifyEmail(self, email):
30 | '''
31 | Takes one email and checks if it is valid.
32 | '''
33 | # Idea from:
34 | # https://www.scottbrady91.com/Email-Verification/Python-Email-Verification-Script
35 | hostname = socket.gethostname()
36 | socket.setdefaulttimeout(10)
37 | server = smtplib.SMTP(timeout=10)
38 | server.set_debuglevel(0)
39 | try:
40 | if self.verbose:
41 | e = " [*] Checking for valid email: " + str(email)
42 | print helpers.color(e, firewall=True)
43 | server.connect(self.mxhost['Host'])
44 | server.helo(hostname)
45 | server.mail('email@gmail.com')
46 | code, message = server.rcpt(str(email))
47 | server.quit()
48 |         except Exception as e:
49 |             print e
50 |             return False
51 |         if code == 250:
52 |             return True
53 |         return False
54 |
55 | def VerifySMTPServer(self):
56 | '''
57 |         Checks that the server returns a code other than 250 for a bogus address (catch-all detection).
58 | '''
59 | # Idea from:
60 | # https://www.scottbrady91.com/Email-Verification/Python-Email-Verification-Script
61 | hostname = socket.gethostname()
62 | socket.setdefaulttimeout(10)
63 | server = smtplib.SMTP(timeout=10)
64 | server.set_debuglevel(0)
65 | addressToVerify = "There.Is.Knowwaythiswillwork1234567@" + \
66 | str(self.domain)
67 | try:
68 | server.connect(self.mxhost['Host'])
69 | server.helo(hostname)
70 | server.mail('email@gmail.com')
71 | code, message = server.rcpt(str(addressToVerify))
72 | server.quit()
73 | if code == 250:
74 | return False
75 | else:
76 | return True
77 | except Exception as e:
78 | print e
79 |
80 | def GetMX(self):
81 | MXRecord = []
82 | try:
83 | if self.verbose:
84 | print helpers.color(' [*] Attempting to resolve MX records!', firewall=True)
85 | answers = dns.resolver.query(self.domain, 'MX')
86 | for rdata in answers:
87 | data = {
88 | "Host": str(rdata.exchange),
89 | "Pref": int(rdata.preference),
90 | }
91 | MXRecord.append(data)
92 | # Now find the lowest value in the pref
93 | Newlist = sorted(MXRecord, key=lambda k: k['Pref'])
94 | # Set the MX record
95 | self.mxhost = Newlist[0]
96 | if self.verbose:
97 | val = ' [*] MX Host: ' + str(self.mxhost['Host'])
98 | print helpers.color(val, firewall=True)
99 | except Exception as e:
100 | error = ' [!] Failed to get MX record: ' + str(e)
101 | print helpers.color(error, warning=True)
102 |
103 | def ExecuteVerify(self):
104 | self.GetMX()
105 | Value = self.VerifySMTPServer()
106 | if Value:
107 | for item in self.email:
108 | IsTrue = self.VerifyEmail(item)
109 | if IsTrue:
110 | e = " [!] Email seems valid: " + str(item)
111 | print helpers.color(e, status=True)
112 | self.FinalList.append(item)
113 | else:
114 | if self.verbose:
115 | e = " [!] Checks show email is not valid: " + str(item)
116 | print helpers.color(e, firewall=True)
117 | else:
118 | e = " [!] Checks show 'Server Is Catch All' on: " + \
119 | str(self.mxhost['Host'])
120 | print helpers.color(e, warning=True)
121 |
122 | return self.FinalList
123 |
--------------------------------------------------------------------------------
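A minimal usage sketch for the VerifyEmail class above; inputs are illustrative, and the check needs outbound SMTP (port 25) to the target's MX host, so results vary by network:

    from Helpers.VerifyEmails import VerifyEmail

    emails = ['jane.doe@example.com']             # illustrative list, normally built by the modules
    verify = VerifyEmail(emails, [], 'example.com', Verbose=True)
    valid = verify.ExecuteVerify()                # resolves MX, checks catch-all, then RCPTs each address
    print(valid)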
/Helpers/VersionCheck.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import requests
3 | import configparser
4 | import helpers
5 | import logging
6 |
7 |
8 | class VersionCheck(object):
9 |
10 | def __init__(self, version):
11 | config = configparser.ConfigParser()
12 | try:
13 | self.logger = logging.getLogger("SimplyEmail.VersionCheck")
14 | self.version = str(version)
15 | config.read('Common/SimplyEmail.ini')
16 | self.Start = config['GlobalSettings']['VersionRepoCheck']
17 | self.RepoLocation = str(
18 | config['GlobalSettings']['VersionRepoCheckLocation'])
19 | self.UserAgent = {
20 | 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
21 | except Exception as e:
22 | print e
23 |
24 | def VersionRequest(self):
25 | if self.Start == "Yes":
26 |             self.logger.info("Version / Update request started")
27 | try:
28 | r = requests.get(self.RepoLocation, headers=self.UserAgent, timeout=3)
29 | results = r.content
30 | results = results.rstrip('\n')
31 | if str(results) != str(self.version):
32 | p = " [!] Newer Version Available, Re-Run Setup.sh to update!"
33 | print helpers.color(p, warning=True, bold=False)
34 | self.logger.info(
35 | "Version / Update returned newer Version Available")
36 | self.logger.info("Version / Update request completed OK")
37 | except Exception as e:
38 | error = " [!] Fail during Request to Update/Version Check (Check Connection)"
39 | self.logger.error(
40 | "Fail during Request to Update/Version Check (Check Connection)" + str(e))
41 | print helpers.color(error, warning=True)
42 |
--------------------------------------------------------------------------------
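VersionCheck.py boils down to fetching the remote VERSION file and comparing it with the local string; a condensed sketch, with the URL left as a hypothetical placeholder instead of the value read from SimplyEmail.ini:

import requests

def is_up_to_date(local_version, version_url):
    # The VERSION file holds a bare version string (plus a trailing newline).
    remote = requests.get(version_url, timeout=3).text.strip()
    return remote == str(local_version).strip()
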
/Helpers/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SimplySecurity/SimplyEmail/6a42d373a13b258e90d61efc82c527c5b754a9b8/Helpers/__init__.py
--------------------------------------------------------------------------------
/Helpers/bootstrap-3.3.5/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 |
3 | Copyright (c) 2011-2015 Twitter, Inc
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in
13 | all copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | THE SOFTWARE.
22 |
--------------------------------------------------------------------------------
/Helpers/bootstrap-3.3.5/Screen Shot 2015-11-11 at 5.27.15 PM.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SimplySecurity/SimplyEmail/6a42d373a13b258e90d61efc82c527c5b754a9b8/Helpers/bootstrap-3.3.5/Screen Shot 2015-11-11 at 5.27.15 PM.png
--------------------------------------------------------------------------------
/Helpers/bootstrap-3.3.5/Screen Shot 2015-11-11 at 5.27.31 PM.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SimplySecurity/SimplyEmail/6a42d373a13b258e90d61efc82c527c5b754a9b8/Helpers/bootstrap-3.3.5/Screen Shot 2015-11-11 at 5.27.31 PM.png
--------------------------------------------------------------------------------
/Helpers/helpers.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | import os
4 | import textwrap
5 | import logging
6 | import time
7 | import magic
8 | import json
9 | import configparser
10 | import collections
11 | import random
12 | from fake_useragent import UserAgent
13 |
14 | def dictToJson(inputDict):
15 | """
16 | Takes in a list of dict items.
17 | Converts them to json and returns list of json obj.
18 | """
19 | obj = []
20 | for item in inputDict:
21 | obj += json.dumps(item)
22 | return obj
23 |
24 | def get_searchid():
25 | currentDate = str(time.strftime("%d%m%Y"))
26 | currentTime = str(time.strftime("%H%M%S"))
27 | searchid = currentDate + currentTime
28 | return searchid
29 |
30 | def get_datetime():
31 | currentDate = str(time.strftime("%d/%m/%Y"))
32 | currentTime = str(time.strftime("%H:%M:%S"))
33 | datetime = currentDate + ' ' +currentTime
34 | return datetime
35 |
36 | def JsonListToJsonObj(inputJsonList, domain):
37 | """
38 | Takes a list of json objects,
39 | places them in a key and returns the data.
40 | """
41 | currentDate = str(time.strftime("%d/%m/%Y"))
42 | currentTime = str(time.strftime("%H:%M:%S"))
43 | currentTool = "SimplyEmail"
44 | config = configparser.ConfigParser()
45 | config.read('Common/SimplyEmail.ini')
46 | currentVersion = str(config['GlobalSettings']['Version'])
47 | count = len(inputJsonList)
48 | dic = collections.OrderedDict()
49 | dic = {
50 | "domain_of_collection" : domain,
51 | "data_of_collection" : currentDate,
52 | "time_of_collection" : currentTime,
53 | "tool_of_collection" : currentTool,
54 | "current_version" : currentVersion,
55 | "email_collection_count" : count,
56 | "emails" : inputJsonList,
57 | }
58 | obj = json.dumps(dic, indent=4, sort_keys=True)
59 | return obj
60 |
61 | def color(string, status=True, warning=False, bold=True, blue=False, firewall=False):
62 | # Change text color for the linux terminal, defaults to green.
63 | # Set "warning=True" for red.
64 | # stolen from Veil :)
65 | attr = []
66 | if status:
67 | # green
68 | attr.append('32')
69 | if warning:
70 | # red
71 | attr.append('31')
72 | if bold:
73 | attr.append('1')
74 | if firewall:
75 | attr.append('33')
76 | if blue:
77 | # blue
78 | attr.append('34')
79 | return '\x1b[%sm%s\x1b[0m' % (';'.join(attr), string)
80 |
81 |
82 | def formatLong(title, message, frontTab=True, spacing=16):
83 | """
84 | Print a long title:message with our standardized formatting.
85 | Wraps multiple lines into a nice paragraph format.
86 | """
87 |
88 | lines = textwrap.wrap(textwrap.dedent(message).strip(), width=50)
89 | returnstring = ""
90 |
91 | i = 1
92 | if len(lines) > 0:
93 | if frontTab:
94 | returnstring += "\t%s%s" % (('{0: <%s}' %
95 | spacing).format(title), lines[0])
96 | else:
97 | returnstring += " %s%s" % (('{0: <%s}' %
98 | (spacing-1)).format(title), lines[0])
99 | while i < len(lines):
100 | if frontTab:
101 | returnstring += "\n\t"+' '*spacing+lines[i]
102 | else:
103 | returnstring += "\n"+' '*spacing+lines[i]
104 | i += 1
105 | return returnstring
106 |
107 |
108 | def DirectoryListing(directory):
109 | # Returns a list of dir's of results
110 | dirs = []
111 | for (dir, _, files) in os.walk(directory):
112 | for f in files:
113 | path = os.path.join(dir, f)
114 | if os.path.exists(path):
115 | dirs.append(path)
116 | return dirs
117 |
118 | def split_email(email):
119 | email = email.lower()
120 | se = email.split("@")
121 | return se
122 |
123 | def getua():
124 | # gets a random useragent and returns the UA
125 | ua = UserAgent()
126 | return ua.random
127 |
128 | def modsleep(delay, jitter=0):
129 | # Quick Snipit From EmPyre Agent (@HarmJ0y)
130 | if jitter < 0: jitter = -jitter
131 | if jitter > 1: jitter = 1/jitter
132 |
133 | minSleep = int((1.0-jitter)*delay)
134 | maxSleep = int((1.0+jitter)*delay)
135 | sleepTime = random.randint(minSleep, maxSleep)
136 | time.sleep(int(sleepTime))
137 |
138 | def filetype(path):
139 | m = magic.from_file(str(path))
140 | return m
141 |
142 | #######################
143 | # Setup Logging Class #
144 | #######################
145 |
146 |
147 | class log(object):
148 |
149 | """simple logging testing and dev"""
150 |
151 | def __init__(self):
152 | self.name = ".SimplyEmail.log"
153 |
154 | def start(self):
155 | logger = logging.getLogger("SimplyEmail")
156 | logger.setLevel(logging.INFO)
157 | fh = logging.FileHandler(self.name)
158 | formatter = logging.Formatter(
159 | '%(asctime)s-[%(name)s]-[%(levelname)s]- %(message)s')
160 | fh.setFormatter(formatter)
161 | logger.addHandler(fh)
162 | logger.info("Program started")
163 | logging.captureWarnings(True)
164 | logger.info("Set Logging Warning Capture: True")
165 |
166 | def infomsg(self, message, modulename):
167 | try:
168 | msg = 'SimplyEmail.' + str(modulename)
169 | logger = logging.getLogger(msg)
170 | logger.info(str(message))
171 | except Exception as e:
172 | print e
173 |
174 | def warningmsg(self, message, modulename):
175 | try:
176 | msg = 'SimplyEmail.' + str(modulename)
177 | logger = logging.getLogger(msg)
178 | logger.warning(str(message))
179 | except Exception as e:
180 | print e
181 |
--------------------------------------------------------------------------------
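A short usage example for the helpers above, assuming Python 2 and the repository root as the working directory (the same layout the modules themselves rely on):

from Helpers import helpers

print(helpers.color(" [*] a status message"))                      # green, bold
print(helpers.color(" [!] something broke", warning=True))         # red, bold
print(helpers.color(" [*] verbose/firewall text", firewall=True))  # yellow
helpers.modsleep(5, jitter=0.5)   # sleeps roughly 2-7 seconds
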
/Helpers/messages.py:
--------------------------------------------------------------------------------
1 | import helpers
2 |
3 |
4 | def email_count(text, Module):
5 | Length = " [*] " + Module + \
6 | ": Gathered " + str(text) + " Email(s)!"
7 | print helpers.color(Length, status=True)
--------------------------------------------------------------------------------
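Usage is one line per module; the count and module name below are arbitrary example values:

from Helpers import messages

messages.email_count(12, "AskSearch")   # prints " [*] AskSearch: Gathered 12 Email(s)!" in green
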
/Modules/AskSearch.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # Port from theHarvester! Shout out to him for the code:
3 | # https://github.com/laramies/theHarvester/blob/master/discovery/asksearch.py
4 | import configparser
5 | import logging
6 | from Helpers import Download
7 | from Helpers import Parser
8 | from Helpers import helpers
9 |
10 | # Class will have the following properties:
11 | # 1) name / description
12 | # 2) main name called "ClassName"
13 | # 3) execute function (calls everything it needs)
14 | # 4) places the findings into a queue
15 |
16 |
17 | class ClassName(object):
18 |
19 | def __init__(self, Domain, verbose=False):
20 | self.apikey = False
21 | self.logger = logging.getLogger("SimplyEmail.AskSearch")
22 | self.name = "Ask Search for Emails"
23 | self.description = "Simple Ask Search for Emails"
24 | config = configparser.ConfigParser()
25 | try:
26 | config.read('Common/SimplyEmail.ini')
27 | self.UserAgent = {
28 | 'User-Agent': helpers.getua()}
29 | self.PageLimit = int(config['AskSearch']['QueryPageLimit'])
30 | self.Counter = int(config['AskSearch']['QueryStart'])
31 | self.Sleep = int(config['SleepConfig']['QuerySleep'])
32 | self.Jitter = int(config['SleepConfig']['QueryJitter'])
33 | self.Domain = Domain
34 | self.verbose = verbose
35 | self.Html = ""
36 | except Exception as e:
37 | self.logger.critical(
38 | 'AskSearch module failed to load: ' + str(e))
39 | print helpers.color("[*] Major Settings for Ask Search are missing, EXITING!\n", warning=True)
40 |
41 | def execute(self):
42 | self.logger.debug("AskSearch module started")
43 | self.process()
44 | FinalOutput, HtmlResults, JsonResults = self.get_emails()
45 | return FinalOutput, HtmlResults, JsonResults
46 |
47 | def process(self):
48 | dl = Download.Download(self.verbose)
49 | while self.Counter <= self.PageLimit:
50 | if self.verbose:
51 | p = ' [*] AskSearch on page: ' + str(self.Counter)
52 | print helpers.color(p, firewall=True)
53 | self.logger.info('AskSearch on page: ' + str(self.Counter))
54 | try:
55 | url = 'http://www.ask.com/web?q=@' + str(self.Domain) + \
56 | '&pu=10&page=' + str(self.Counter)
57 | except Exception as e:
58 | error = " [!] Major issue with Ask Search:" + str(e)
59 | self.logger.error('Major issue with Ask Search: ' + str(e))
60 | print helpers.color(error, warning=True)
61 | try:
62 | rawhtml = dl.requesturl(url, useragent=self.UserAgent)
63 | except Exception as e:
64 | error = " [!] Fail during Request to Ask (Check Connection):" + \
65 | str(e)
66 | self.logger.error(
67 | 'Fail during Request to Ask (Check Connection): ' + str(e))
68 | print helpers.color(error, warning=True)
69 | self.Html += rawhtml
70 | self.Counter += 1
71 | helpers.modsleep(self.Sleep, jitter=self.Jitter)
72 |
73 |
74 | def get_emails(self):
75 | parse = Parser.Parser(self.Html)
76 | FinalOutput, HtmlResults = parse.extendedclean(self.name)
77 | JsonResults = parse.BuildJson(FinalOutput, self.name)
78 | self.logger.debug('AskSearch completed search')
79 | return FinalOutput, HtmlResults, JsonResults
80 |
--------------------------------------------------------------------------------
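The comment block at the top of AskSearch.py spells out the contract every module follows; below is a bare-bones sketch of that interface. The class is purely illustrative and is not a module that ships with the tool:

from Helpers import Parser

class ClassName(object):

    def __init__(self, Domain, verbose=False):
        self.apikey = False
        self.name = "Example Search for Emails"
        self.description = "Skeleton showing the interface the framework expects"
        self.Domain = Domain
        self.verbose = verbose
        self.Html = ""   # a real module fills this during a process()/search() step

    def execute(self):
        parse = Parser.Parser(self.Html)
        FinalOutput, HtmlResults = parse.extendedclean(self.name)
        JsonResults = parse.BuildJson(FinalOutput, self.name)
        return FinalOutput, HtmlResults, JsonResults
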
/Modules/CanarioAPI.py.old:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | import configparser
5 | from Helpers import Parser
6 | from Helpers import helpers
7 | from Helpers import CanarioAPI
8 |
9 | # Class will have the following properties:
10 | # 1) name / description
11 | # 2) main name called "ClassName"
12 | # 3) execute function (calls everything it needs)
13 | # 4) places the findings into a queue
14 |
15 | # This method will do the following:
16 | # 1) Get raw HTML for, let's say, enron.com
17 | # This is mainly due to the API not supporting code searches without a known repo or user
18 | # :(https://canary.pw/search/?q=earthlink.net&page=3)
19 | # 2) Use BeautifulSoup to parse the results of the first (5) pages for tags that start with "/view/"
20 | # 3) Uses the list of URLs and places the raw HTML into one value
21 | # 4) Sends to the parser for results
22 |
23 | # One consideration: the returned results seem to max out at 100
24 | # The API may return a larger array of results - this will be added later
25 | # Still having some major Python errors
26 |
27 |
28 | class ClassName(object):
29 |
30 | def __init__(self, domain, verbose=False):
31 | self.apikey = True
32 | self.name = "Canar.io API PasteBin search"
33 |         self.description = "Search Canar.io pastes for potential data dumps; this can take a bit but it is a great source"
34 | self.domain = domain
35 | self.verbose = verbose
36 | config = configparser.ConfigParser()
37 | self.Html = ""
38 | try:
39 | config.read('Common/SimplyEmail.ini')
40 | self.Depth = int(config['CanaryPasteBin']['PageDepth'])
41 | self.Counter = int(config['CanaryPasteBin']['QueryStart'])
42 | self.apikeyv = str(config['APIKeys']['Canario'])
43 | except:
44 | print helpers.color(" [*] Major Settings for Canar.io Search are missing, EXITING!\n", warning=True)
45 |
46 | def execute(self):
47 | self.process()
48 | FinalOutput, HtmlResults, JsonResults = self.get_emails()
49 | return FinalOutput, HtmlResults, JsonResults
50 |
51 | def process(self):
52 | try:
53 | c = CanarioAPI.canary(api_key=str(self.apikeyv))
54 | s = c.search(self.domain)
55 | try:
56 | if str(s['data']['error_msg']):
57 | error = str(s['data']['error_msg'])
58 | e = " [!] Check your key and Canar.io limit: " + error
59 | print helpers.color(e, warning=True)
60 | except:
61 | pass
62 | if str(s['action_valid']).lower() == 'true':
63 | if self.verbose:
64 | print helpers.color(' [*] Canario query valid!')
65 | refid = []
66 | count = int(s['data']['results']['count'])
67 | if self.verbose:
68 | e = ' [*] Canario result count: ' + str(count)
69 | print helpers.color(e, firewall=True)
70 | if count > 0:
71 | re = s['data']['results']['results']
72 | # We need to make this a setting ?
73 | for i in range(0, count, 1):
74 | try:
75 | result = re[i]
76 | refid.append(str(result['referenceid']))
77 | except Exception as e:
78 | print e
79 | if len(refid) > 0:
80 | # now enumerate the data in each refid
81 | for ref in refid:
82 | if self.verbose:
83 | e = ' [*] Now enumerating refid: ' + str(ref)
84 | print helpers.color(e, firewall=True)
85 | try:
86 | view = c.view(ref)
87 | try:
88 | emails = view['data']['objects']['email']
89 | for em in emails:
90 | self.Html += str(em) + " "
91 | except:
92 | # just for weird cases where there is no
93 | # values
94 | pass
95 | try:
96 | body = view['data']['text_data']['body']
97 | self.Html += str(body) + ' '
98 | except:
99 | # just for weird cases where there is no
100 | # values
101 | pass
102 | except Exception as e:
103 | p = " [!] Unable to enumerate Canario ref ID: " + \
104 | str(e)
105 | print helpers.color(p, warning=True)
106 | else:
107 | # logic to see if the key is bad
108 | print ""
109 | except Exception as e:
110 | l = ' [!] Check your Canario API key: ' + str(e)
111 | print helpers.color(l, warning=True)
112 |
113 | def get_emails(self):
114 | # You must report back with parsing errors!!!
115 | # in one case I have seen alex@gmail.com:Password
116 | # This will break most Reg-Ex
117 | Parse = Parser.Parser(self.Html)
118 | Parse.genericClean()
119 | Parse.urlClean()
120 | FinalOutput = Parse.GrepFindEmails()
121 | HtmlResults = Parse.BuildResults(FinalOutput, self.name)
122 | JsonResults = Parse.BuildJson(FinalOutput, self.name)
123 | return FinalOutput, HtmlResults, JsonResults
124 |
--------------------------------------------------------------------------------
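Condensed sketch of the flow the retired module above describes: run a search, collect each result's referenceid, then view() every reference and harvest its e-mail objects. The field names mirror the module's own lookups; the API key is a placeholder (read from SimplyEmail.ini in the real code):

from Helpers import CanarioAPI

def canario_emails(domain, api_key):
    c = CanarioAPI.canary(api_key=api_key)
    s = c.search(domain)
    emails = set()
    results = s['data']['results']
    for result in results['results'][:int(results['count'])]:
        view = c.view(result['referenceid'])
        for em in view['data']['objects'].get('email', []):
            emails.add(str(em))
    return emails
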
/Modules/CanaryBinSearch.py.old:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | # Non-API-Based
5 | import configparser
6 | import logging
7 | from BeautifulSoup import BeautifulSoup
8 | from Helpers import Download
9 | from Helpers import Parser
10 | from Helpers import helpers
11 |
12 | # Class will have the following properties:
13 | # 1) name / description
14 | # 2) main name called "ClassName"
15 | # 3) execute function (calls everything it needs)
16 | # 4) places the findings into a queue
17 |
18 | # This method will do the following:
19 | # 1) Get raw HTML for, let's say, enron.com
20 | # This is mainly due to the API not supporting code searches without a known repo or user
21 | # :(https://canary.pw/search/?q=earthlink.net&page=3)
22 | # 2) Use BeautifulSoup to parse the results of the first (5) pages for tags that start with "/view/"
23 | # 3) Uses the list of URLs and places the raw HTML into one value
24 | # 4) Sends to the parser for results
25 |
26 | # One consideration: the returned results seem to max out at 100
27 | # The API may return a larger array of results - this will be added later
28 | # Still having some major Python errors
29 |
30 |
31 | class ClassName(object):
32 |
33 | def __init__(self, domain, verbose=False):
34 | self.apikey = False
35 | self.name = "Searching Canary Paste Bin"
36 |         self.description = "Search Canary pastes for potential data dumps; this can take a bit but it is a great source"
37 | self.domain = domain
38 | config = configparser.ConfigParser()
39 | self.Html = ""
40 | try:
41 | self.UserAgent = {
42 | 'User-Agent': helpers.getua()}
43 |             self.logger = logging.getLogger("SimplyEmail.CanaryBinSearch")
44 | config.read('Common/SimplyEmail.ini')
45 | self.Depth = int(config['CanaryPasteBin']['PageDepth'])
46 | self.Counter = int(config['CanaryPasteBin']['QueryStart'])
47 | self.verbose = verbose
48 | except Exception as e:
49 | self.logger.critical(
50 | 'CanaryBinSearch module failed to load: ' + str(e))
51 | print helpers.color("[*] Major Settings for Canary PasteBin Search are missing, EXITING!\n", warning=True)
52 |
53 | def execute(self):
54 | self.logger.debug("CanaryBinSearch module started")
55 | self.process()
56 | FinalOutput, HtmlResults, JsonResults = self.get_emails()
57 | return FinalOutput, HtmlResults, JsonResults
58 |
59 | def process(self):
60 | # Get all the Pastebin raw items
61 | # https://canary.pw/search/?q=earthlink.net&page=3
62 | UrlList = []
63 | dl = Download.Download(verbose=self.verbose)
64 | while self.Counter <= self.Depth:
65 | if self.verbose:
66 | p = ' [*] Canary Search on page: ' + str(self.Counter)
67 | self.logger.info(
68 | "CanaryBinSearch on page: " + str(self.Counter))
69 | print helpers.color(p, firewall=True)
70 | try:
71 | url = "https://canary.pw/search/?q=" + str(self.domain) + "&page=" + \
72 | str(self.Counter)
73 | rawhtml, statuscode = dl.requesturl(
74 | url, useragent=self.UserAgent, statuscode=True, verify=False)
75 | if statuscode != 200:
76 | break
77 | except Exception as e:
78 | error = " [!] Major issue with Canary Pastebin Search:" + \
79 | str(e)
80 | self.logger.error(
81 | 'Fail during Request to CanaryBinSearch (Check Connection): ' + str(e))
82 | print helpers.color(error, warning=True)
83 |             # Parse the results for our URLs
84 | soup = BeautifulSoup(rawhtml)
85 | for a in soup.findAll('a', href=True):
86 | a = a['href']
87 | if a.startswith('/view'):
88 | UrlList.append(a)
89 | self.Counter += 1
90 | # Now take all gathered URL's and gather the HTML content needed
91 | Status = " [*] Canary found " + \
92 | str(len(UrlList)) + " CanaryBin(s) to Search!"
93 | self.logger.info(
94 | "CanaryBin found " + str(len(UrlList)) + " CanaryBin(s) to Search!")
95 | print helpers.color(Status, status=True)
96 | for item in UrlList:
97 | try:
98 | item = "https://canary.pw" + str(item)
99 | # They can be massive!
100 | rawhtml = dl.requesturl(
101 | item, useragent=self.UserAgent, timeout=20)
102 | self.Html += rawhtml
103 | except Exception as e:
104 | error = " [!] Connection Timed out on Canary Pastebin Search:" + \
105 | str(e)
106 | self.logger.error(
107 | 'Fail during Request to CanaryBinSearch bin (Check Connection): ' + str(e))
108 | print helpers.color(error, warning=True)
109 |
110 |     # We must pre-parse (Python doesn't like the large vars)
111 | def get_emails(self):
112 | # You must report back with parsing errors!!!
113 | # in one case I have seen alex@gmail.com:Password
114 | # This will break most Reg-Ex
115 | Parse = Parser.Parser(self.Html)
116 | Parse.genericClean()
117 | Parse.urlClean()
118 | FinalOutput = Parse.GrepFindEmails()
119 | HtmlResults = Parse.BuildResults(FinalOutput, self.name)
120 | JsonResults = Parse.BuildJson(FinalOutput, self.name)
121 | self.logger.debug('CanaryBinSearch completed search')
122 | return FinalOutput, HtmlResults, JsonResults
123 |
--------------------------------------------------------------------------------
/Modules/ExaleadDOCSearch.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # Class will have the following properties:
4 | # 1) name / description
5 | # 2) main name called "ClassName"
6 | # 3) execute function (calls everything it needs)
7 | # 4) places the findings into a queue
8 |
9 | import configparser
10 | import logging
11 | from Helpers import Download
12 | from Helpers import helpers
13 | from Helpers import Parser
14 | from Helpers import Converter
15 | from bs4 import BeautifulSoup
16 |
17 | # import for "'ascii' codec can't decode byte" error
18 | import sys
19 | reload(sys)
20 | sys.setdefaultencoding("utf-8")
21 | # import for "'ascii' codec can't decode byte" error
22 |
23 |
24 | class ClassName(object):
25 |
26 | def __init__(self, Domain, verbose=False):
27 | self.apikey = False
28 | self.name = "Exalead DOC Search for Emails"
29 | self.description = "Uses Exalead Dorking to search DOCs for emails"
30 | config = configparser.ConfigParser()
31 | try:
32 | self.logger = logging.getLogger("SimplyEmail.ExaleadDOCSearch")
33 | config.read('Common/SimplyEmail.ini')
34 | self.Domain = Domain
35 | self.Quanity = int(config['ExaleadDOCSearch']['StartQuantity'])
36 | self.UserAgent = {
37 | 'User-Agent': helpers.getua()}
38 | self.Limit = int(config['ExaleadDOCSearch']['QueryLimit'])
39 | self.Counter = int(config['ExaleadDOCSearch']['QueryStart'])
40 | self.verbose = verbose
41 | self.urlList = []
42 | self.Text = ""
43 | except Exception as e:
44 | self.logger.critical("ExaleadDOCSearch module failed to __init__: " + str(e))
45 | print helpers.color(" [*] Major Settings for Exalead are missing, EXITING!\n", warning=True)
46 |
47 | def execute(self):
48 | self.logger.debug("ExaleadDOCSearch module started")
49 | self.search()
50 | FinalOutput, HtmlResults, JsonResults = self.get_emails()
51 | return FinalOutput, HtmlResults, JsonResults
52 |
53 | def search(self):
54 | dl = Download.Download(self.verbose)
55 | convert = Converter.Converter(verbose=self.verbose)
56 | while self.Counter <= self.Limit and self.Counter <= 10:
57 | helpers.modsleep(1)
58 | if self.verbose:
59 | p = ' [*] Exalead DOC Search on page: ' + str(self.Counter)
60 | self.logger.info('ExaleadDOCSearch on page: ' + str(self.Counter))
61 | print helpers.color(p, firewall=True)
62 | try:
63 | url = 'http://www.exalead.com/search/web/results/?q="%40' + self.Domain + \
64 | '"+filetype:word&elements_per_page=' + \
65 | str(self.Quanity) + '&start_index=' + str(self.Counter)
66 | except Exception as e:
67 | self.logger.error('ExaleadDOCSearch could not build URL')
68 | error = " [!] Major issue with Exalead DOC Search: " + str(e)
69 | print helpers.color(error, warning=True)
70 | try:
71 | RawHtml = dl.requesturl(url, useragent=self.UserAgent)
72 | # sometimes url is broken but exalead search results contain
73 | # e-mail
74 | self.Text += RawHtml
75 | soup = BeautifulSoup(RawHtml, "lxml")
76 | self.urlList = [h2.a["href"]
77 | for h2 in soup.findAll('h4', class_='media-heading')]
78 | except Exception as e:
79 | self.logger.error('ExaleadDOCSearch could not request / parse HTML')
80 | error = " [!] Fail during parsing result: " + str(e)
81 | print helpers.color(error, warning=True)
82 | self.Counter += 30
83 |
84 | # now download the required files
85 | try:
86 | for url in self.urlList:
87 | if self.verbose:
88 | p = ' [*] Exalead DOC search downloading: ' + str(url)
89 | self.logger.info('ExaleadDOCSearch downloading: ' + str(url))
90 | print helpers.color(p, firewall=True)
91 | try:
92 | filetype = ".doc"
93 | dl = Download.Download(self.verbose)
94 | FileName, FileDownload = dl.download_file(url, filetype)
95 | if FileDownload:
96 | if self.verbose:
97 | p = ' [*] Exalead DOC file was downloaded: ' + \
98 | str(url)
99 | self.logger.info('ExaleadDOCSearch downloaded: ' + str(p))
100 | print helpers.color(p, firewall=True)
101 | ft = helpers.filetype(FileName).lower()
102 | if 'word' in ft:
103 | self.Text += convert.convert_doc_to_txt(FileName)
104 | else:
105 | self.logger.warning('Downloaded file is not a DOC: ' + ft)
106 | except Exception as e:
107 | error = " [!] Issue with opening DOC Files:%s\n" % (str(e))
108 | print helpers.color(error, warning=True)
109 | try:
110 | dl.delete_file(FileName)
111 | except Exception as e:
112 | print e
113 | except Exception as e:
114 | self.logger.error("ExaleadDOCSearch no doc's to download")
115 | print helpers.color(" [*] No DOC's to download from Exalead!\n", firewall=True)
116 |
117 | if self.verbose:
118 | p = ' [*] Searching DOC from Exalead Complete'
119 | print helpers.color(p, status=True)
120 |
121 | def get_emails(self):
122 | Parse = Parser.Parser(self.Text)
123 | Parse.genericClean()
124 | Parse.urlClean()
125 | FinalOutput = Parse.GrepFindEmails()
126 | HtmlResults = Parse.BuildResults(FinalOutput, self.name)
127 | JsonResults = Parse.BuildJson(FinalOutput, self.name)
128 | self.logger.debug('ExaleadDOCSearch completed search')
129 | return FinalOutput, HtmlResults, JsonResults
130 |
--------------------------------------------------------------------------------
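All of the Exalead modules share the same dork: quote %40domain, add a filetype: operator, and page through with start_index; the result links sit in <h4 class="media-heading"> anchors. A stripped-down sketch of just that query step, using requests and bs4 directly instead of the Download helper:

import requests
from bs4 import BeautifulSoup

def exalead_file_urls(domain, filetype="word", start_index=0, per_page=50):
    # Build the Exalead dork URL for the given file type and page offset.
    url = ('http://www.exalead.com/search/web/results/?q="%40' + domain +
           '"+filetype:' + filetype +
           '&elements_per_page=' + str(per_page) +
           '&start_index=' + str(start_index))
    html = requests.get(url, timeout=10).content
    soup = BeautifulSoup(html, "lxml")
    # Each result link lives under an <h4 class="media-heading"> element.
    return [h4.a["href"] for h4 in soup.findAll('h4', class_='media-heading')]
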
/Modules/ExaleadDOCXSearch.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # Class will have the following properties:
4 | # 1) name / description
5 | # 2) main name called "ClassName"
6 | # 3) execute function (calls everything it needs)
7 | # 4) places the findings into a queue
8 | import configparser
9 | import requests
10 | import time
11 | import logging
12 | from Helpers import Converter
13 | from Helpers import helpers
14 | from Helpers import Parser
15 | from Helpers import Download
16 | from bs4 import BeautifulSoup
17 |
18 | # import for "'ascii' codec can't decode byte" error
19 | import sys
20 | reload(sys)
21 | sys.setdefaultencoding("utf-8")
22 | # import for "'ascii' codec can't decode byte" error
23 |
24 |
25 | class ClassName(object):
26 |
27 | def __init__(self, Domain, verbose=False):
28 | self.apikey = False
29 | self.name = "Exalead DOCX Search for Emails"
30 | self.description = "Uses Exalead Dorking to search DOCXs for emails"
31 | config = configparser.ConfigParser()
32 | try:
33 | self.logger = logging.getLogger("SimplyEmail.ExaleadDOCXSearch")
34 | config.read('Common/SimplyEmail.ini')
35 | self.Domain = Domain
36 | self.Quanity = int(config['ExaleadDOCXSearch']['StartQuantity'])
37 | self.UserAgent = {
38 | 'User-Agent': helpers.getua()}
39 | self.Limit = int(config['ExaleadDOCXSearch']['QueryLimit'])
40 | self.Counter = int(config['ExaleadDOCXSearch']['QueryStart'])
41 | self.verbose = verbose
42 | self.urlList = []
43 | self.Text = ""
44 | except Exception as e:
45 | self.logger.critical("ExaleadDOCXSearch module failed to __init__: " + str(e))
46 |             p = " [*] Major Settings for ExaleadDOCXSearch are missing, EXITING: " + str(e)
47 | print helpers.color(p, warning=True)
48 |
49 | def execute(self):
50 | self.logger.debug("ExaleadDOCXSearch module started")
51 | self.search()
52 | FinalOutput, HtmlResults, JsonResults = self.get_emails()
53 | return FinalOutput, HtmlResults, JsonResults
54 |
55 | def download_file(self, url):
56 | local_filename = url.split('/')[-1]
57 | # NOTE the stream=True parameter
58 | r = requests.get(url, stream=True)
59 | with open(local_filename, 'wb') as f:
60 | for chunk in r.iter_content(chunk_size=1024):
61 | if chunk: # filter out keep-alive new chunks
62 | f.write(chunk)
63 | # f.flush() commented by recommendation from J.F.Sebastian
64 | return local_filename
65 |
66 | def search(self):
67 | convert = Converter.Converter(verbose=self.verbose)
68 | while self.Counter <= self.Limit:
69 | time.sleep(1)
70 | if self.verbose:
71 | p = ' [*] Exalead Search on page: ' + str(self.Counter)
72 | self.logger.info("ExaleadDOCXSearch on page: " + str(self.Counter))
73 | print helpers.color(p, firewall=True)
74 | try:
75 | url = 'http://www.exalead.com/search/web/results/?q="%40' + self.Domain + \
76 | '"+filetype:docx&elements_per_page=' + \
77 | str(self.Quanity) + '&start_index=' + str(self.Counter)
78 | except Exception as e:
79 | self.logger.error("Issue building URL to search")
80 | error = " [!] Major issue with Exalead DOCX Search: " + str(e)
81 | print helpers.color(error, warning=True)
82 | try:
83 | r = requests.get(url, headers=self.UserAgent)
84 | except Exception as e:
85 | error = " [!] Fail during Request to Exalead (Check Connection):" + str(
86 | e)
87 | print helpers.color(error, warning=True)
88 | try:
89 | RawHtml = r.content
90 | # sometimes url is broken but exalead search results contain
91 | # e-mail
92 | self.Text += RawHtml
93 | soup = BeautifulSoup(RawHtml, "lxml")
94 | self.urlList = [h2.a["href"]
95 | for h2 in soup.findAll('h4', class_='media-heading')]
96 | except Exception as e:
97 | self.logger.error("Fail during parsing result: " + str(e))
98 | error = " [!] Fail during parsing result: " + str(e)
99 | print helpers.color(error, warning=True)
100 | self.Counter += 30
101 |
102 | # now download the required files
103 | try:
104 | for url in self.urlList:
105 | if self.verbose:
106 | p = ' [*] Exalead DOCX search downloading: ' + str(url)
107 | self.logger.info("Starting download of DOCX: " + str(url))
108 | print helpers.color(p, firewall=True)
109 | try:
110 | filetype = ".docx"
111 | dl = Download.Download(self.verbose)
112 | FileName, FileDownload = dl.download_file(url, filetype)
113 | if FileDownload:
114 | if self.verbose:
115 | self.logger.info("File was downloaded: " + str(url))
116 | p = ' [*] Exalead DOCX file was downloaded: ' + \
117 | str(url)
118 | print helpers.color(p, firewall=True)
119 | self.Text += convert.convert_docx_to_txt(FileName)
120 | except Exception as e:
121 | self.logger.error("Issue with opening DOCX Files: " + str(e))
122 | error = " [!] Issue with opening DOCX Files:%s\n" % (str(e))
123 | print helpers.color(error, warning=True)
124 | try:
125 | dl.delete_file(FileName)
126 | except Exception as e:
127 | print e
128 | except Exception as e:
129 |             p = " [*] No DOCX's to download from Exalead: " + str(e)
130 | self.logger.info("No DOCX's to download from Exalead: " + str(e))
131 | print helpers.color(p, firewall=True)
132 |
133 | if self.verbose:
134 |
135 | p = ' [*] Searching DOCX from Exalead Complete'
136 | self.logger.info("Searching DOCX from Exalead Complete")
137 | print helpers.color(p, status=True)
138 |
139 | def get_emails(self):
140 | Parse = Parser.Parser(self.Text)
141 | Parse.genericClean()
142 | Parse.urlClean()
143 | FinalOutput = Parse.GrepFindEmails()
144 | HtmlResults = Parse.BuildResults(FinalOutput, self.name)
145 | JsonResults = Parse.BuildJson(FinalOutput, self.name)
146 | self.logger.debug('ExaleadDOCXSearch completed search')
147 | return FinalOutput, HtmlResults, JsonResults
148 |
--------------------------------------------------------------------------------
/Modules/ExaleadPDFSearch.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # Class will have the following properties:
4 | # 1) name / description
5 | # 2) main name called "ClassName"
6 | # 3) execute function (calls everything it needs)
7 | # 4) places the findings into a queue
8 | import configparser
9 | import requests
10 | import time
11 | from Helpers import Converter
12 | from Helpers import helpers
13 | from Helpers import Parser
14 | from Helpers import Download
15 | from bs4 import BeautifulSoup
16 |
17 |
18 | class ClassName(object):
19 |
20 | def __init__(self, Domain, verbose=False):
21 | self.apikey = False
22 | self.name = "Exalead PDF Search for Emails"
23 | self.description = "Uses Exalead Dorking to search PDFs for emails"
24 | config = configparser.ConfigParser()
25 | try:
26 | config.read('Common/SimplyEmail.ini')
27 | self.Domain = Domain
28 | self.Quanity = int(config['ExaleadPDFSearch']['StartQuantity'])
29 | self.Limit = int(config['ExaleadPDFSearch']['QueryLimit'])
30 | self.UserAgent = {
31 | 'User-Agent': helpers.getua()}
32 | self.Counter = int(config['ExaleadPDFSearch']['QueryStart'])
33 | self.verbose = verbose
34 | self.urlList = []
35 | self.Text = ""
36 | except:
37 | print helpers.color(" [*] Major Settings for ExaleadPDFSearch are missing, EXITING!\n", warning=True)
38 |
39 | def execute(self):
40 | self.search()
41 | FinalOutput, HtmlResults, JsonResults = self.get_emails()
42 | return FinalOutput, HtmlResults, JsonResults
43 |
44 | def search(self):
45 | convert = Converter.Converter(verbose=self.verbose)
46 | while self.Counter <= self.Limit and self.Counter <= 10:
47 | time.sleep(1)
48 | if self.verbose:
49 | p = ' [*] Exalead Search on page: ' + str(self.Counter)
50 | print helpers.color(p, firewall=True)
51 | try:
52 | url = 'http://www.exalead.com/search/web/results/?q="%40' + self.Domain + \
53 | '"+filetype:pdf&elements_per_page=' + \
54 | str(self.Quanity) + '&start_index=' + str(self.Counter)
55 | except Exception as e:
56 | error = " [!] Major issue with Exalead PDF Search: " + str(e)
57 | print helpers.color(error, warning=True)
58 | try:
59 | r = requests.get(url, headers=self.UserAgent)
60 | except Exception as e:
61 | error = " [!] Fail during Request to Exalead (Check Connection):" + str(
62 | e)
63 | print helpers.color(error, warning=True)
64 | try:
65 | RawHtml = r.content
66 | # sometimes url is broken but exalead search results contain
67 | # e-mail
68 | self.Text += RawHtml
69 | soup = BeautifulSoup(RawHtml, "lxml")
70 | self.urlList = [h2.a["href"]
71 | for h2 in soup.findAll('h4', class_='media-heading')]
72 | except Exception as e:
73 | error = " [!] Fail during parsing result: " + str(e)
74 | print helpers.color(error, warning=True)
75 | self.Counter += 30
76 |
77 | # now download the required files
78 | try:
79 | for url in self.urlList:
80 | if self.verbose:
81 | p = ' [*] Exalead PDF search downloading: ' + str(url)
82 | print helpers.color(p, firewall=True)
83 | try:
84 | filetype = ".pdf"
85 | dl = Download.Download(self.verbose)
86 | FileName, FileDownload = dl.download_file(url, filetype)
87 | if FileDownload:
88 | if self.verbose:
89 | p = ' [*] Exalead PDF file was downloaded: ' + \
90 | str(url)
91 | print helpers.color(p, firewall=True)
92 | self.Text += convert.convert_pdf_to_txt(FileName)
93 | except Exception as e:
94 | pass
95 | try:
96 | dl.delete_file(FileName)
97 | except Exception as e:
98 | print e
99 | except:
100 | print helpers.color(" [*] No PDF's to download from Exalead!\n", firewall=True)
101 |
102 | if self.verbose:
103 | p = ' [*] Searching PDF from Exalead Complete'
104 | print helpers.color(p, status=True)
105 |
106 | def get_emails(self):
107 | Parse = Parser.Parser(self.Text)
108 | Parse.genericClean()
109 | Parse.urlClean()
110 | FinalOutput = Parse.GrepFindEmails()
111 | HtmlResults = Parse.BuildResults(FinalOutput, self.name)
112 | JsonResults = Parse.BuildJson(FinalOutput, self.name)
113 | return FinalOutput, HtmlResults, JsonResults
114 |
--------------------------------------------------------------------------------
/Modules/ExaleadPPTXSearch.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # Class will have the following properties:
4 | # 1) name / description
5 | # 2) main name called "ClassName"
6 | # 3) execute function (calls everything it needs)
7 | # 4) places the findings into a queue
8 |
9 | import configparser
10 | import logging
11 | from Helpers import Download
12 | from Helpers import helpers
13 | from Helpers import Parser
14 | from Helpers import Converter
15 | from bs4 import BeautifulSoup
16 | # import for "'ascii' codec can't decode byte" error
17 | import sys
18 | reload(sys)
19 | sys.setdefaultencoding("utf-8")
20 | # import for "'ascii' codec can't decode byte" error
21 |
22 |
23 | class ClassName(object):
24 |
25 | def __init__(self, Domain, verbose=False):
26 | self.apikey = False
27 | self.name = "Exalead PPTX Search for Emails"
28 | self.description = "Uses Exalead Dorking to search PPTX for emails"
29 | config = configparser.ConfigParser()
30 | try:
31 | self.logger = logging.getLogger("SimplyEmail.ExaleadPPTXSearch")
32 | config.read('Common/SimplyEmail.ini')
33 | self.Domain = Domain
34 | self.Quanity = int(config['ExaleadPPTXSearch']['StartQuantity'])
35 | self.UserAgent = {
36 | 'User-Agent': helpers.getua()}
37 | self.Limit = int(config['ExaleadPPTXSearch']['QueryLimit'])
38 | self.Counter = int(config['ExaleadPPTXSearch']['QueryStart'])
39 | self.verbose = verbose
40 | self.urlList = []
41 | self.Text = ""
42 | except Exception as e:
43 | self.logger.critical("ExaleadPPTXSearch module failed to __init__: " + str(e))
44 | print helpers.color(" [*] Major Settings for Exalead are missing, EXITING!\n", warning=True)
45 |
46 | def execute(self):
47 | self.logger.debug("ExaleadPPTXSearch module started")
48 | self.search()
49 | FinalOutput, HtmlResults, JsonResults = self.get_emails()
50 | return FinalOutput, HtmlResults, JsonResults
51 |
52 | def search(self):
53 | dl = Download.Download(self.verbose)
54 | convert = Converter.Converter(verbose=self.verbose)
55 | while self.Counter <= self.Limit and self.Counter <= 10:
56 | helpers.modsleep(1)
57 | if self.verbose:
58 | p = ' [*] Exalead PPTX Search on page: ' + str(self.Counter)
59 | self.logger.info('ExaleadPPTXSearch on page: ' + str(self.Counter))
60 | print helpers.color(p, firewall=True)
61 | try:
62 | url = 'http://www.exalead.com/search/web/results/?q="%40' + self.Domain + \
63 | '"+filetype:pptx&elements_per_page=' + \
64 | str(self.Quanity) + '&start_index=' + str(self.Counter)
65 | except Exception as e:
66 | self.logger.error('ExaleadPPTXSearch could not build URL')
67 | error = " [!] Major issue with Exalead PPTX Search: " + str(e)
68 | print helpers.color(error, warning=True)
69 | try:
70 | RawHtml = dl.requesturl(url, useragent=self.UserAgent)
71 | # sometimes url is broken but exalead search results contain
72 | # e-mail
73 | self.Text += RawHtml
74 | soup = BeautifulSoup(RawHtml, "lxml")
75 | self.urlList = [h2.a["href"]
76 | for h2 in soup.findAll('h4', class_='media-heading')]
77 | except Exception as e:
78 | self.logger.error('ExaleadPPTXSearch could not request / parse HTML')
79 | error = " [!] Fail during parsing result: " + str(e)
80 | print helpers.color(error, warning=True)
81 | self.Counter += 30
82 |
83 | # now download the required files
84 | try:
85 | for url in self.urlList:
86 | if self.verbose:
87 | p = ' [*] Exalead PPTX search downloading: ' + str(url)
88 | self.logger.info('ExaleadPPTXSearch downloading: ' + str(url))
89 | print helpers.color(p, firewall=True)
90 | try:
91 | filetype = ".pptx"
92 | dl = Download.Download(self.verbose)
93 | FileName, FileDownload = dl.download_file(url, filetype)
94 | if FileDownload:
95 | if self.verbose:
96 | p = ' [*] Exalead PPTX file was downloaded: ' + \
97 | str(url)
98 |                         self.logger.info('ExaleadPPTXSearch downloaded: ' + str(p))
99 | print helpers.color(p, firewall=True)
100 | ft = helpers.filetype(FileName).lower()
101 | if 'powerpoint' in ft:
102 | self.Text += convert.convert_zip_to_text(FileName)
103 | else:
104 | self.logger.warning('Downloaded file is not a PPTX: ' + ft)
105 | except Exception as e:
106 | error = " [!] Issue with opening PPTX Files:%s" % (str(e))
107 | print helpers.color(error, warning=True)
108 | try:
109 | dl.delete_file(FileName)
110 | except Exception as e:
111 | print e
112 | except Exception as e:
113 |             self.logger.error("ExaleadPPTXSearch: no PPTXs to download")
114 | print helpers.color(" [*] No PPTX's to download from Exalead!\n", firewall=True)
115 |
116 | if self.verbose:
117 | p = ' [*] Searching PPTX from Exalead Complete'
118 | print helpers.color(p, status=True)
119 |
120 | def get_emails(self):
121 | Parse = Parser.Parser(self.Text)
122 | Parse.genericClean()
123 | Parse.urlClean()
124 | FinalOutput = Parse.GrepFindEmails()
125 | HtmlResults = Parse.BuildResults(FinalOutput, self.name)
126 | JsonResults = Parse.BuildJson(FinalOutput, self.name)
127 | self.logger.debug('ExaleadPPTXSearch completed search')
128 | return FinalOutput, HtmlResults, JsonResults
129 |
--------------------------------------------------------------------------------
/Modules/ExaleadSearch.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # Class will have the following properties:
4 | # 1) name / description
5 | # 2) main name called "ClassName"
6 | # 3) execute function (calls everything it needs)
7 | # 4) places the findings into a queue
8 | import configparser
9 | import requests
10 | import time
11 | from Helpers import helpers
12 | from Helpers import Parser
13 | from bs4 import BeautifulSoup
14 |
15 |
16 | class ClassName(object):
17 |
18 | def __init__(self, Domain, verbose=False):
19 | self.apikey = False
20 | self.name = "Exalead Search for Emails"
21 | self.description = "Uses Exalead to search for emails and parses them out of the Html"
22 | config = configparser.ConfigParser()
23 | try:
24 | config.read('Common/SimplyEmail.ini')
25 | self.Domain = Domain
26 | self.Quanity = int(config['ExaleadSearch']['StartQuantity'])
27 | self.UserAgent = {
28 | 'User-Agent': helpers.getua()}
29 | self.Limit = int(config['ExaleadSearch']['QueryLimit'])
30 | self.Counter = int(config['ExaleadSearch']['QueryStart'])
31 | self.verbose = verbose
32 | self.urlList = []
33 | self.Text = ""
34 | except:
35 | print helpers.color(" [*] Major Settings for Exalead are missing, EXITING!\n", warning=True)
36 |
37 | def execute(self):
38 | self.search()
39 | FinalOutput, HtmlResults, JsonResults = self.get_emails()
40 | return FinalOutput, HtmlResults, JsonResults
41 |
42 | def search(self):
43 | while self.Counter <= self.Limit:
44 | time.sleep(1)
45 | if self.verbose:
46 | p = ' [*] Exalead Search on page: ' + str(self.Counter)
47 | print helpers.color(p, firewall=True)
48 | try:
49 | url = 'http://www.exalead.com/search/web/results/?q="%40' + self.Domain + '"&elements_per_page=' + \
50 | str(self.Quanity) + '&start_index=' + str(self.Counter)
51 | except Exception as e:
52 | error = " [!] Major issue with Exalead Search: " + str(e)
53 | print helpers.color(error, warning=True)
54 | try:
55 | r = requests.get(url, headers=self.UserAgent)
56 | except Exception as e:
57 | error = " [!] Fail during Request to Exalead (Check Connection):" + str(
58 | e)
59 | print helpers.color(error, warning=True)
60 | try:
61 | RawHtml = r.content
62 | # sometimes url is broken but exalead search results contain
63 | # e-mail
64 | self.Text += RawHtml
65 | soup = BeautifulSoup(RawHtml, "lxml")
66 | self.urlList = [h2.a["href"]
67 | for h2 in soup.findAll('h4', class_='media-heading')]
68 | except Exception as e:
69 | error = " [!] Fail during parsing result: " + str(e)
70 | print helpers.color(error, warning=True)
71 | self.Counter += 30
72 |
73 | # Now take all gathered URL's and gather the Raw content needed
74 | for Url in self.urlList:
75 | try:
76 | data = requests.get(Url, timeout=2)
77 | self.Text += data.content
78 | except Exception as e:
79 | error = " [!] Connection Timed out on Exalead Search:" + str(e)
80 | print helpers.color(error, warning=True)
81 |
82 | if self.verbose:
83 | p = ' [*] Searching Exalead Complete'
84 | print helpers.color(p, status=True)
85 |
86 | def get_emails(self):
87 | Parse = Parser.Parser(self.Text)
88 | Parse.genericClean()
89 | Parse.urlClean()
90 | FinalOutput = Parse.GrepFindEmails()
91 | HtmlResults = Parse.BuildResults(FinalOutput, self.name)
92 | JsonResults = Parse.BuildJson(FinalOutput, self.name)
93 | return FinalOutput, HtmlResults, JsonResults
94 |
--------------------------------------------------------------------------------
/Modules/ExaleadXLSXSearch.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # Class will have the following properties:
4 | # 1) name / description
5 | # 2) main name called "ClassName"
6 | # 3) execute function (calls everything it needs)
7 | # 4) places the findings into a queue
8 | import requests
9 | import configparser
10 | import time
11 | from Helpers import Download
12 | from Helpers import helpers
13 | from Helpers import Parser
14 | from Helpers import Converter
15 | from bs4 import BeautifulSoup
16 |
17 | # import for "'ascii' codec can't decode byte" error
18 | import sys
19 | reload(sys)
20 | sys.setdefaultencoding("utf-8")
21 | # import for "'ascii' codec can't decode byte" error
22 |
23 |
24 | class ClassName(object):
25 |
26 | def __init__(self, Domain, verbose=False):
27 | self.apikey = False
28 | self.name = "Exalead XLSX Search for Emails"
29 | self.description = "Uses Exalead Dorking to search XLSXs for emails"
30 | config = configparser.ConfigParser()
31 | try:
32 | config.read('Common/SimplyEmail.ini')
33 | self.Domain = Domain
34 | self.Quanity = int(config['ExaleadXLSXSearch']['StartQuantity'])
35 | self.UserAgent = {
36 | 'User-Agent': helpers.getua()}
37 | self.Limit = int(config['ExaleadXLSXSearch']['QueryLimit'])
38 | self.Counter = int(config['ExaleadXLSXSearch']['QueryStart'])
39 | self.verbose = verbose
40 | self.urlList = []
41 | self.Text = ""
42 | except:
43 | print helpers.color(" [*] Major Settings for ExaleadXLSXSearch are missing, EXITING!\n", warning=True)
44 |
45 | def execute(self):
46 | self.search()
47 | FinalOutput, HtmlResults, JsonResults = self.get_emails()
48 | return FinalOutput, HtmlResults, JsonResults
49 |
50 | def search(self):
51 | dl = Download.Download(verbose=self.verbose)
52 | convert = Converter.Converter(verbose=self.verbose)
53 | while self.Counter <= self.Limit:
54 | time.sleep(1)
55 | if self.verbose:
56 | p = ' [*] Exalead XLSX Search on page: ' + str(self.Counter)
57 | print helpers.color(p, firewall=True)
58 | try:
59 | url = 'http://www.exalead.com/search/web/results/?q="%40' + self.Domain + \
60 | '"+filetype:xlsx&elements_per_page=' + \
61 | str(self.Quanity) + '&start_index=' + str(self.Counter)
62 | except Exception as e:
63 | error = " [!] Major issue with Exalead XLSX Search:" + str(e)
64 | print helpers.color(error, warning=True)
65 | try:
66 | r = requests.get(url, headers=self.UserAgent)
67 | except Exception as e:
68 | error = " [!] Fail during Request to Exalead (Check Connection):" + str(
69 | e)
70 | print helpers.color(error, warning=True)
71 | try:
72 | RawHtml = r.content
73 | # sometimes url is broken but exalead search results contain
74 | # e-mail
75 | self.Text += RawHtml
76 | soup = BeautifulSoup(RawHtml, "lxml")
77 | self.urlList = [h4.a["href"]
78 | for h4 in soup.findAll('h4', class_='media-heading')]
79 | except Exception as e:
80 | error = " [!] Fail during parsing result: " + str(e)
81 | print helpers.color(error, warning=True)
82 | self.Counter += 30
83 |
84 | # now download the required files
85 | try:
86 | for url in self.urlList:
87 | if self.verbose:
88 | p = ' [*] Exalead XLSX search downloading: ' + str(url)
89 | print helpers.color(p, firewall=True)
90 | try:
91 | filetype = ".xlsx"
92 | FileName, FileDownload = dl.download_file(url, filetype)
93 | if FileDownload:
94 | if self.verbose:
95 | p = ' [*] Exalead XLSX file was downloaded: ' + \
96 | str(url)
97 | print helpers.color(p, firewall=True)
98 | self.Text += convert.convert_Xlsx_to_Csv(FileName)
99 | except Exception as e:
100 | error = " [!] Issue with opening Xlsx Files:%s\n" % (str(e))
101 | print helpers.color(error, warning=True)
102 | try:
103 | dl.delete_file(FileName)
104 | except Exception as e:
105 | print e
106 | except:
107 | print helpers.color("[*] No XLSX's to download from Exalead!\n", firewall=True)
108 |
109 | if self.verbose:
110 | p = ' [*] Searching XLSX from Exalead Complete'
111 | print helpers.color(p, status=True)
112 |
113 | def get_emails(self):
114 | Parse = Parser.Parser(self.Text)
115 | Parse.genericClean()
116 | Parse.urlClean()
117 | FinalOutput = Parse.GrepFindEmails()
118 | HtmlResults = Parse.BuildResults(FinalOutput, self.name)
119 | JsonResults = Parse.BuildJson(FinalOutput, self.name)
120 | return FinalOutput, HtmlResults, JsonResults
121 |
--------------------------------------------------------------------------------
/Modules/FlickrSearch.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import configparser
3 | from Helpers import Download
4 | from Helpers import Parser
5 | from Helpers import helpers
6 |
7 | # Class will have the following properties:
8 | # 1) name / description
9 | # 2) main name called "ClassName"
10 | # 3) execute function (calls everything it needs)
11 | # 4) places the findings into a queue
12 |
13 |
14 | class ClassName(object):
15 |
16 | def __init__(self, domain, verbose=False):
17 | self.apikey = False
18 | self.name = "Searching Flickr"
19 |         self.description = "Search the top relevant Flickr results for emails"
20 | self.domain = domain
21 | config = configparser.ConfigParser()
22 | self.results = ""
23 | self.verbose = verbose
24 | try:
25 | self.UserAgent = {
26 | 'User-Agent': helpers.getua()}
27 | config.read('Common/SimplyEmail.ini')
28 | self.HostName = str(config['FlickrSearch']['Hostname'])
29 | except:
30 | print helpers.color(" [*] Major Settings for FlickrSearch are missing, EXITING!\n", warning=True)
31 |
32 | def execute(self):
33 | self.process()
34 | FinalOutput, HtmlResults, JsonResults = self.get_emails()
35 | return FinalOutput, HtmlResults, JsonResults
36 |
37 | def process(self):
38 | dl = Download.Download(verbose=self.verbose)
39 | try:
40 | url = "https://www.flickr.com/search/?text=%40" + self.domain
41 | rawhtml = dl.requesturl(url, useragent=self.UserAgent)
42 | except Exception as e:
43 | error = " [!] Major issue with Flickr Search:" + str(e)
44 | print helpers.color(error, warning=True)
45 | self.results += rawhtml
46 | if self.verbose:
47 | p = ' [*] FlickrSearch has completed'
48 | print helpers.color(p, firewall=True)
49 | # https://www.flickr.com/search/?text=%40microsoft.com
50 | # is an example of a complete request for "@microsoft.com"
51 |
52 | def get_emails(self):
53 | Parse = Parser.Parser(self.results)
54 | FinalOutput = Parse.GrepFindEmails()
55 | HtmlResults = Parse.BuildResults(FinalOutput, self.name)
56 | JsonResults = Parse.BuildJson(FinalOutput, self.name)
57 | return FinalOutput, HtmlResults, JsonResults
58 |
--------------------------------------------------------------------------------
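Parser.py is not part of this listing; as a rough stand-in for its GrepFindEmails step, a domain-scoped regex pass over the raw HTML is usually enough for pages like the Flickr results above:

import re

def grep_domain_emails(raw_html, domain):
    # Match anything that looks like local-part@target-domain, deduplicated.
    pattern = r"[A-Za-z0-9._%+-]+@" + re.escape(domain)
    return sorted(set(re.findall(pattern, raw_html)))
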
/Modules/GitHubCodeSearch.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | import configparser
4 | from BeautifulSoup import BeautifulSoup
5 | from Helpers import Download
6 | from Helpers import Parser
7 | from Helpers import helpers
8 |
9 | # Class will have the following properties:
10 | # 1) name / description
11 | # 2) main name called "ClassName"
12 | # 3) execute function (calls everything it needs)
13 | # 4) places the findings into a queue
14 |
15 | # This method will do the following:
16 | # 1) Get raw HTML for, let's say, enron.com (https://github.com/search?utf8=✓&q=enron.com+&type=Code&ref=searchresults)
17 | # This is mainly due to the API not supporting code searches without a known repo or user :(
18 | # 2) Use BeautifulSoup to parse the results of the first (5) pages for tags that start with "/"
19 | # 3) Uses the list of URLs and places the raw HTML into one value
20 | # 4) Sends to the parser for results
21 |
22 | # Here was a simple version of parsing a page:
23 | # urlist = []
24 | # FinalHtml = ""
25 | # r = requests.get(
26 | # "https://github.com/search?utf8=%E2%9C%93&q=enron.com+&type=Code&ref=searchresults")
27 | # html = r.content
28 | # soup = BeautifulSoup(html)
29 | # for a in soup.findAll('a', href=True):
30 | # a = a['href']
31 | # if a.startswith('/'):
32 | # time.sleep(1)
33 | # a = 'https://github.com' + str(a)
34 | # html = requests.get(a)
35 | # print "[!] Hitting: " + a
36 | # FinalHtml += html.content
37 | # with open("temps.html", "w") as myfile:
38 | # output = myfile.write(FinalHtml)
39 |
40 |
41 | class ClassName(object):
42 |
43 | def __init__(self, domain, verbose=False):
44 | self.apikey = False
45 | self.name = "Searching GitHub Code"
46 | self.description = "Search GitHub code for emails using a large pool of code searches"
47 | self.domain = domain
48 | config = configparser.ConfigParser()
49 | self.Html = ""
50 | self.verbose = verbose
51 | try:
52 | self.UserAgent = {
53 | 'User-Agent': helpers.getua()}
54 | config.read('Common/SimplyEmail.ini')
55 | self.Depth = int(config['GitHubSearch']['PageDepth'])
56 | self.Counter = int(config['GitHubSearch']['QueryStart'])
57 | except:
58 | print helpers.color(" [*] Major Settings for GitHubSearch are missing, EXITING!\n", warning=True)
59 |
60 | def execute(self):
61 | self.process()
62 | FinalOutput, HtmlResults, JsonResults = self.get_emails()
63 | return FinalOutput, HtmlResults, JsonResults
64 |
65 | def process(self):
66 | dl = Download.Download(verbose=self.verbose)
67 | # Get all the USER code Repos
68 | # https://github.com/search?p=2&q=enron.com+&ref=searchresults&type=Code&utf8=✓
69 | UrlList = []
70 | while self.Counter <= self.Depth:
71 | if self.verbose:
72 | p = ' [*] GitHub Code Search on page: ' + str(self.Counter)
73 | print helpers.color(p, firewall=True)
74 | try:
75 | url = "https://github.com/search?p=" + str(self.Counter) + "&q=" + \
76 | str(self.domain) + "+&ref=searchresults&type=Code&utf8=✓"
77 | r = dl.requesturl(url, useragent=self.UserAgent, raw=True, timeout=10)
78 | if r.status_code != 200:
79 | break
80 | except Exception as e:
81 |                 error = " [!] Major issue with GitHub Search:" + \
82 | str(e)
83 | print helpers.color(error, warning=True)
84 | RawHtml = r.content
85 |             # Parse the results for our URLs
86 | soup = BeautifulSoup(RawHtml)
87 | for a in soup.findAll('a', href=True):
88 | a = a['href']
89 | if a.startswith('/'):
90 | UrlList.append(a)
91 | self.Counter += 1
92 | # Now take all gathered URL's and gather the HTML content needed
93 | for url in UrlList:
94 | try:
95 | url = "https://github.com" + url
96 | html = dl.requesturl(url, useragent=self.UserAgent, timeout=10)
97 | self.Html += html
98 | except Exception as e:
99 | error = " [!] Connection Timed out on Github Search:" + str(e)
100 | print helpers.color(error, warning=True)
101 |
102 | def get_emails(self):
103 | Parse = Parser.Parser(self.Html)
104 | Parse.genericClean()
105 | Parse.urlClean()
106 | FinalOutput = Parse.GrepFindEmails()
107 | HtmlResults = Parse.BuildResults(FinalOutput, self.name)
108 | JsonResults = Parse.BuildJson(FinalOutput, self.name)
109 | return FinalOutput, HtmlResults, JsonResults
110 |
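
A minimal standalone sketch of the paginate, collect-hrefs, then fetch pattern described in the header comments above, assuming only the requests and bs4 packages are installed. The function name, page depth, and sleep interval are illustrative, not the module's configured values, and GitHub may require a signed-in session for code search today, so treat this purely as a sketch of the pattern.

import time

import requests
from bs4 import BeautifulSoup


def scrape_github_code_search(domain, depth=2, pause=1):
    # Walk the first <depth> pages of GitHub's code-search UI for <domain>,
    # collect every relative href, then fetch each linked page and
    # concatenate the raw HTML for later email parsing.
    collected_html = ""
    for page in range(1, depth + 1):
        url = ("https://github.com/search?p=" + str(page) +
               "&q=" + domain + "&type=Code&ref=searchresults")
        resp = requests.get(url, timeout=10)
        if resp.status_code != 200:
            break
        soup = BeautifulSoup(resp.content, "html.parser")
        for anchor in soup.findAll('a', href=True):
            href = anchor['href']
            if href.startswith('/'):
                time.sleep(pause)  # stay polite between follow-up requests
                page_resp = requests.get("https://github.com" + href, timeout=10)
                collected_html += page_resp.text
    return collected_html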
--------------------------------------------------------------------------------
/Modules/GitHubGistSearch.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | import configparser
4 | from BeautifulSoup import BeautifulSoup
5 | from Helpers import Download
6 | from Helpers import Parser
7 | from Helpers import helpers
8 |
9 | # Class will have the following properties:
10 | # 1) name / description
11 | # 2) main name called "ClassName"
12 | # 3) execute function (calls everything it needs)
13 | # 4) places the findings into a queue
14 |
15 |
16 | # https://gist.github.com/search?utf8=✓&q=%40enron.com&ref=searchresults
17 |
18 | class ClassName(object):
19 |
20 | def __init__(self, domain, verbose=False):
21 | self.apikey = False
22 | self.name = "Searching GitHubGist Code"
23 | self.description = "Search GitHubGist code for emails using a large pool of code searches"
24 | self.domain = domain
25 | config = configparser.ConfigParser()
26 | self.Html = ""
27 | self.verbose = verbose
28 | try:
29 | self.UserAgent = {
30 | 'User-Agent': helpers.getua()}
31 | config.read('Common/SimplyEmail.ini')
32 | self.Depth = int(config['GitHubGistSearch']['PageDepth'])
33 | self.Counter = int(config['GitHubGistSearch']['QueryStart'])
34 | except:
35 | print helpers.color(" [*] Major Settings for GitHubGistSearch are missing, EXITING!\n", warning=True)
36 |
37 | def execute(self):
38 | self.process()
39 | FinalOutput, HtmlResults, JsonResults = self.get_emails()
40 | return FinalOutput, HtmlResults, JsonResults
41 |
42 | def process(self):
43 | dl = Download.Download(verbose=self.verbose)
44 | # Get all the USER code Repos
45 | # https://github.com/search?p=2&q=enron.com+&ref=searchresults&type=Code&utf8=✓
46 | UrlList = []
47 | while self.Counter <= self.Depth:
48 | if self.verbose:
49 |                 p = ' [*] GitHub Gist Search on page: ' + \
50 | str(self.Counter)
51 | print helpers.color(p, firewall=True)
52 | try:
53 | # search?p=2&q=%40enron.com&ref=searchresults&utf8=✓
54 | url = "https://gist.github.com/search?p=" + str(self.Counter) + "&q=%40" + \
55 | str(self.domain) + "+&ref=searchresults&utf8=✓"
56 | r = dl.requesturl(url, useragent=self.UserAgent, raw=True, timeout=10)
57 | if r.status_code != 200:
58 | break
59 | except Exception as e:
60 | error = " [!] Major issue with GitHubGist Search:" + \
61 | str(e)
62 | print helpers.color(error, warning=True)
63 | RawHtml = r.content
64 |             # Parse the results for our URLs
65 | soup = BeautifulSoup(RawHtml)
66 | for a in soup.findAll('a', href=True):
67 | a = a['href']
68 | if a.startswith('/'):
69 | UrlList.append(a)
70 | self.Counter += 1
71 | # Now take all gathered URL's and gather the HTML content needed
72 | for url in UrlList:
73 | try:
74 | url = "https://gist.github.com" + url
75 | html = dl.requesturl(url, useragent=self.UserAgent, timeout=10)
76 | self.Html += html
77 | except Exception as e:
78 | error = " [!] Connection Timed out on GithubGist Search:" + \
79 | str(e)
80 | print helpers.color(error, warning=True)
81 |
82 | def get_emails(self):
83 | Parse = Parser.Parser(self.Html)
84 | Parse.genericClean()
85 | Parse.urlClean()
86 | FinalOutput = Parse.GrepFindEmails()
87 | HtmlResults = Parse.BuildResults(FinalOutput, self.name)
88 | JsonResults = Parse.BuildJson(FinalOutput, self.name)
89 | return FinalOutput, HtmlResults, JsonResults
90 |
--------------------------------------------------------------------------------
/Modules/GitHubUserSearch.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | import configparser
4 | from Helpers import Download
5 | from Helpers import Parser
6 | from Helpers import helpers
7 |
8 | # Class will have the following properties:
9 | # 1) name / description
10 | # 2) main name called "ClassName"
11 | # 3) execute function (calls everything it needs)
12 | # 4) places the findings into a queue
13 |
14 |
15 | class ClassName(object):
16 |
17 | def __init__(self, domain, verbose=False):
18 | self.apikey = False
19 | self.name = "Searching GitHubUser Search"
20 |         self.description = "Search GitHub users for emails using the user search function"
21 | self.domain = domain
22 | config = configparser.ConfigParser()
23 | self.verbose = verbose
24 | self.Html = ""
25 | try:
26 | config.read('Common/SimplyEmail.ini')
27 | self.UserAgent = {
28 | 'User-Agent': helpers.getua()}
29 | self.Depth = int(config['GitHubUserSearch']['PageDepth'])
30 | self.Counter = int(config['GitHubUserSearch']['QueryStart'])
31 | except:
32 | print helpers.color(" [*] Major Settings for GitHubUserSearch are missing, EXITING!\n", warning=True)
33 |
34 | def execute(self):
35 | self.search()
36 | FinalOutput, HtmlResults, JsonResults = self.get_emails()
37 | return FinalOutput, HtmlResults, JsonResults
38 |
39 | def search(self):
40 | dl = Download.Download(verbose=self.verbose)
41 | while self.Counter <= self.Depth and self.Counter <= 100:
42 | helpers.modsleep(5)
43 | if self.verbose:
44 | p = ' [*] GitHubUser Search on page: ' + str(self.Counter)
45 | print helpers.color(p, firewall=True)
46 | try:
47 | url = 'https://github.com/search?p=' + str(self.Counter) + '&q=' + \
48 |                     str(self.domain) + '&ref=searchresults&type=Users&utf8=✓'
49 | except Exception as e:
50 | error = " [!] Major issue with GitHubUser Search:" + str(e)
51 | print helpers.color(error, warning=True)
52 | try:
53 | r = dl.requesturl(
54 | url, useragent=self.UserAgent, raw=True, timeout=10)
55 | except Exception as e:
56 | error = " [!] Fail during Request to GitHubUser (Check Connection):" + \
57 | str(e)
58 | print helpers.color(error, warning=True)
59 | results = r.content
60 | self.Html += results
61 | self.Counter += 1
62 |
63 | def get_emails(self):
64 | Parse = Parser.Parser(self.Html)
65 | Parse.genericClean()
66 | Parse.urlClean()
67 | FinalOutput = Parse.GrepFindEmails()
68 | HtmlResults = Parse.BuildResults(FinalOutput, self.name)
69 | JsonResults = Parse.BuildJson(FinalOutput, self.name)
70 | return FinalOutput, HtmlResults, JsonResults
71 |
--------------------------------------------------------------------------------
/Modules/GoogleCsvSearch.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # Class will have the following properties:
4 | # 1) name / description
5 | # 2) main name called "ClassName"
6 | # 3) execute function (calls everything it needs)
7 | # 4) places the findings into a queue
8 | import urlparse
9 | import configparser
10 | import time
11 | from Helpers import Download
12 | from Helpers import helpers
13 | from Helpers import Parser
14 | from BeautifulSoup import BeautifulSoup
15 |
16 |
17 | class ClassName(object):
18 |
19 | def __init__(self, Domain, verbose=False):
20 | self.apikey = False
21 | self.name = "Google CSV Search for Emails"
22 | self.description = "Uses Google Dorking to search for emails"
23 | config = configparser.ConfigParser()
24 | try:
25 | config.read('Common/SimplyEmail.ini')
26 | self.Domain = Domain
27 | self.Quanity = int(config['GoogleCsvSearch']['StartQuantity'])
28 | self.UserAgent = {
29 | 'User-Agent': helpers.getua()}
30 | self.Limit = int(config['GoogleCsvSearch']['QueryLimit'])
31 | self.Counter = int(config['GoogleCsvSearch']['QueryStart'])
32 | self.Sleep = int(config['SleepConfig']['QuerySleep'])
33 | self.Jitter = int(config['SleepConfig']['QueryJitter'])
34 | self.verbose = verbose
35 | self.urlList = []
36 | self.Text = ""
37 | except:
38 | print helpers.color(" [*] Major Settings for GoogleCsvSearch are missing, EXITING!\n", warning=True)
39 |
40 | def execute(self):
41 | self.search()
42 | FinalOutput, HtmlResults, JsonResults = self.get_emails()
43 | return FinalOutput, HtmlResults, JsonResults
44 |
45 | def search(self):
46 | dl = Download.Download(self.verbose)
47 | while self.Counter <= self.Limit and self.Counter <= 100:
48 | time.sleep(1)
49 | if self.verbose:
50 | p = ' [*] Google CSV Search on page: ' + str(self.Counter)
51 | print helpers.color(p, firewall=True)
52 | try:
53 | url = "https://www.google.com/search?q=site:" + \
54 | self.Domain + "+filetype:csv&start=" + str(self.Counter)
55 | except Exception as e:
56 | error = " [!] Major issue with Google Search:" + str(e)
57 | print helpers.color(error, warning=True)
58 | try:
59 | RawHtml = dl.requesturl(url, useragent=self.UserAgent)
60 | except Exception as e:
61 | error = " [!] Fail during Request to Google (Check Connection):" + \
62 | str(e)
63 | print helpers.color(error, warning=True)
64 | # check for captcha
65 | try:
66 | # Url = r.url
67 | dl.GoogleCaptchaDetection(RawHtml)
68 | except Exception as e:
69 | print e
70 | soup = BeautifulSoup(RawHtml)
71 | # I use this to parse my results, for URLS to follow
72 | for a in soup.findAll('a'):
73 | try:
74 | # https://stackoverflow.com/questions/21934004/not-getting-proper-links-
75 | # from-google-search-results-using-mechanize-and-beautifu/22155412#22155412?
76 | # newreg=01f0ed80771f4dfaa269b15268b3f9a9
77 | l = urlparse.parse_qs(
78 | urlparse.urlparse(a['href']).query)['q'][0]
79 | if l.startswith('http') or l.startswith('www'):
80 | if "webcache.googleusercontent.com" not in l:
81 | self.urlList.append(l)
82 | except:
83 | pass
84 | self.Counter += 10
85 | helpers.modsleep(self.Sleep, jitter=self.Jitter)
86 | # now download the required files
87 | try:
88 | for url in self.urlList:
89 | if self.verbose:
90 | p = ' [*] Google CSV search downloading: ' + str(url)
91 | print helpers.color(p, firewall=True)
92 | try:
93 | filetype = ".csv"
94 | FileName, FileDownload = dl.download_file2(url, filetype)
95 | if FileDownload:
96 | if self.verbose:
97 | p = '[*] Google CSV file was downloaded: ' + \
98 | str(url)
99 | print helpers.color(p, firewall=True)
100 | with open(FileName) as f:
101 | self.Text += f.read()
102 | # print self.Text
103 | except Exception as e:
104 | print helpers.color(" [!] Issue with opening CSV Files\n", firewall=True)
105 | try:
106 | dl.delete_file(FileName)
107 | except Exception as e:
108 | print e
109 | except:
110 | print helpers.color(" [*] No CSV to download from Google!\n", firewall=True)
111 |
112 | def get_emails(self):
113 | Parse = Parser.Parser(self.Text)
114 | Parse.genericClean()
115 | Parse.urlClean()
116 | FinalOutput = Parse.GrepFindEmails()
117 | HtmlResults = Parse.BuildResults(FinalOutput, self.name)
118 | JsonResults = Parse.BuildJson(FinalOutput, self.name)
119 | return FinalOutput, HtmlResults, JsonResults
120 |
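
The search() loop above pulls the real target URLs out of Google's redirect-style result links with urlparse.parse_qs (the trick referenced in the StackOverflow comment). A small standalone sketch of just that extraction step, assuming hrefs shaped like "/url?q=<target>&sa=..."; the function name is an illustration, not part of the module.

import urlparse  # Python 2; on Python 3 this lives at urllib.parse


def extract_google_result_links(hrefs):
    # hrefs: href strings scraped from a Google results page.
    # Returns the decoded target URLs, skipping Google's own cache links.
    links = []
    for href in hrefs:
        try:
            target = urlparse.parse_qs(urlparse.urlparse(href).query)['q'][0]
        except (KeyError, IndexError):
            continue  # not a redirect-style result link
        if target.startswith(('http', 'www')) and \
                "webcache.googleusercontent.com" not in target:
            links.append(target)
    return links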
--------------------------------------------------------------------------------
/Modules/GoogleDocSearch.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # Class will have the following properties:
4 | # 1) name / description
5 | # 2) main name called "ClassName"
6 | # 3) execute function (calls everything it needs)
7 | # 4) places the findings into a queue
8 | import requests
9 | import urlparse
10 | import configparser
11 | import time
12 | from Helpers import Download
13 | from Helpers import Converter
14 | from Helpers import helpers
15 | from Helpers import Parser
16 | from BeautifulSoup import BeautifulSoup
17 |
18 |
19 | class ClassName(object):
20 |
21 | def __init__(self, Domain, verbose=False):
22 | self.apikey = False
23 | self.name = "Google DOC Search for Emails"
24 | self.description = "Uses Google Dorking to search for emails"
25 | config = configparser.ConfigParser()
26 | try:
27 | config.read('Common/SimplyEmail.ini')
28 | self.Domain = Domain
29 | self.Quanity = int(config['GoogleDocSearch']['StartQuantity'])
30 | self.UserAgent = {
31 | 'User-Agent': helpers.getua()}
32 | self.Limit = int(config['GoogleDocSearch']['QueryLimit'])
33 | self.Counter = int(config['GoogleDocSearch']['QueryStart'])
34 | self.Sleep = int(config['SleepConfig']['QuerySleep'])
35 | self.Jitter = int(config['SleepConfig']['QueryJitter'])
36 | self.verbose = verbose
37 | self.urlList = []
38 | self.Text = ""
39 | except:
40 | print helpers.color("[*] Major Settings for GoogleDocSearch are missing, EXITING!\n", warning=True)
41 |
42 | def execute(self):
43 | self.search()
44 | FinalOutput, HtmlResults, JsonResults = self.get_emails()
45 | return FinalOutput, HtmlResults, JsonResults
46 |
47 | def search(self):
48 | dl = Download.Download(self.verbose)
49 | convert = Converter.Converter(verbose=self.verbose)
50 | while self.Counter <= self.Limit and self.Counter <= 100:
51 | time.sleep(1)
52 | if self.verbose:
53 | p = ' [*] Google DOC Search on page: ' + str(self.Counter)
54 | print helpers.color(p, firewall=True)
55 | try:
56 | urly = "https://www.google.com/search?q=site:" + \
57 | self.Domain + "+filetype:doc&start=" + str(self.Counter)
58 | except Exception as e:
59 | error = " [!] Major issue with Google Search:" + str(e)
60 | print helpers.color(error, warning=True)
61 | try:
62 | r = requests.get(urly)
63 | except Exception as e:
64 | error = " [!] Fail during Request to Google (Check Connection):" + \
65 | str(e)
66 | print helpers.color(error, warning=True)
67 | RawHtml = r.content
68 | # check for captcha
69 | try:
70 | # Url = r.url
71 | dl.GoogleCaptchaDetection(RawHtml)
72 | except Exception as e:
73 | print e
74 | soup = BeautifulSoup(RawHtml)
75 | # I use this to parse my results, for URLS to follow
76 | for a in soup.findAll('a'):
77 | try:
78 | # https://stackoverflow.com/questions/21934004/not-getting-proper-links-
79 | # from-google-search-results-using-mechanize-and-beautifu/22155412#22155412?
80 | # newreg=01f0ed80771f4dfaa269b15268b3f9a9
81 | l = urlparse.parse_qs(
82 | urlparse.urlparse(a['href']).query)['q'][0]
83 | if l.startswith('http') or l.startswith('www'):
84 | if "webcache.googleusercontent.com" not in l:
85 | self.urlList.append(l)
86 | except:
87 | pass
88 | self.Counter += 10
89 | helpers.modsleep(self.Sleep, jitter=self.Jitter)
90 | # now download the required files
91 | try:
92 | for url in self.urlList:
93 | if self.verbose:
94 | p = ' [*] Google DOC search downloading: ' + str(url)
95 | print helpers.color(p, firewall=True)
96 | try:
97 | filetype = ".doc"
98 | FileName, FileDownload = dl.download_file(url, filetype)
99 | if FileDownload:
100 | if self.verbose:
101 | p = ' [*] Google DOC file was downloaded: ' + \
102 | str(url)
103 | print helpers.color(p, firewall=True)
104 | self.Text += convert.convert_doc_to_txt(FileName)
105 | # print self.Text
106 | except Exception as e:
107 | print helpers.color(" [!] Issue with opening Doc Files\n", firewall=True)
108 | try:
109 | dl.delete_file(FileName)
110 | except Exception as e:
111 | print e
112 | except:
113 | print helpers.color(" [*] No DOC's to download from Google!\n", firewall=True)
114 |
115 | def get_emails(self):
116 | Parse = Parser.Parser(self.Text)
117 | Parse.genericClean()
118 | Parse.urlClean()
119 | FinalOutput = Parse.GrepFindEmails()
120 | HtmlResults = Parse.BuildResults(FinalOutput, self.name)
121 | JsonResults = Parse.BuildJson(FinalOutput, self.name)
122 | return FinalOutput, HtmlResults, JsonResults
123 |
--------------------------------------------------------------------------------
/Modules/GoogleDocxSearch.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # encoding=utf8
3 |
4 | # Class will have the following properties:
5 | # 1) name / description
6 | # 2) main name called "ClassName"
7 | # 3) execute function (calls everything it needs)
8 | # 4) places the findings into a queue
9 | import requests
10 | import urlparse
11 | import configparser
12 | import time
13 | from Helpers import Converter
14 | from Helpers import helpers
15 | from Helpers import Parser
16 | from Helpers import Download
17 | from BeautifulSoup import BeautifulSoup
18 |
19 |
20 | class ClassName(object):
21 |
22 | def __init__(self, Domain, verbose=False):
23 | self.apikey = False
24 | self.name = "Google DOCX Search for Emails"
25 | self.description = "Uses Google Dorking to search for emails"
26 | config = configparser.ConfigParser()
27 | try:
28 | config.read('Common/SimplyEmail.ini')
29 | self.Domain = Domain
30 | self.Quanity = int(config['GoogleDocxSearch']['StartQuantity'])
31 | self.UserAgent = {
32 | 'User-Agent': helpers.getua()}
33 | self.Limit = int(config['GoogleDocxSearch']['QueryLimit'])
34 | self.Counter = int(config['GoogleDocxSearch']['QueryStart'])
35 | self.Sleep = int(config['SleepConfig']['QuerySleep'])
36 | self.Jitter = int(config['SleepConfig']['QueryJitter'])
37 | self.verbose = verbose
38 | self.urlList = []
39 | self.Text = ""
40 | except:
41 | print helpers.color(" [*] Major Settings for GoogleDocxSearch are missing, EXITING!\n", warning=True)
42 |
43 | def execute(self):
44 | self.search()
45 | FinalOutput, HtmlResults, JsonResults = self.get_emails()
46 | return FinalOutput, HtmlResults, JsonResults
47 |
48 | def search(self):
49 | dl = Download.Download(self.verbose)
50 | convert = Converter.Converter(verbose=self.verbose)
51 | while self.Counter <= self.Limit and self.Counter <= 100:
52 | time.sleep(1)
53 | if self.verbose:
54 | p = ' [*] Google DOCX Search on page: ' + str(self.Counter)
55 | print helpers.color(p, firewall=True)
56 | try:
57 | urly = "https://www.google.com/search?q=site:" + \
58 | self.Domain + "+filetype:docx&start=" + str(self.Counter)
59 | except Exception as e:
60 | error = "[!] Major issue with Google Search:" + str(e)
61 | print helpers.color(error, warning=True)
62 | try:
63 | r = requests.get(urly)
64 | except Exception as e:
65 | error = " [!] Fail during Request to Google (Check Connection):" + \
66 | str(e)
67 | print helpers.color(error, warning=True)
68 | RawHtml = r.content
69 | soup = BeautifulSoup(RawHtml)
70 | # I use this to parse my results, for URLS to follow
71 | for a in soup.findAll('a'):
72 | try:
73 | # https://stackoverflow.com/questions/21934004/not-getting-proper-links-
74 | # from-google-search-results-using-mechanize-and-beautifu/22155412#22155412?
75 | # newreg=01f0ed80771f4dfaa269b15268b3f9a9
76 | l = urlparse.parse_qs(
77 | urlparse.urlparse(a['href']).query)['q'][0]
78 | if l.startswith('http') or l.startswith('www'):
79 | if "webcache.googleusercontent.com" not in l:
80 | self.urlList.append(l)
81 | except:
82 | pass
83 | self.Counter += 10
84 | helpers.modsleep(self.Sleep, jitter=self.Jitter)
85 | # now download the required files
86 | try:
87 | for url in self.urlList:
88 | if self.verbose:
89 | p = ' [*] Google DOCX search downloading: ' + str(url)
90 | print helpers.color(p, firewall=True)
91 | try:
92 | filetype = ".docx"
93 | FileName, FileDownload = dl.download_file(url, filetype)
94 | if FileDownload:
95 | if self.verbose:
96 | p = ' [*] Google DOCX file was downloaded: ' + \
97 | str(url)
98 | print helpers.color(p, firewall=True)
99 | self.Text += convert.convert_docx_to_txt(FileName)
100 | # print self.Text
101 | except Exception as e:
102 | print helpers.color(" [!] Issue with Converting Docx Files\n", firewall=True)
103 | try:
104 | dl.delete_file(FileName)
105 | except Exception as e:
106 | print e
107 | except:
108 | print helpers.color(" [*] No DOCX's to download from Google!\n", firewall=True)
109 |
110 | def get_emails(self):
111 | Parse = Parser.Parser(self.Text)
112 | Parse.RemoveUnicode()
113 | Parse.genericClean()
114 | Parse.urlClean()
115 | # Unicode issues here:
116 | FinalOutput = Parse.GrepFindEmails()
117 | HtmlResults = Parse.BuildResults(FinalOutput, self.name)
118 | JsonResults = Parse.BuildJson(FinalOutput, self.name)
119 | return FinalOutput, HtmlResults, JsonResults
120 |
--------------------------------------------------------------------------------
/Modules/GooglePDFSearch.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # Class will have the following properties:
4 | # 1) name / description
5 | # 2) main name called "ClassName"
6 | # 3) execute function (calls everything it needs)
7 | # 4) places the findings into a queue
8 | import requests
9 | import urlparse
10 | import configparser
11 | import time
12 | from Helpers import helpers
13 | from Helpers import Parser
14 | from Helpers import Download
15 | from Helpers import Converter
16 | from BeautifulSoup import BeautifulSoup
17 |
18 |
19 | class ClassName(object):
20 |
21 | def __init__(self, Domain, verbose=False):
22 | self.apikey = False
23 | self.name = "Google PDF Search for Emails"
24 | self.description = "Uses Google Dorking to search for emails"
25 | config = configparser.ConfigParser()
26 | try:
27 | config.read('Common/SimplyEmail.ini')
28 | self.Domain = Domain
29 | self.Quanity = int(config['GooglePDFSearch']['StartQuantity'])
30 | self.UserAgent = {
31 | 'User-Agent': helpers.getua()}
32 | self.Limit = int(config['GooglePDFSearch']['QueryLimit'])
33 | self.Counter = int(config['GooglePDFSearch']['QueryStart'])
34 | self.Sleep = int(config['SleepConfig']['QuerySleep'])
35 | self.Jitter = int(config['SleepConfig']['QueryJitter'])
36 | self.verbose = verbose
37 | self.urlList = []
38 | self.Text = ""
39 | except:
40 | print helpers.color(" [*] Major Settings for GooglePDFSearch are missing, EXITING!\n", warning=True)
41 |
42 | def execute(self):
43 | self.search()
44 | FinalOutput, HtmlResults, JsonResults = self.get_emails()
45 | return FinalOutput, HtmlResults, JsonResults
46 |
47 | def search(self):
48 | # setup for helpers in the download class
49 | convert = Converter.Converter(verbose=self.verbose)
50 | dl = Download.Download(self.verbose)
51 | while self.Counter <= self.Limit and self.Counter <= 100:
52 | time.sleep(1)
53 | if self.verbose:
54 | p = ' [*] Google PDF Search on page: ' + str(self.Counter)
55 | print helpers.color(p, firewall=True)
56 | try:
57 | urly = "https://www.google.com/search?q=" + \
58 | self.Domain + "+filetype:pdf&start=" + str(self.Counter)
59 | except Exception as e:
60 | error = " [!] Major issue with Google Search:" + str(e)
61 | print helpers.color(error, warning=True)
62 | try:
63 | r = requests.get(urly)
64 | except Exception as e:
65 | error = " [!] Fail during Request to Google (Check Connection):" + \
66 | str(e)
67 | print helpers.color(error, warning=True)
68 | RawHtml = r.content
69 | # get redirect URL
70 | # Url = r.url
71 | dl.GoogleCaptchaDetection(RawHtml)
72 | soup = BeautifulSoup(RawHtml)
73 | for a in soup.findAll('a'):
74 | try:
75 | # https://stackoverflow.com/questions/21934004/not-getting-proper-links-
76 | # from-google-search-results-using-mechanize-and-beautifu/22155412#22155412?
77 | # newreg=01f0ed80771f4dfaa269b15268b3f9a9
78 | l = urlparse.parse_qs(
79 | urlparse.urlparse(a['href']).query)['q'][0]
80 | if l.startswith('http') or l.startswith('www'):
81 | if "webcache.googleusercontent.com" not in l:
82 | self.urlList.append(l)
83 | except:
84 | pass
85 | self.Counter += 10
86 | helpers.modsleep(self.Sleep, jitter=self.Jitter)
87 | # now download the required files
88 | try:
89 | for url in self.urlList:
90 | if self.verbose:
91 | p = ' [*] Google PDF search downloading: ' + str(url)
92 | print helpers.color(p, firewall=True)
93 | try:
94 | filetype = ".pdf"
95 | # use new helper class to download file
96 | FileName, FileDownload = dl.download_file(url, filetype)
97 | # check if the file was downloaded
98 | if FileDownload:
99 | if self.verbose:
100 | p = ' [*] Google PDF file was downloaded: ' + \
101 | str(url)
102 | print helpers.color(p, firewall=True)
103 | self.Text += convert.convert_pdf_to_txt(FileName)
104 | except Exception as e:
105 | print e
106 | try:
107 | # now remove any files left behind
108 | dl.delete_file(FileName)
109 | except Exception as e:
110 | print e
111 | except:
112 | print helpers.color(" [*] No PDF's to download from Google!\n", firewall=True)
113 |
114 | def get_emails(self):
115 | Parse = Parser.Parser(self.Text)
116 | Parse.genericClean()
117 | Parse.urlClean()
118 | FinalOutput = Parse.GrepFindEmails()
119 | HtmlResults = Parse.BuildResults(FinalOutput, self.name)
120 | JsonResults = Parse.BuildJson(FinalOutput, self.name)
121 | return FinalOutput, HtmlResults, JsonResults
122 |
--------------------------------------------------------------------------------
/Modules/GooglePPTXSearch.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # Class will have the following properties:
4 | # 1) name / description
5 | # 2) main name called "ClassName"
6 | # 3) execute function (calls everything it needs)
7 | # 4) places the findings into a queue
8 | import urlparse
9 | import configparser
10 | import time
11 | from Helpers import Converter
12 | from Helpers import Download
13 | from Helpers import helpers
14 | from Helpers import Parser
15 | from BeautifulSoup import BeautifulSoup
16 |
17 |
18 | class ClassName(object):
19 |
20 | def __init__(self, Domain, verbose=False):
21 | self.apikey = False
22 | self.name = "Google PPTX Search for Emails"
23 | self.description = "Uses Google Dorking to search for emails"
24 | config = configparser.ConfigParser()
25 | try:
26 | config.read('Common/SimplyEmail.ini')
27 | self.Domain = Domain
28 | self.Quanity = int(config['GooglePptxSearch']['StartQuantity'])
29 | self.UserAgent = {
30 | 'User-Agent': helpers.getua()}
31 | self.Limit = int(config['GooglePptxSearch']['QueryLimit'])
32 | self.Counter = int(config['GooglePptxSearch']['QueryStart'])
33 | self.Sleep = int(config['SleepConfig']['QuerySleep'])
34 | self.Jitter = int(config['SleepConfig']['QueryJitter'])
35 | self.verbose = verbose
36 | self.urlList = []
37 | self.Text = ""
38 | except:
39 | print helpers.color(" [*] Major Settings for GooglePptxSearch are missing, EXITING!\n", warning=True)
40 |
41 | def execute(self):
42 | self.search()
43 | FinalOutput, HtmlResults, JsonResults = self.get_emails()
44 | return FinalOutput, HtmlResults, JsonResults
45 |
46 | def search(self):
47 | convert = Converter.Converter(self.verbose)
48 | dl = Download.Download(self.verbose)
49 | while self.Counter <= self.Limit and self.Counter <= 100:
50 | time.sleep(1)
51 | if self.verbose:
52 | p = ' [*] Google PPTX Search on page: ' + str(self.Counter)
53 | print helpers.color(p, firewall=True)
54 | try:
55 | url = "https://www.google.com/search?q=" + \
56 | self.Domain + "+filetype:pptx&start=" + str(self.Counter)
57 | except Exception as e:
58 | error = " [!] Major issue with Google Search:" + str(e)
59 | print helpers.color(error, warning=True)
60 | try:
61 | RawHtml = dl.requesturl(url, useragent=self.UserAgent)
62 | except Exception as e:
63 | error = " [!] Fail during Request to Google (Check Connection):" + \
64 | str(e)
65 | print helpers.color(error, warning=True)
66 | # check for captcha
67 | try:
68 | # Url = r.url
69 | dl.GoogleCaptchaDetection(RawHtml)
70 | except Exception as e:
71 | print e
72 | soup = BeautifulSoup(RawHtml)
73 | # I use this to parse my results, for URLS to follow
74 | for a in soup.findAll('a'):
75 | try:
76 | # https://stackoverflow.com/questions/21934004/not-getting-proper-links-
77 | # from-google-search-results-using-mechanize-and-beautifu/22155412#22155412?
78 | # newreg=01f0ed80771f4dfaa269b15268b3f9a9
79 | l = urlparse.parse_qs(urlparse.urlparse(a['href']).query)['q'][0]
80 | if l.startswith('http') or l.startswith('www') or l.startswith('https'):
81 | if "webcache.googleusercontent.com" not in l:
82 | self.urlList.append(l)
83 | # for some reason PPTX seems to be cached data:
84 | l = urlparse.parse_qs(urlparse.urlparse(a['href']).query)['q'][0]
85 | l = l.split(':', 2)
86 | if "webcache.googleusercontent.com" not in l[2]:
87 | self.urlList.append(l[2])
88 | except:
89 | pass
90 | self.Counter += 10
91 | helpers.modsleep(self.Sleep, jitter=self.Jitter)
92 | # now download the required files
93 | try:
94 | for url in self.urlList:
95 | if self.verbose:
96 | p = ' [*] Google PPTX search downloading: ' + str(url)
97 | print helpers.color(p, firewall=True)
98 | try:
99 | filetype = ".pptx"
100 | FileName, FileDownload = dl.download_file2(url, filetype)
101 | if FileDownload:
102 | if self.verbose:
103 | p = ' [*] Google PPTX file was downloaded: ' + \
104 | str(url)
105 | print helpers.color(p, firewall=True)
106 | ft = helpers.filetype(FileName).lower()
107 | if 'powerpoint' in ft:
108 |                             # PPTX files are zip containers, so extract text from the zipped XML parts
109 | self.Text += convert.convert_zip_to_text(FileName)
110 | else:
111 |                             print helpers.color(" [!] Downloaded file is not a PPTX: " + ft, warning=True)
112 | # print self.Text
113 | except Exception as e:
114 | print helpers.color(" [!] Issue with opening PPTX Files\n", firewall=True)
115 | try:
116 | if FileDownload:
117 | dl.delete_file(FileName)
118 | except Exception as e:
119 |                         print helpers.color(" [!] Issue deleting file: " + str(e), warning=True)
120 | except:
121 |             print helpers.color(" [*] No PPTX to download from Google!\n", firewall=True)
122 |
123 | def get_emails(self):
124 | Parse = Parser.Parser(self.Text)
125 | Parse.RemoveUnicode()
126 | Parse.genericClean()
127 | Parse.urlClean()
128 | FinalOutput = Parse.GrepFindEmails()
129 | HtmlResults = Parse.BuildResults(FinalOutput, self.name)
130 | JsonResults = Parse.BuildJson(FinalOutput, self.name)
131 | return FinalOutput, HtmlResults, JsonResults
132 |
--------------------------------------------------------------------------------
/Modules/GoogleSearch.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # Class will have the following properties:
4 | # 1) name / description
5 | # 2) main name called "ClassName"
6 | # 3) execute function (calls everything it needs)
7 | # 4) places the findings into a queue
8 | import configparser
9 | import time
10 | from Helpers import helpers
11 | from Helpers import Parser
12 | from Helpers import Download
13 |
14 |
15 | class ClassName(object):
16 |
17 | def __init__(self, Domain, verbose=False):
18 | self.apikey = False
19 | self.name = "Google Search for Emails"
20 |         self.description = "Uses Google to search for emails and parses them out of the results"
21 | config = configparser.ConfigParser()
22 | try:
23 | config.read('Common/SimplyEmail.ini')
24 | self.Domain = Domain
25 | self.Quanity = int(config['GoogleSearch']['StartQuantity'])
26 | self.UserAgent = {
27 | 'User-Agent': helpers.getua()}
28 | self.Limit = int(config['GoogleSearch']['QueryLimit'])
29 | self.Counter = int(config['GoogleSearch']['QueryStart'])
30 | self.Sleep = int(config['SleepConfig']['QuerySleep'])
31 | self.Jitter = int(config['SleepConfig']['QueryJitter'])
32 | self.verbose = verbose
33 | self.Html = ""
34 | except:
35 | print helpers.color(" [*] Major Settings for GoogleSearch are missing, EXITING!\n", warning=True)
36 |
37 | def execute(self):
38 | self.search()
39 | FinalOutput, HtmlResults, JsonResults = self.get_emails()
40 | return FinalOutput, HtmlResults, JsonResults
41 |
42 | def search(self):
43 | dl = Download.Download(self.verbose)
44 | while self.Counter <= self.Limit and self.Counter <= 1000:
45 | time.sleep(1)
46 | if self.verbose:
47 | p = ' [*] Google Search on page: ' + str(self.Counter)
48 | print helpers.color(p, firewall=True)
49 | try:
50 | url = "http://www.google.com/search?num=" + str(self.Quanity) + "&start=" + \
51 | str(self.Counter) + "&hl=en&meta=&q=%40\"" + \
52 | self.Domain + "\""
53 | except Exception as e:
54 | error = " [!] Major issue with Google Search:" + str(e)
55 | print helpers.color(error, warning=True)
56 | try:
57 | results = dl.requesturl(url, useragent=self.UserAgent)
58 | except Exception as e:
59 | error = " [!] Fail during Request to Google (Check Connection):" + \
60 | str(e)
61 | print helpers.color(error, warning=True)
62 | try:
63 | # Url = r.url
64 | dl.GoogleCaptchaDetection(results)
65 | except Exception as e:
66 | print e
67 | self.Html += results
68 | self.Counter += 100
69 | helpers.modsleep(self.Sleep, jitter=self.Jitter)
70 | def get_emails(self):
71 | Parse = Parser.Parser(self.Html)
72 | Parse.genericClean()
73 | Parse.urlClean()
74 | FinalOutput = Parse.GrepFindEmails()
75 | HtmlResults = Parse.BuildResults(FinalOutput, self.name)
76 | JsonResults = Parse.BuildJson(FinalOutput, self.name)
77 | return FinalOutput, HtmlResults, JsonResults
78 |
--------------------------------------------------------------------------------
/Modules/GoogleXLSXSearch.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # Class will have the following properties:
4 | # 1) name / description
5 | # 2) main name called "ClassName"
6 | # 3) execute function (calls everything it needs)
7 | # 4) places the findings into a queue
8 | import requests
9 | import urlparse
10 | import configparser
11 | import time
12 | import logging
13 | from Helpers import Download
14 | from Helpers import helpers
15 | from Helpers import Parser
16 | from Helpers import Converter
17 | from BeautifulSoup import BeautifulSoup
18 |
19 |
20 | class ClassName(object):
21 |
22 | def __init__(self, Domain, verbose=False):
23 | self.apikey = False
24 | self.name = "Google XLSX Search for Emails"
25 | self.description = "Uses Google Dorking to search for emails"
26 | config = configparser.ConfigParser()
27 | try:
28 | self.logger = logging.getLogger("SimplyEmail.GoogleXlsxSearch")
29 | config.read('Common/SimplyEmail.ini')
30 | self.Domain = Domain
31 | self.Quanity = int(config['GoogleXlsxSearch']['StartQuantity'])
32 | self.Limit = int(config['GoogleXlsxSearch']['QueryLimit'])
33 | self.UserAgent = {
34 | 'User-Agent': helpers.getua()}
35 | self.Counter = int(config['GoogleXlsxSearch']['QueryStart'])
36 | self.Sleep = int(config['SleepConfig']['QuerySleep'])
37 | self.Jitter = int(config['SleepConfig']['QueryJitter'])
38 | self.verbose = verbose
39 | self.urlList = []
40 | self.Text = ""
41 | except Exception as e:
42 | self.logger.critical(
43 | 'GoogleXlsxSearch module failed to load: ' + str(e))
44 | print helpers.color(" [*] Major Settings for GoogleXlsxSearch are missing, EXITING!\n", warning=True)
45 |
46 | def execute(self):
47 | self.logger.debug("GoogleXlsxSearch Started")
48 | self.search()
49 | FinalOutput, HtmlResults, JsonResults = self.get_emails()
50 | return FinalOutput, HtmlResults, JsonResults
51 |
52 | def search(self):
53 | convert = Converter.Converter(verbose=self.verbose)
54 | while self.Counter <= self.Limit and self.Counter <= 100:
55 | time.sleep(1)
56 | if self.verbose:
57 | p = ' [*] Google XLSX Search on page: ' + str(self.Counter)
58 | self.logger.info(
59 | "Google XLSX Search on page: " + str(self.Counter))
60 | print helpers.color(p, firewall=True)
61 | try:
62 | urly = "https://www.google.com/search?q=site:" + \
63 | self.Domain + "+filetype:xlsx&start=" + str(self.Counter)
64 | except Exception as e:
65 | error = " [!] Major issue with Google XLSX Search:" + str(e)
66 | self.logger.error(
67 | "GoogleXlsxSearch failed to build url: " + str(e))
68 | print helpers.color(error, warning=True)
69 | try:
70 | r = requests.get(urly)
71 | except Exception as e:
72 | error = " [!] Fail during Request to Google (Check Connection):" + \
73 | str(e)
74 | self.logger.error(
75 | "GoogleXlsxSearch failed to request url (Check Connection): " + str(e))
76 | print helpers.color(error, warning=True)
77 | RawHtml = r.content
78 | soup = BeautifulSoup(RawHtml)
79 | # I use this to parse my results, for URLS to follow
80 | for a in soup.findAll('a'):
81 | try:
82 | # https://stackoverflow.com/questions/21934004/not-getting-proper-links-
83 | # from-google-search-results-using-mechanize-and-beautifu/22155412#22155412?
84 | # newreg=01f0ed80771f4dfaa269b15268b3f9a9
85 | l = urlparse.parse_qs(
86 | urlparse.urlparse(a['href']).query)['q'][0]
87 | if l.startswith('http') or l.startswith('www'):
88 | if "webcache.googleusercontent.com" not in l:
89 | self.urlList.append(l)
90 | except:
91 | pass
92 | self.Counter += 10
93 | helpers.modsleep(self.Sleep, jitter=self.Jitter)
94 | # now download the required files
95 | self.logger.debug(
96 | "GoogleXlsxSearch completed HTML result query, starting downloads")
97 | try:
98 | for url in self.urlList:
99 | if self.verbose:
100 | p = ' [*] Google XLSX search downloading: ' + str(url)
101 | self.logger.info(
102 | "Google XLSX search downloading: " + str(url))
103 | print helpers.color(p, firewall=True)
104 | try:
105 | filetype = ".xlsx"
106 | dl = Download.Download(self.verbose)
107 | FileName, FileDownload = dl.download_file(url, filetype)
108 | if FileDownload:
109 | if self.verbose:
110 | p = ' [*] Google XLSX file was downloaded: ' + \
111 | str(url)
112 | self.logger.info(
113 | "Google XLSX file was downloaded: " + str(url))
114 | print helpers.color(p, firewall=True)
115 | self.Text += convert.convert_Xlsx_to_Csv(FileName)
116 | # print self.Text
117 | except Exception as e:
118 | print helpers.color(" [!] Issue with opening Xlsx Files\n", firewall=True)
119 | self.logger.error("Google XLSX had issue opening file")
120 | try:
121 | dl.delete_file(FileName)
122 | except Exception as e:
123 | self.logger.error(
124 | "Google XLSX failed to delete file: " + str(e))
125 | except Exception as e:
126 | print helpers.color(" [*] No XLSX's to download from google!\n", firewall=True)
127 | self.logger.error("No XLSX's to download from google! " + str(e))
128 |
129 | def get_emails(self):
130 | Parse = Parser.Parser(self.Text)
131 | Parse.genericClean()
132 | Parse.urlClean()
133 | FinalOutput = Parse.GrepFindEmails()
134 | HtmlResults = Parse.BuildResults(FinalOutput, self.name)
135 | JsonResults = Parse.BuildJson(FinalOutput, self.name)
136 | self.logger.debug('GoogleXlsxSearch completed search')
137 | return FinalOutput, HtmlResults, JsonResults
138 |
--------------------------------------------------------------------------------
/Modules/HtmlScrape.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | import subprocess
4 | import configparser
5 | import os
6 | import shutil
7 | from Helpers import helpers
8 | from Helpers import Parser
9 |
10 |
11 | # Class will have the following properties:
12 | # 1) name / description
13 | # 2) main name called "ClassName"
14 | # 3) execute function (calls everything it needs)
15 | # 4) places the findings into a queue
16 |
17 | # Use the same class name so we can easily start up each module the same way
18 | class ClassName(object):
19 |
20 | def __init__(self, domain, verbose=False):
21 | self.apikey = False
22 | # Descriptions are required!!!
23 | self.name = "HTML Scrape of Target Website"
24 | self.description = "Html Scrape the target website for emails and data"
25 | # Settings we will pull from config file (We need required options in
26 | # config file)
27 | config = configparser.ConfigParser()
28 | try:
29 | config.read('Common/SimplyEmail.ini')
30 | self.verbose = verbose
31 | self.domain = domain
32 | self.useragent = "--user-agent=\"" + str(config['GlobalSettings']['UserAgent']) + "\""
33 | self.depth = "--level=" + str(config['HtmlScrape']['Depth'])
34 | self.wait = "--wait=" + str(config['HtmlScrape']['Wait'])
35 | self.limit_rate = "--limit-rate=" + \
36 | str(config['HtmlScrape']['LimitRate'])
37 | self.timeout = "--timeout=" + \
38 | str(config['HtmlScrape']['Timeout'])
39 | self.save = "--directory-prefix=" + \
40 | str(config['HtmlScrape']['Save']) + str(self.domain)
41 | self.remove = str(config['HtmlScrape']['RemoveHTML'])
42 | self.retVal = 0
43 | self.maxRetries = "--tries=5"
44 | except:
45 | print helpers.color(" [*] Major Settings for HTML are missing, EXITING!\n", warning=True)
46 |
47 | def execute(self):
48 | try:
49 | self.search()
50 | FinalOutput, HtmlResults, JsonResults = self.get_emails()
51 | return FinalOutput, HtmlResults, JsonResults
52 | except Exception as e:
53 | print e
54 |
55 | def search(self):
56 | # setup domain so it will follow redirects
57 | # may move this to httrack in future
58 | TempDomain = "http://www." + str(self.domain)
59 | try:
60 | # Using subprocess, more or less because of the robust HTML mirroring ability
61 | # And also allows proxy / VPN Support
62 | # "--convert-links"
63 | if self.verbose:
64 | p = ' [*] HTML scrape underway [This can take a bit!]'
65 | print helpers.color(p, firewall=True)
66 | self.retVal = subprocess.call(["wget", "-q", "-e robots=off", "--header=\"Accept: text/html\"", self.useragent,
67 | "--recursive", self.depth, self.wait, self.limit_rate, self.save,
68 | self.timeout, "--page-requisites", "-R gif,jpg,pdf,png,css,zip,mov,wmv,ppt,doc,docx,xls,exe,bin,pptx,avi,swf,vbs,xlsx,kfp,pub",
69 | "--no-clobber", self.maxRetries,"--domains", self.domain, TempDomain])
70 | if self.retVal > 0:
71 | print helpers.color(" [*] Wget returned error, likely 403 (attempting again): " + str(self.retVal), warning=True)
72 | self.retVal = subprocess.call(["wget", "-q", "-e robots=off", "--header=\"Accept: text/html\"", self.useragent,
73 | "--recursive", self.depth, self.wait, self.limit_rate, self.save,
74 | self.timeout, "--page-requisites", "-R gif,jpg,pdf,png,css,zip,mov,wmv,ppt,doc,docx,xls,exe,bin,pptx,avi,swf,vbs,xlsx,kfp,pub",
75 | "--no-clobber", self.maxRetries,"--domains", self.domain, TempDomain])
76 | except Exception as e:
77 | print e
78 | print " [!] ERROR during Wget Request"
79 |
80 | def get_emails(self):
81 | # Direct location of new dir created during wget
82 | output = []
83 | FinalOutput = []
84 | val = ""
85 | directory = self.save.replace("--directory-prefix=", "")
86 | # directory = "www." + directory
87 | # Grep for any data containing "@", sorting out binary files as well
88 | # Pass list of Dirs to a regex, and read that path for emails
89 | try:
90 | if self.retVal > 0:
91 | pass
92 | else:
93 | ps = subprocess.Popen(
94 | ('grep', '-r', "@", directory), stdout=subprocess.PIPE)
95 | # Take in "ps" var and parse it for only email addresses
96 | output = []
97 | try:
98 | val = subprocess.check_output(("grep", "-i", "-o", '[A-Z0-9._%+-]\+@[A-Z0-9.-]\+\.[A-Z]\{2,4\}'),
99 | stdin=ps.stdout)
100 | except Exception as e:
101 | pass
102 | # Super "hack" since the data returned is from pipeline /n and all
103 | # in val
104 | if val:
105 | with open('temp.txt', "w+") as myfile:
106 | myfile.write(str(val))
107 | with open('temp.txt', "r") as myfile:
108 | output = myfile.readlines()
109 | os.remove('temp.txt')
110 | for item in output:
111 | FinalOutput.append(item.rstrip("\n"))
112 | except Exception as e:
113 | print e
114 |         if self.remove == "yes" or self.remove == "Yes":
115 |             # remove the mirrored site directory once parsing is done
116 |             try:
117 |                 shutil.rmtree(directory)
118 |             except:
119 |                 pass
120 |
121 | Parse = Parser.Parser(FinalOutput)
122 | HtmlResults = Parse.BuildResults(FinalOutput, self.name)
123 | JsonResults = Parse.BuildJson(FinalOutput, self.name)
124 | return FinalOutput, HtmlResults, JsonResults
125 |
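
get_emails() above shells out to grep and round-trips the matches through temp.txt. A pure-Python sketch of the same extraction over the wget mirror directory (an alternative illustration, not the module's implementation), reusing the same character class the grep pipeline matches case-insensitively; the function name is an assumption.

import os
import re

# same pattern the grep -i -o call above matches
EMAIL_RE = re.compile(r'[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}', re.IGNORECASE)


def grep_emails(directory):
    # Walk the mirrored site and regex every file for addresses,
    # skipping anything that cannot be read or cleanly decoded.
    found = set()
    for root, _dirs, files in os.walk(directory):
        for name in files:
            try:
                with open(os.path.join(root, name), 'rb') as handle:
                    data = handle.read()
            except (IOError, OSError):
                continue
            found.update(EMAIL_RE.findall(data.decode('utf-8', 'ignore')))
    return sorted(found)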
--------------------------------------------------------------------------------
/Modules/Hunter.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import configparser
3 | import logging
4 | from Helpers import Download
5 | from Helpers import Parser
6 | from Helpers import helpers
7 |
8 | # Class will have the following properties:
9 | # 1) name / description
10 | # 2) main name called "ClassName"
11 | # 3) execute function (calls everything it needs)
12 | # 4) places the findings into a queue
13 |
14 | # https://api.hunter.io/v2/domain-search?domain=any.com&type=personal&limit=100&offset=0&api_key=your_api_key
15 |
16 |
17 | class ClassName(object):
18 |
19 | def __init__(self, domain, verbose=False):
20 | self.apikey = True
21 | self.name = "Hunter API"
22 | self.description = "Search the Hunter DB for potential emails"
23 | self.domain = domain
24 | config = configparser.ConfigParser()
25 | self.results = []
26 | self.verbose = verbose
27 | try:
28 | self.logger = logging.getLogger("SimplyEmail.Hunter")
29 | config.read('Common/SimplyEmail.ini')
30 | self.UserAgent = str(config['GlobalSettings']['UserAgent'])
31 | self.apikeyv = str(config['APIKeys']['Hunter'])
32 | self.RequestLimit = int(config['Hunter']['RequestLimit'])
33 | self.QuotaLimit = int(config['Hunter']['QuotaLimit'])
34 | self.EmailType = str(config['Hunter']['EmailType'])
35 |
36 | if self.EmailType == "Both":
37 | self.type = ""
38 | self.etype = "total"
39 | elif self.EmailType == "Personal":
40 | self.type = "&type=personal"
41 | self.etype = "personal_emails"
42 | elif self.EmailType == "Generic":
43 | self.type = "&type=generic"
44 | self.etype = "generic_emails"
45 | else:
46 | raise Exception("Email Type setting invalid")
47 | except Exception as e:
48 | self.logger.critical("Hunter module failed to __init__: " + str(e))
49 | print helpers.color(" [*] Error in Hunter settings: " + str(e) + "\n", warning=True)
50 |
51 | def execute(self):
52 | self.logger.debug("Hunter module started")
53 | self.process()
54 | FinalOutput, HtmlResults, JsonResults = self.get_emails()
55 | return FinalOutput, HtmlResults, JsonResults
56 |
57 | def process(self):
58 | dl = Download.Download(self.verbose)
59 | try:
60 | # We will check to see that we have enough requests left to make a search
61 | url = "https://api.hunter.io/v2/account?api_key=" + self.apikeyv
62 | r = dl.requesturl(url, useragent=self.UserAgent, raw=True)
63 | accountInfo = r.json()
64 | quota = int(accountInfo['data']['calls']['available'])
65 | quotaUsed = int(accountInfo['data']['calls']['used'])
66 | if quotaUsed >= self.QuotaLimit:
67 | overQuotaLimit = True
68 | else:
69 | overQuotaLimit = False
70 | except Exception as e:
71 | error = " [!] Hunter API error: " + str(accountInfo['errors'][0]['details'])
72 | print helpers.color(error, warning=True)
73 | try:
74 | # Hunter's API only allows 100 emails per request, so we check the number of emails Hunter has
75 | # on our specified domain, and if it's over 100 we need to make multiple requests to get all of the emails
76 | url = "https://api.hunter.io/v2/email-count?domain=" + self.domain
77 | r = dl.requesturl(url, useragent=self.UserAgent, raw=True)
78 | response = r.json()
79 | totalEmails = int(response['data'][self.etype])
80 | emailsLeft = totalEmails
81 | offset = 0
82 | except Exception as e:
83 | error = "[!] Major issue with Hunter Search: " + str(e)
84 | print helpers.color(error, warning=True)
85 | requestsMade = 0
86 | # Main loop to keep requesting the Hunter API until we get all of the emails they have
87 | while emailsLeft > 0:
88 | try:
89 | if overQuotaLimit or requestsMade + quotaUsed >= self.QuotaLimit:
90 | if self.verbose:
91 | print helpers.color(" [*] You are over your set Quota Limit: " + \
92 | str(quotaUsed) + "/" + str(self.QuotaLimit) + " stopping search", firewall=True)
93 | break
94 | elif self.RequestLimit != 0 and requestsMade >= self.RequestLimit:
95 | if self.verbose:
96 | print helpers.color(" [*] Stopping search due to user set Request Limit", firewall=True)
97 | break
98 |
99 | # This returns a JSON object
100 | url = "https://api.hunter.io/v2/domain-search?domain=" + \
101 | self.domain + self.type + "&limit=100&offset=" + str(offset) + "&api_key=" + self.apikeyv
102 | r = dl.requesturl(url, useragent=self.UserAgent, raw=True)
103 | results = r.json()
104 | emailCount = int(results['meta']['results'])
105 | except Exception as e:
106 | error = " [!] Hunter API error: " + str(results['errors'][0]['details']) + " QUITTING!"
107 | print helpers.color(error, warning=True)
108 | break
109 | try:
110 | # Make sure we don't exceed the index for the 'emails' array in the 'results' Json object
111 | if emailsLeft < 100:
112 | emailCount = emailsLeft
113 | if emailCount > 100:
114 | emailCount = 100
115 |                 # One request is counted for every 10 emails delivered
116 | requestsMade += emailCount // 10
117 | if emailCount % 10 != 0:
118 | requestsMade += 1
119 | # The API starts at 0 for the first value
120 | x = 0
121 |                 # Iterate over the JSON object's indexed email entries
122 | while x < emailCount:
123 | self.results.append(results['data']['emails'][int(x)]['value'])
124 | x += 1
125 | emailsLeft -= emailCount
126 | if emailsLeft > 100:
127 | offset += 100
128 | else:
129 | offset += emailsLeft
130 | except Exception as e:
131 | error = " [!] Major issue with search parsing: " + str(e)
132 | print helpers.color(error, warning=True)
133 | break
134 | if self.verbose:
135 |             # Print the available requests the user has left if verbose
136 | print helpers.color(' [*] Hunter has completed JSON request', firewall=True)
137 | requestsUsed = requestsMade + quotaUsed
138 | if quota - requestsUsed <= 0:
139 | print helpers.color(" [*] You have no Hunter requests left." \
140 | + "They will refill in about a month", firewall=True)
141 | else:
142 |                 print helpers.color(" [*] You have used " + str(requestsUsed) \
143 |                     + "/" + str(quota) + " Hunter requests", firewall=True)
144 |
145 | def get_emails(self):
146 | # Make sure you remove any newlines
147 | Parse = Parser.Parser(self.results)
148 | FinalOutput = Parse.CleanListOutput()
149 | HtmlResults = Parse.BuildResults(FinalOutput, self.name)
150 | JsonResults = Parse.BuildJson(FinalOutput, self.name)
151 | self.logger.debug('Hunter completed search')
152 | return FinalOutput, HtmlResults, JsonResults
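
The pagination and quota arithmetic in process() above (batches of at most 100 emails per call, offsets advanced by the batch size, roughly one request counted per 10 emails delivered) is easier to follow in isolation. A sketch under those assumptions, with fetch_page standing in as a hypothetical callable that returns one parsed domain-search response (the role played by dl.requesturl(...).json() in the module):

def collect_hunter_emails(domain, total_emails, quota_left, fetch_page):
    # fetch_page(domain, offset) -> parsed JSON for one domain-search call
    # (hypothetical helper, not part of the module).
    emails = []
    offset = 0
    requests_made = 0
    remaining = total_emails
    while remaining > 0:
        batch = min(remaining, 100)   # the API returns at most 100 emails per call
        cost = (batch + 9) // 10      # roughly one request per 10 emails delivered
        if requests_made + cost > quota_left:
            break                     # stop before exceeding the quota
        data = fetch_page(domain, offset)
        for entry in data['data']['emails'][:batch]:
            emails.append(entry['value'])
        requests_made += cost
        remaining -= batch
        offset += batch
    return emails, requests_made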
--------------------------------------------------------------------------------
/Modules/OnInstagram.py.old:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | import requests
4 | import configparser
5 | from Helpers import helpers
6 | from Helpers import Parser
7 |
8 |
9 | # Class will have the following properties:
10 | # 1) name / description
11 | # 2) main name called "ClassName"
12 | # 3) execute function (calls everything it needs)
13 | # 4) places the findings into a queue
14 |
15 | # http://www.oninstagram.com/profile/search?query=@gmail.com
16 | # this allows raw query, even major like @gmail
17 |
18 | class ClassName(object):
19 |
20 | def __init__(self, Domain, verbose=False):
21 | self.apikey = False
22 | self.name = "OnInstagram Search For Instagram Users"
23 |         self.description = "Uses OnInstagram's search engine"
24 | config = configparser.ConfigParser()
25 | try:
26 | config.read('Common/SimplyEmail.ini')
27 | self.Domain = Domain
28 | self.Html = ""
29 | self.verbose = verbose
30 | except:
31 | print helpers.color(" [*] Major Settings for OnInstagram are missing, EXITING!\n", warning=True)
32 |
33 | def execute(self):
34 | self.process()
35 | FinalOutput, HtmlResults, JsonResults = self.get_emails()
36 | return FinalOutput, HtmlResults, JsonResults
37 |
38 | def process(self):
39 | try:
40 |             # page seems to dynamically expand :)
41 | url = "http://www.oninstagram.com/profile/search?query=" + \
42 | self.Domain
43 | r = requests.get(url)
44 | except Exception as e:
45 | error = " [!] Major issue with OnInstagram Search:" + str(e)
46 | print helpers.color(error, warning=True)
47 | if self.verbose:
48 | p = ' [*] Instagram search Complete'
49 | print helpers.color(p, firewall=True)
50 | self.Html = r.content
51 |
52 | def get_emails(self):
53 | Parse = Parser.Parser(self.Html)
54 | Parse.genericClean()
55 | Parse.urlClean()
56 | FinalOutput = Parse.GrepFindEmails()
57 | HtmlResults = Parse.BuildResults(FinalOutput, self.name)
58 | JsonResults = Parse.BuildJson(FinalOutput, self.name)
59 | return FinalOutput, HtmlResults, JsonResults
60 |
--------------------------------------------------------------------------------
/Modules/PasteBinSearch.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # Class will have the following properties:
4 | # 1) name / description
5 | # 2) main name called "ClassName"
6 | # 3) execute function (calls everything it needs)
7 | # 4) places the findings into a queue
8 | import configparser
9 | import requests
10 | import time
11 | import logging
12 | from Helpers import Download
13 | from Helpers import helpers
14 | from Helpers import Parser
15 | from bs4 import BeautifulSoup
16 |
17 |
18 | class ClassName(object):
19 |
20 | def __init__(self, Domain, verbose=False):
21 | self.apikey = False
22 | self.name = "PasteBin Search for Emails"
23 |         self.description = "Uses Pastebin (via Google) to search for emails and parses them out of the raw pastes"
24 | config = configparser.ConfigParser()
25 | try:
26 | self.logger = logging.getLogger("SimplyEmail.PasteBinSearch")
27 | config.read('Common/SimplyEmail.ini')
28 | self.Domain = Domain
29 | self.Quanity = int(config['GooglePasteBinSearch']['StartQuantity'])
30 | self.UserAgent = {
31 | 'User-Agent': helpers.getua()}
32 | self.Limit = int(config['GooglePasteBinSearch']['QueryLimit'])
33 | self.Counter = int(config['GooglePasteBinSearch']['QueryStart'])
34 | self.verbose = verbose
35 | self.urlList = []
36 | self.Text = ""
37 | except Exception as e:
38 | self.logger.critical(
39 | 'PasteBinSearch module failed to __init__: ' + str(e))
40 | print helpers.color("[*] Major Settings for PasteBinSearch are missing, EXITING!\n", warning=True)
41 |
42 | def execute(self):
43 | self.logger.debug("PasteBinSearch started")
44 | self.search()
45 | FinalOutput, HtmlResults, JsonResults = self.get_emails()
46 | return FinalOutput, HtmlResults, JsonResults
47 |
48 | def search(self):
49 | dl = Download.Download(self.verbose)
50 | while self.Counter <= self.Limit and self.Counter <= 100:
51 | time.sleep(1)
52 | if self.verbose:
53 | p = ' [*] Google Search for PasteBin on page: ' + \
54 | str(self.Counter)
55 | self.logger.info(
56 | "GooglePasteBinSearch on page: " + str(self.Counter))
57 | print helpers.color(p, firewall=True)
58 | try:
59 | url = "http://www.google.com/search?num=" + str(self.Quanity) + "&start=" + str(self.Counter) + \
60 | '&hl=en&meta=&q=site:pastebin.com+"%40' + \
61 | self.Domain + '"'
62 | except Exception as e:
63 | error = " [!] Major issue with Google Search for PasteBin:" + \
64 | str(e)
65 | self.logger.error(
66 | "GooglePasteBinSearch could not create URL: " + str(e))
67 | print helpers.color(error, warning=True)
68 |
69 | try:
70 | r = requests.get(url, headers=self.UserAgent)
71 | except Exception as e:
72 | error = " [!] Fail during Request to PasteBin (Check Connection):" + str(
73 | e)
74 | self.logger.error(
75 | "Fail during Request to PasteBin (Check Connection): " + str(e))
76 | print helpers.color(error, warning=True)
77 | try:
78 | RawHtml = r.content
79 | try:
80 | # check for captcha in the source
81 | dl.GoogleCaptchaDetection(RawHtml)
82 | except Exception as e:
83 | self.logger.error("Issue checking for captcha: " + str(e))
84 | soup = BeautifulSoup(RawHtml, "lxml")
85 | for a in soup.select('.r a'):
86 | # remove urls like pastebin.com/u/Anonymous
87 | if "/u/" not in str(a['href']):
88 | self.urlList.append(a['href'])
89 | except Exception as e:
90 | error = " [!] Fail during parsing result: " + str(e)
91 | self.logger.error(
92 | "PasteBinSearch Fail during parsing result: " + str(e))
93 | print helpers.color(error, warning=True)
94 | self.Counter += 100
95 | # Now take all gathered URL's and gather the Raw content needed
96 | for Url in self.urlList:
97 | try:
98 | Url = "http://pastebin.com/raw/" + str(Url).split('/')[3]
99 | data = requests.get(Url, timeout=2)
100 | self.Text += data.content
101 | except Exception as e:
102 | error = "[!] Connection Timed out on PasteBin Search:" + str(e)
103 | self.logger.error(
104 | "Connection Timed out on PasteBin raw download: " + str(e))
105 | print helpers.color(error, warning=True)
106 |
107 | if self.verbose:
108 | p = ' [*] Searching PasteBin Complete'
109 | self.logger.info("Searching PasteBin Complete")
110 | print helpers.color(p, firewall=True)
111 |
112 | def get_emails(self):
113 | Parse = Parser.Parser(self.Text)
114 | Parse.genericClean()
115 | Parse.urlClean()
116 | FinalOutput = Parse.GrepFindEmails()
117 | HtmlResults = Parse.BuildResults(FinalOutput, self.name)
118 | JsonResults = Parse.BuildJson(FinalOutput, self.name)
119 | self.logger.debug("PasteBinSearch completed search")
120 | return FinalOutput, HtmlResults, JsonResults
121 |
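
search() above rewrites each Google result into a pastebin.com/raw/ URL by splitting the path on '/'. A slightly more defensive sketch of that rewrite (an illustration, not the module's code), assuming result URLs shaped like https://pastebin.com/<id>:

def to_raw_paste_url(result_url):
    # "https://pastebin.com/AbC123" -> "https://pastebin.com/raw/AbC123"
    paste_id = result_url.rstrip('/').split('/')[-1]
    return "https://pastebin.com/raw/" + paste_id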
--------------------------------------------------------------------------------
/Modules/RedditPostSearch.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # Class will have the following properties:
4 | # 1) name / description
5 | # 2) main name called "ClassName"
6 | # 3) execute function (calls everything it needs)
7 | # 4) places the findings into a queue
8 | import configparser
9 | import time
10 | import logging
11 | from Helpers import Download
12 | from Helpers import helpers
13 | from Helpers import Parser
14 |
15 |
16 | class ClassName(object):
17 |
18 | def __init__(self, Domain, verbose=False):
19 | self.apikey = False
20 | self.name = "RedditPost Search for Emails"
21 | self.description = "Uses Reddit post search to find emails and parses the raw results"
22 | config = configparser.ConfigParser()
23 | try:
24 | self.logger = logging.getLogger("SimplyEmail.RedditPostSearch")
25 | config.read('Common/SimplyEmail.ini')
26 | self.Domain = Domain
27 | self.UserAgent = {
28 | 'User-Agent': helpers.getua()}
29 | self.Limit = int(config['RedditPostSearch']['QueryLimit'])
30 | self.Counter = int(config['RedditPostSearch']['QueryStart'])
31 | self.verbose = verbose
32 | self.Html = ""
33 | except Exception as e:
34 | self.logger.critical(
35 | 'RedditPostSearch module failed to load: ' + str(e))
36 | print helpers.color(" [*] Major Settings for RedditPostSearch are missing, EXITING!\n", warning=True)
37 |
38 | def execute(self):
39 | self.logger.debug("RedditPostSearch started")
40 | self.search()
41 | FinalOutput, HtmlResults, JsonResults = self.get_emails()
42 | return FinalOutput, HtmlResults, JsonResults
43 |
44 | def search(self):
45 | dl = Download.Download(self.verbose)
46 | while self.Counter <= self.Limit and self.Counter <= 1000:
47 | time.sleep(1)
48 | if self.verbose:
49 | p = ' [*] RedditPost Search on result: ' + str(self.Counter)
50 | self.logger.debug(
51 | "RedditPost Search on result: " + str(self.Counter))
52 | print helpers.color(p, firewall=True)
53 | try:
54 | url = "https://www.reddit.com/search?q=%40" + str(self.Domain) + \
55 | "&restrict_sr=&sort=relevance&t=all&count=" + str(self.Counter) + \
56 | '&after=t3_3mkrqg'
57 | except Exception as e:
58 | error = " [!] Major issue with RedditPost search:" + str(e)
59 | self.logger.error(
60 | "Major issue with RedditPostSearch: " + str(e))
61 | print helpers.color(error, warning=True)
62 | try:
63 | RawHtml = dl.requesturl(url, useragent=self.UserAgent)
64 | except Exception as e:
65 | error = " [!] Fail during Request to Reddit (Check Connection):" + \
66 | str(e)
67 | self.logger.error(
68 | "Fail during Request to Reddit (Check Connection): " + str(e))
69 | print helpers.color(error, warning=True)
70 | self.Html += RawHtml
71 | # reddit seems to increment by 25 in most cases
72 | self.Counter += 25
73 |
74 | def get_emails(self):
75 | Parse = Parser.Parser(self.Html)
76 | Parse.genericClean()
77 | Parse.urlClean()
78 | FinalOutput = Parse.GrepFindEmails()
79 | HtmlResults = Parse.BuildResults(FinalOutput, self.name)
80 | JsonResults = Parse.BuildJson(FinalOutput, self.name)
81 | self.logger.debug("RedditPostSearch completed search")
82 | return FinalOutput, HtmlResults, JsonResults
83 |
--------------------------------------------------------------------------------
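Like every module in this directory, RedditPostSearch follows the contract sketched in its header comment: a ClassName(Domain, verbose=...) constructor plus an execute() method that returns the parsed emails and the HTML/JSON report fragments. A minimal sketch of driving a single module directly, assuming it is run from the repository root so the relative Common/SimplyEmail.ini path resolves (the domain is illustrative):

```python
from Modules import RedditPostSearch

# Instantiate the module and run its full search/parse pipeline.
module = RedditPostSearch.ClassName("example.com", verbose=True)
emails, html_results, json_results = module.execute()

for email in emails:
    print(email)
```

This is roughly how the task controller exercises each loaded module before aggregating the results.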
/Modules/SearchPGP.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import requests
3 | import configparser
4 | import logging
5 | from Helpers import Parser
6 | from Helpers import helpers
7 |
8 | # Class will have the following properties:
9 | # 1) name / description
10 | # 2) main name called "ClassName"
11 | # 3) execute function (calls everything it needs)
12 | # 4) places the findings into a queue
13 |
14 |
15 | class ClassName(object):
16 |
17 | def __init__(self, domain, verbose=False):
18 | self.apikey = False
19 | self.name = "Searching PGP"
20 | self.description = "Search the PGP database for potential emails"
21 | self.domain = domain
22 | config = configparser.ConfigParser()
23 | self.results = ""
24 | try:
25 | self.logger = logging.getLogger("SimplyEmail.SearchPGP")
26 | config.read('Common/SimplyEmail.ini')
27 | self.server = str(config['SearchPGP']['KeyServer'])
28 | self.hostname = str(config['SearchPGP']['Hostname'])
29 | self.UserAgent = str(config['GlobalSettings']['UserAgent'])
30 | self.verbose = verbose
31 | except Exception as e:
32 | self.logger.critical(
33 | 'SearchPGP module failed to __init__: ' + str(e))
34 | print helpers.color("[*] Major Settings for SearchPGP are missing, EXITING!\n", warning=True)
35 |
36 | def execute(self):
37 | self.logger.debug("SearchPGP started")
38 | self.process()
39 | FinalOutput, HtmlResults, JsonResults = self.get_emails()
40 | return FinalOutput, HtmlResults, JsonResults
41 |
42 | def process(self):
43 | try:
44 | url = "http://pgp.mit.edu/pks/lookup?search=" + \
45 | self.domain + "&op=index"
46 | self.logger.info("Requesting PGP keys")
47 | r = requests.get(url)
48 | except Exception as e:
49 | error = " [!] Major issue with PGP Search:" + str(e)
50 | self.logger.error("Major issue with PGP search: " + str(e))
51 | print helpers.color(error, warning=True)
52 | if self.verbose:
53 | p = ' [*] Searching PGP Complete'
54 | self.logger.info("SearchPGP Completed search")
55 | print helpers.color(p, firewall=True)
56 | self.results = r.content
57 |
58 | def get_emails(self):
59 | Parse = Parser.Parser(self.results)
60 | FinalOutput = Parse.GrepFindEmails()
61 | HtmlResults = Parse.BuildResults(FinalOutput, self.name)
62 | JsonResults = Parse.BuildJson(FinalOutput, self.name)
63 | self.logger.debug("SearchPGP completed search")
64 | return FinalOutput, HtmlResults, JsonResults
65 |
--------------------------------------------------------------------------------
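SearchPGP simply pulls the keyserver index page and hands the raw response to Parser.GrepFindEmails(). The real pattern lives in Helpers/Parser.py, which is not part of this listing, so the sketch below only approximates that step with a generic, domain-scoped email regex to illustrate what the module ends up extracting:

```python
import re
import requests

def pgp_emails(domain):
    # Query the MIT keyserver index the same way the module does.
    url = "http://pgp.mit.edu/pks/lookup?search=" + domain + "&op=index"
    html = requests.get(url, timeout=10).text
    # Approximate email grep limited to the target domain; the project's
    # Parser.GrepFindEmails() may use a broader pattern.
    pattern = r"[A-Za-z0-9._%+-]+@" + re.escape(domain)
    return sorted(set(re.findall(pattern, html)))

# emails = pgp_emails("example.com")
```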
/Modules/WhoisAPISearch.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # http://api.hackertarget.com/whois/?q=verisgroup.com
3 |
4 | import requests
5 | import configparser
6 | import logging
7 | from Helpers import Parser
8 | from Helpers import helpers
9 |
10 | # Class will have the following properties:
11 | # 1) name / description
12 | # 2) main name called "ClassName"
13 | # 3) execute function (calls everything it needs)
14 | # 4) places the findings into a queue
15 |
16 |
17 | class ClassName(object):
18 |
19 | def __init__(self, domain, verbose=False):
20 | self.apikey = False
21 | self.name = "Searching Whois"
22 | self.description = "Search the Whois database for potential POC emails"
23 | self.domain = domain
24 | config = configparser.ConfigParser()
25 | self.verbose = verbose
26 | self.results = ""
27 | try:
28 | self.logger = logging.getLogger("SimplyEmail.WhoisAPISearch")
29 | config.read('Common/SimplyEmail.ini')
30 | self.UserAgent = str(config['GlobalSettings']['UserAgent'])
31 | except Exception as e:
32 | self.logger.critical(
33 | 'WhoisAPISearch module failed to __init__: ' + str(e))
34 | print helpers.color(" [*] Major Settings for Search Whois are missing, EXITING!\n", warning=True)
35 |
36 | def execute(self):
37 | self.logger.debug("WhoisAPISearch Started")
38 | self.process()
39 | FinalOutput, HtmlResults, JsonResults = self.get_emails()
40 | return FinalOutput, HtmlResults, JsonResults
41 |
42 | def process(self):
43 | try:
44 | if self.verbose:
45 | p = ' [*] Requesting API on HackerTarget whois'
46 | self.logger.info("Requesting API on HackerTarget whois")
47 | print helpers.color(p, firewall=True)
48 | url = "http://api.hackertarget.com/whois/?q=" + \
49 | self.domain
50 | r = requests.get(url)
51 | except Exception as e:
52 | error = " [!] Major issue with Whois Search:" + str(e)
53 | self.logger.error(
54 | "Failed to request URL (Check Connection): " + str(e))
55 | print helpers.color(error, warning=True)
56 | self.results = r.content
57 |
58 | def get_emails(self):
59 | Parse = Parser.Parser(self.results)
60 | FinalOutput = Parse.GrepFindEmails()
61 | HtmlResults = Parse.BuildResults(FinalOutput, self.name)
62 | JsonResults = Parse.BuildJson(FinalOutput, self.name)
63 | self.logger.debug('WhoisAPISearch completed search')
64 | return FinalOutput, HtmlResults, JsonResults
65 |
--------------------------------------------------------------------------------
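One pattern worth flagging in this module (and in SearchPGP, Whoisolgy.py and YahooSearch below): the HTTP request sits inside a try/except, but r.content is still read afterwards, so a failed request leaves r undefined and the next line raises a NameError instead of degrading gracefully. A defensive variant of the request step might look like the sketch below; this is an editorial suggestion, not the project's code:

```python
import logging
import requests

logger = logging.getLogger("SimplyEmail.WhoisAPISearch")

def fetch_whois(domain):
    url = "http://api.hackertarget.com/whois/?q=" + domain
    try:
        r = requests.get(url, timeout=10)
        r.raise_for_status()
    except requests.RequestException as e:
        logger.error("Failed to request URL (Check Connection): " + str(e))
        return ""  # fall back to empty results instead of hitting a NameError
    return r.content
```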
/Modules/Whoisolgy.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import requests
3 | import configparser
4 | import logging
5 | from Helpers import Parser
6 | from Helpers import helpers
7 |
8 | # Class will have the following properties:
9 | # 1) name / description
10 | # 2) main name called "ClassName"
11 | # 3) execute function (calls everything it needs)
12 | # 4) places the findings into a queue
13 |
14 | # https://whoisology.com/archive_11/microsoft.com
15 |
16 |
17 | class ClassName(object):
18 |
19 | def __init__(self, domain, verbose=False):
20 | self.apikey = False
21 | self.name = "Searching Whoisology"
22 | self.logger = logging.getLogger("SimplyEmail.Whoisology")
23 | self.description = "Search the Whoisology database for potential POC emails"
24 | self.domain = domain
25 | config = configparser.ConfigParser()
26 | self.results = ""
27 | try:
28 | config.read('Common/SimplyEmail.ini')
29 | self.UserAgent = {
30 | 'User-Agent': helpers.getua()}
31 | self.verbose = verbose
32 | except Exception as e:
33 | self.logger.critical(
34 | 'Whoisology module failed to __init__: ' + str(e))
35 | print helpers.color("[*] Major Settings for Search Whoisology are missing, EXITING!\n", warning=True)
36 |
37 | def execute(self):
38 | self.logger.debug("Whoisology Started")
39 | self.process()
40 | FinalOutput, HtmlResults, JsonResults = self.get_emails()
41 | return FinalOutput, HtmlResults, JsonResults
42 |
43 | def process(self):
44 | try:
45 | if self.verbose:
46 | self.logger.info("Whoisology request started")
47 | p = ' [*] Whoisology request started'
48 | print helpers.color(p, firewall=True)
49 | url = "https://whoisology.com/archive_11/" + \
50 | self.domain
51 | r = requests.get(url)
52 | except Exception as e:
53 | error = "[!] Major issue with Whoisology Search:" + str(e)
54 | self.logger.error("Whoisology could not download source (Check Connection)")
55 | print helpers.color(error, warning=True)
56 | self.results = r.content
57 |
58 | def get_emails(self):
59 | Parse = Parser.Parser(self.results)
60 | Parse.genericClean()
61 | Parse.urlClean()
62 | FinalOutput = Parse.GrepFindEmails()
63 | HtmlResults = Parse.BuildResults(FinalOutput, self.name)
64 | JsonResults = Parse.BuildJson(FinalOutput, self.name)
65 | self.logger.debug('Whoisology completed search')
66 | return FinalOutput, HtmlResults, JsonResults
67 |
--------------------------------------------------------------------------------
/Modules/YahooSearch.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # Class will have the following properties:
4 | # 1) name / description
5 | # 2) main name called "ClassName"
6 | # 3) execute function (calls everything it needs)
7 | # 4) places the findings into a queue
8 |
9 | # Adapted from theHarvester:
10 | # https://github.com/laramies/theHarvester/blob/master/discovery/yahoosearch.py
11 | # https://emailhunter.co
12 |
13 | import configparser
14 | import requests
15 | import time
16 | import logging
17 | from Helpers import helpers
18 | from Helpers import Parser
19 |
20 |
21 | class ClassName(object):
22 |
23 | def __init__(self, Domain, verbose=False):
24 | self.apikey = False
25 | self.name = "Yahoo Search for Emails"
26 | self.description = "Uses Yahoo to search for emails, parses them out of the Html"
27 | config = configparser.ConfigParser()
28 | try:
29 | config.read('Common/SimplyEmail.ini')
30 | self.logger = logging.getLogger("SimplyEmail.YahooSearch")
31 | self.Domain = Domain
32 | self.Quanity = int(config['YahooSearch']['StartQuantity'])
33 | self.UserAgent = {
34 | 'User-Agent': helpers.getua()}
35 | self.Limit = int(config['YahooSearch']['QueryLimit'])
36 | self.Counter = int(config['YahooSearch']['QueryStart'])
37 | self.Sleep = int(config['SleepConfig']['QuerySleep'])
38 | self.Jitter = int(config['SleepConfig']['QueryJitter'])
39 | self.verbose = verbose
40 | self.Html = ""
41 | except Exception as e:
42 | self.logger.critical(
43 | 'YahooSearch module failed to load: ' + str(e))
44 | print helpers.color(" [*] Major Settings for YahooSearch are missing, EXITING!\n", warning=True)
45 |
46 | def execute(self):
47 | self.logger.debug("YahooSearch Started")
48 | self.search()
49 | FinalOutput, HtmlResults, JsonResults = self.get_emails()
50 | return FinalOutput, HtmlResults, JsonResults
51 |
52 | def search(self):
53 | while self.Counter <= self.Limit and self.Counter <= 1000:
54 | time.sleep(1)
55 | if self.verbose:
56 | p = ' [*] Yahoo Search on page: ' + str(self.Counter)
57 | self.logger.info("YahooSearch on page: " + str(self.Counter))
58 | print helpers.color(p, firewall=True)
59 | try:
60 | url = 'https://search.yahoo.com/search?p=' + str(self.Domain) + \
61 | '&b=' + str(self.Counter) + "&pz=" + str(self.Quanity)
62 | except Exception as e:
63 | error = " [!] Major issue with Yahoo Search:" + str(e)
64 | self.logger.error("Yahoo Search could not create URL: " + str(e))
65 | print helpers.color(error, warning=True)
66 | try:
67 | self.logger.debug("YahooSearch starting request on: " + str(url))
68 | r = requests.get(url, headers=self.UserAgent)
69 | except Exception as e:
70 | error = " [!] Fail during Request to Yahoo (Check Connection):" + \
71 | str(e)
72 | self.logger.error("YahooSearch failed to request (Check Connection)")
73 | print helpers.color(error, warning=True)
74 | results = r.content
75 | self.Html += results
76 | self.Counter += 100
77 | #helpers.modsleep(self.Sleep, jitter=self.Jitter)
78 |
79 | def get_emails(self):
80 | Parse = Parser.Parser(self.Html)
81 | Parse.genericClean()
82 | Parse.urlClean()
83 | FinalOutput = Parse.GrepFindEmails()
84 | HtmlResults = Parse.BuildResults(FinalOutput, self.name)
85 | JsonResults = Parse.BuildJson(FinalOutput, self.name)
86 | self.logger.debug('YahooSearch completed search')
87 | return FinalOutput, HtmlResults, JsonResults
--------------------------------------------------------------------------------
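YahooSearch reads QuerySleep and QueryJitter from the SleepConfig section, but the helpers.modsleep() call in search() is commented out. The helper itself lives in Helpers/helpers.py and is not shown in this listing, so the sketch below is only a plausible reading of what a jittered delay of that shape would do, not the project's actual implementation:

```python
import random
import time

def jittered_sleep(base_seconds, jitter_percent):
    # Sleep for base_seconds plus or minus up to jitter_percent of that value,
    # so repeated queries do not land on a fixed, easily fingerprinted interval.
    if base_seconds <= 0:
        return
    spread = base_seconds * (jitter_percent / 100.0)
    time.sleep(max(0, base_seconds + random.uniform(-spread, spread)))

# jittered_sleep(5, 20)  # sleeps roughly 4-6 seconds
```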
/Modules/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SimplySecurity/SimplyEmail/6a42d373a13b258e90d61efc82c527c5b754a9b8/Modules/__init__.py
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | [](https://travis-ci.org/SimplySecurity/SimplyEmail)
2 | [](https://coveralls.io/github/killswitch-GUI/SimplyEmail?branch=Version-1.4)
3 |
4 | 
5 |
6 | ----
7 | SimplyEmail was built around the concept that tools should do something, and do that something well, hence 'Simply'. Full documentation can be found at:
8 |
9 | [INSTALL / FAQ] https://simplysecurity.github.io/SimplyEmail/
10 |
11 | [CHANGELOG] https://github.com/SimplySecurity/SimplyEmail/blob/master/CHANGELOG.md
12 |
13 | [HELP/QUESTIONS/CHAT] Join us at: https://simplysecurity.herokuapp.com
14 |
15 | ## TL;DR
16 | Supported Platforms / Tested with CI:
17 | * **Docker**
18 | * Kali Rolling
19 | * Debian 9.x
20 | * Ubuntu 16.04 LTS
21 | * MacOS
22 |
23 |
24 | ```bash
25 | root@kali:~# curl -s https://raw.githubusercontent.com/killswitch-GUI/SimplyEmail/master/setup/oneline-setup.sh | bash
26 | root@kali:~# cd SimplyEmail
27 | (SE) root@kali:~/SimplyEmail# ./SimplyEmail.py
28 | ```
29 |
30 | or
31 |
32 | ```bash
33 | root@kali:~# docker pull simplysecurity/simplyemail
34 | root@kali:~# docker run -ti simplysecurity/simplyemail
35 | ```
36 |
--------------------------------------------------------------------------------
/SimplyEmail.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # Inspired by theHarvester and its capabilities. This project is simply a learning experience in
4 | # recon methods for obtaining email addresses and the ways you can go about it.
5 | # Also, I really wanted the chance to learn SQL and make this tool multi-threaded!
6 | #
7 | # * = Require API Key
8 | #
9 | import os
10 | import argparse
11 | import sys
12 | import configparser
13 | from Helpers import helpers
14 | from Helpers import VersionCheck
15 | from Common import TaskController
16 |
17 |
18 |
19 |
20 | def cli_parser():
21 | parser = argparse.ArgumentParser(add_help=False, description='''
22 | Email enumeration is an important phase of many operations that a pen-tester or\n
23 | Red Teamer goes through. There are plenty of applications that do this, but I wanted\n
24 | a simple yet effective way to get what Recon-ng and theHarvester get.\n
25 | (You may want to run -h)
26 | ''')
27 | parser.add_argument(
28 | "-all", action='store_true', help="Use all non API methods to obtain Emails")
29 | parser.add_argument("-e", metavar="company.com", default="",
30 | help="Set the target domain to search, e.g. company.com")
31 | parser.add_argument(
32 | "-l", action='store_true', help="List the current Modules Loaded")
33 | parser.add_argument(
34 | "-t", metavar="html / flickr / google", help="Test individual module (For Linting)")
35 | parser.add_argument(
36 | "-s", action='store_true', help="Set this to enable 'No-Scope' of the email parsing")
37 | parser.add_argument(
38 | "-n", action='store_true', help="Set this to enable Name Generation")
39 | parser.add_argument(
40 | "-verify", action='store_true', help="Set this to enable SMTP server email verify")
41 | parser.add_argument(
42 | "-v", action='store_true', help="Set this switch for verbose output of modules")
43 | parser.add_argument(
44 | "--json", metavar='json-emails.txt', default="",
45 | help="Set this switch for json output to a specific file")
46 | parser.add_argument('-h', '-?', '--h', '-help',
47 | '--help', action="store_true", help=argparse.SUPPRESS)
48 | args = parser.parse_args()
49 | if args.h:
50 | parser.print_help()
51 | sys.exit()
52 | return args.all, args.e, args.l, args.t, args.s, args.n, args.verify, args.v, args.json
53 |
54 |
55 | def TaskStarter(version):
56 | # Get all the options passed and pass it to the TaskConducter, this will
57 | # keep all the processing on the side.
58 | # need to pass the store true somehow to tell printer to restrict output
59 | log = helpers.log()
60 | log.start()
61 | cli_all, cli_domain, cli_list, cli_test, cli_scope, cli_names, cli_verify, cli_verbose, cli_json = cli_parser()
62 | cli_domain = cli_domain.lower()
63 | Task = TaskController.Conducter()
64 | Task.load_modules()
65 | if cli_list:
66 | log.infomsg("Tasked to List Modules", "Main")
67 | Task.ListModules()
68 | V = VersionCheck.VersionCheck(version)
69 | V.VersionRequest()
70 | sys.exit(0)
71 | if not len(cli_domain) > 1:
72 | log.warningmsg("Domain not supplied", "Main")
73 | print helpers.color("[*] No Domain Supplied to start up!\n", warning=True)
74 | sys.exit(0)
75 | if cli_test:
76 | # setup a small easy test to activate certain modules
77 | log.infomsg("Tasked to Test Module: " + cli_test, "Main")
78 | V = VersionCheck.VersionCheck(version)
79 | V.VersionRequest()
80 | Task.TestModule(cli_domain, cli_test, verbose=cli_verbose,
81 | scope=cli_scope, Names=cli_names, Verify=cli_verify,
82 | json=cli_json)
83 | if cli_all:
84 | log.infomsg("Tasked to run all Modules on domain: " + cli_domain, "Main")
85 | V = VersionCheck.VersionCheck(version)
86 | V.VersionRequest()
87 | Task.TaskSelector(cli_domain, verbose=cli_verbose,
88 | scope=cli_scope, Names=cli_names, Verify=cli_verify,
89 | json=cli_json)
90 |
91 |
92 | # def GenerateReport():
93 | # BootStrap with tables :)
94 | # Make a separate reporting module for sure, way too busy here
95 |
96 |
97 | def main():
98 | # instantiate the class
99 | try:
100 | config = configparser.ConfigParser()
101 | config.read('Common/SimplyEmail.ini')
102 | version = str(config['GlobalSettings']['Version'])
103 | except Exception as e:
104 | print e
105 | orc = TaskController.Conducter()
106 | orc.title()
107 | orc.title_screen()
108 | TaskStarter(version)
109 |
110 |
111 | if __name__ == "__main__":
112 | try:
113 | main()
114 | except KeyboardInterrupt:
115 | print 'Interrupted'
116 | try:
117 | sys.exit(0)
118 | except SystemExit:
119 | os._exit(0)
120 |
--------------------------------------------------------------------------------
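TaskStarter lays out the whole programmatic surface: build a TaskController.Conducter, load the modules, then hand it the domain and flags. A minimal sketch of driving that from Python rather than the CLI, assuming it runs from the repository root (the domain and flag values are illustrative); it mirrors roughly what ./SimplyEmail.py -all -e example.com -v does from the shell:

```python
from Common import TaskController

# Build the task conductor and discover the modules under Modules/.
task = TaskController.Conducter()
task.load_modules()

# Run the non-API modules against the target domain, mirroring the -all path.
task.TaskSelector("example.com", verbose=True,
                  scope=False, Names=False, Verify=False,
                  json="")
```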
/VERSION:
--------------------------------------------------------------------------------
1 | 1.4.10
--------------------------------------------------------------------------------
/docs/_config.yml:
--------------------------------------------------------------------------------
1 | theme: jekyll-theme-architect
--------------------------------------------------------------------------------
/setup/License/LICENSE-BootStrap-Twitter:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 |
3 | Copyright (c) 2011-2015 Twitter, Inc
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in
13 | all copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | THE SOFTWARE.
22 |
--------------------------------------------------------------------------------
/setup/License/LICENSE-Version:
--------------------------------------------------------------------------------
1 | v1.5.0
2 |
--------------------------------------------------------------------------------
/setup/License/LICENSE-theHarvester:
--------------------------------------------------------------------------------
1 | Released under the GPL v 2.0.
2 |
3 | If you did not recieve a copy of the GPL, try http://www.gnu.org/.
4 |
5 | Copyright 2011 Christian Martorella
6 |
7 | theHarvester is free software; you can redistribute it and/or modify
8 | it under the terms of the GNU General Public License as published by
9 | the Free Software Foundation version 2 of the License.
10 |
11 | theHarvester is distributed in the hope that it will be useful,
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | GNU General Public License for more details.
15 | Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
16 |
--------------------------------------------------------------------------------
/setup/oneline-setup.sh:
--------------------------------------------------------------------------------
1 | func_check_env(){
2 | # Check Sudo Dependency going to need that!
3 | if [[ "$OSTYPE" == "darwin"* ]]; then
4 | # MacOS / OS X
5 | xcode-select --install
6 | /usr/bin/ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)"
7 | brew install curl
8 | brew install git
9 | fi
10 |
11 | if [ -f /etc/debian_version ]; then
12 | sudo apt install -y git
13 | sudo apt install -y curl
14 | sudo apt install -y sudo
15 | fi
16 |
17 | if [ -f /etc/redhat-release ]; then
18 | sudo dnf install -y git
19 | sudo dnf install -y curl
20 | sudo dnf install -y sudo
21 | fi
22 |
23 | if [ -f /etc/lsb-release ]; then
24 | sudo apt-get -y install git
25 | sudo apt-get -y install curl
26 | sudo apt-get -y install sudo
27 | fi
28 |
29 | if [ $(which sudo|wc -l) -eq '0' ]; then
30 | echo
31 | echo ' [ERROR]: This Setup Script Requires sudo!'
32 | echo ' Please Install sudo Then Run This Setup Again.'
33 | echo
34 | exit 1
35 | fi
36 |
37 | git clone --branch master https://github.com/killswitch-GUI/SimplyEmail.git
38 | cd SimplyEmail
39 | ./setup/setup.sh
40 | }
41 |
42 |
43 | case $1 in
44 | *)
45 | func_check_env
46 | ;;
47 |
48 | esac
49 |
--------------------------------------------------------------------------------
/setup/requirments.txt:
--------------------------------------------------------------------------------
1 | BeautifulSoup==3.2.1
2 | beautifulsoup4==4.6.0
3 | certifi==2017.7.27.1
4 | chardet==3.0.4
5 | configparser==3.5.0
6 | dnspython==1.15.0
7 | docx2txt==0.6
8 | html5lib==0.999999999
9 | idna==2.6
10 | mechanize==0.3.6
11 | pdfminer==20140328
12 | python-magic==0.4.13
13 | requests==2.18.4
14 | six==1.11.0
15 | urllib3==1.22
16 | webencodings==0.5.1
17 | xlsx2csv==0.7.3
18 | XlsxWriter==1.0.2
19 | lxml==4.1.0
20 | coverage==4.4.1
21 | coveralls==1.2.0
22 | nose==1.3.7
23 | fake-useragent==0.1.8
24 |
--------------------------------------------------------------------------------
/setup/setup.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Global Variables
4 | runuser=$(whoami)
5 | tempdir=$(pwd)
6 |
7 | # Title Function
8 | func_title(){
10 | # Clear (For Prettiness)
10 | clear
11 |
12 | # Echo Title
13 | echo '=========================================================================='
14 | echo ' SimplyEmail Setup Script | [Updated]: 2016'
15 | echo '=========================================================================='
16 | echo ' [Web]: Http://CyberSyndicates.com | [Twitter]: @KillSwitch-GUI'
17 | echo '=========================================================================='
18 | }
19 |
20 |
21 |
22 | # Environment Checks
23 | func_check_env(){
24 | # Check Sudo Dependency going to need that!
25 | if [ $(which sudo|wc -l) -eq '0' ]; then
26 | echo
27 | echo ' [ERROR]: This Setup Script Requires sudo!'
28 | echo ' Please Install sudo Then Run This Setup Again.'
29 | echo
30 | exit 1
31 | fi
32 | }
33 |
34 | func_install_requests(){
35 | if [[ "$OSTYPE" == "darwin"* ]]; then
36 | # MacOS / OS X
37 | if ! brew --version &>/dev/null; then
38 | echo "[*] Failed to find brew, installing now"
39 | xcode-select --install
40 | /usr/bin/ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)"
41 | fi
42 | sudo easy_install pip
43 | brew install libmagic
44 | brew install curl
45 | brew install autoenv
46 | brew install git
47 | pip install python-magic
48 | echo "source $(brew --prefix autoenv)/activate.sh" >> ~/.bash_profile
49 | fi
50 |
51 | if [ -f /etc/redhat-release ]; then
52 | sudo dnf install -y git
53 | sudo dnf install -y python-lxml
54 | sudo dnf install -y wget grep antiword odt2txt python-devel libxml2-devel libxslt1-devel
55 | sudo dnf install -y python-virtualenv
56 | fi
57 |
58 | if [ -f /etc/lsb-release ]; then
59 | sudo apt-get -y install git
60 | sudo apt-get -y install python-lxml
61 | sudo apt-get -y install wget grep antiword odt2txt python-dev libxml2-dev libxslt1-dev
62 | sudo apt-get -y install python-virtualenv
63 | fi
64 |
65 | if [ -f /etc/debian_version ]; then
66 | sudo apt install -y git
67 | sudo apt install -y python-lxml
68 | sudo apt install -y wget grep antiword odt2txt python-dev libxml2-dev libxslt1-dev
69 | sudo apt install -y python-virtualenv
70 | fi
71 |
72 | # Check for PIP otherwise install it
73 | if ! which pip > /dev/null; then
74 | wget https://bootstrap.pypa.io/get-pip.py
75 | python get-pip.py
76 | rm get-pip.py
77 | fi
78 | }
79 |
80 | func_install_env(){
81 | if [ -f /.dockerenv ]; then
82 | echo " [*] Currently installing to Docker, skipping Python Virtenv"
83 | else
84 | # Setup virtual env
85 | pip install autoenv
86 | echo "source `which activate.sh`" >> ~/.bashrc
87 | virtualenv --no-site-packages SE
88 | source SE/bin/activate
89 | fi
90 | }
91 |
92 | func_install_pip(){
93 | pip install -r setup/requirments.txt
94 | }
95 |
96 | # Menu Case Statement
97 | case $1 in
98 | *)
99 | func_title
100 | func_check_env
101 | func_install_requests
102 | func_install_env
103 | func_install_pip
104 | ;;
105 |
106 | esac
107 |
108 |
--------------------------------------------------------------------------------
/tests/Test-DOC.doc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SimplySecurity/SimplyEmail/6a42d373a13b258e90d61efc82c527c5b754a9b8/tests/Test-DOC.doc
--------------------------------------------------------------------------------
/tests/Test-DOCX.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SimplySecurity/SimplyEmail/6a42d373a13b258e90d61efc82c527c5b754a9b8/tests/Test-DOCX.docx
--------------------------------------------------------------------------------
/tests/Test-PDF.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SimplySecurity/SimplyEmail/6a42d373a13b258e90d61efc82c527c5b754a9b8/tests/Test-PDF.pdf
--------------------------------------------------------------------------------
/tests/Test-PPTX.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SimplySecurity/SimplyEmail/6a42d373a13b258e90d61efc82c527c5b754a9b8/tests/Test-PPTX.pptx
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------