├── .github
│   └── workflows
│       └── codeql-analysis.yml
├── .gitignore
├── .travis.yml
├── Code_Of_Conduct.md
├── Dockerfile
├── LICENSE.txt
├── README.md
├── images
│   ├── README.md
│   ├── machinae-square.jpg
│   ├── machinae.jpg
│   ├── robot-plainer.jpg
│   └── t-machinae.jpg
├── machinae.yml
├── pylintrc
├── requirements.txt
├── setup.py
└── src
    └── machinae
        ├── __init__.py
        ├── cmd.py
        ├── outputs.py
        ├── sites
        │   ├── __init__.py
        │   ├── base.py
        │   ├── csv.py
        │   ├── html.py
        │   ├── ipwhois.py
        │   ├── json.py
        │   └── rss.py
        └── utils.py
/.github/workflows/codeql-analysis.yml:
--------------------------------------------------------------------------------
1 | name: "CodeQL"
2 |
3 | on:
4 | push:
5 | branches: [master, ]
6 | pull_request:
7 | # The branches below must be a subset of the branches above
8 | branches: [master]
9 | schedule:
10 | - cron: '0 2 * * 6'
11 |
12 | jobs:
13 | analyze:
14 | name: Analyze
15 | runs-on: ubuntu-latest
16 |
17 | steps:
18 | - name: Checkout repository
19 | uses: actions/checkout@v2
20 | with:
21 | # We must fetch at least the immediate parents so that if this is
22 | # a pull request then we can checkout the head.
23 | fetch-depth: 2
24 |
25 | # If this run was triggered by a pull request event, then checkout
26 | # the head of the pull request instead of the merge commit.
27 | - run: git checkout HEAD^2
28 | if: ${{ github.event_name == 'pull_request' }}
29 |
30 | # Initializes the CodeQL tools for scanning.
31 | - name: Initialize CodeQL
32 | uses: github/codeql-action/init@v1
33 | # Override language selection by uncommenting this and choosing your languages
34 | with:
35 | languages: python
36 |
37 | # Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
38 | # If this step fails, then you should remove it and run the build manually (see below)
39 | - name: Autobuild
40 | uses: github/codeql-action/autobuild@v1
41 |
42 | # ℹ️ Command-line programs to run using the OS shell.
43 | # 📚 https://git.io/JvXDl
44 |
45 | # ✏️ If the Autobuild fails above, remove it and uncomment the following three lines
46 | # and modify them (or add more) to build your code if your project
47 | # uses a compiled language
48 |
49 | #- run: |
50 | # make bootstrap
51 | # make release
52 |
53 | - name: Perform CodeQL Analysis
54 | uses: github/codeql-action/analyze@v1
55 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 |
5 | # C extensions
6 | *.so
7 |
8 | # Distribution / packaging
9 | .Python
10 | env/
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | lib/
17 | lib64/
18 | parts/
19 | sdist/
20 | var/
21 | *.egg-info/
22 | .installed.cfg
23 | *.egg
24 | .python-version
25 |
26 | # PyInstaller
27 | # Usually these files are written by a python script from a template
28 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
29 | *.manifest
30 | *.spec
31 |
32 | # Installer logs
33 | pip-log.txt
34 | pip-delete-this-directory.txt
35 |
36 | # Unit test / coverage reports
37 | htmlcov/
38 | .tox/
39 | .coverage
40 | .cache
41 | nosetests.xml
42 | coverage.xml
43 |
44 | # Translations
45 | *.mo
46 | *.pot
47 |
48 | # Django stuff:
49 | *.log
50 |
51 | # Sphinx documentation
52 | docs/_build/
53 |
54 | # PyBuilder
55 | target/
56 |
57 | # Vim
58 | *.swp
59 |
60 | # Divshot
61 | .divshot-cache/
62 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: python
2 |
3 | python:
4 | - "3.6"
5 | - "3.7"
6 | - "3.8"
7 | - "3.9-dev"
8 |
9 | before_install:
10 | - pip install pandoc
11 | - pip install pylint
12 | script:
13 | - pip install -r requirements.txt
14 | - pylint src/machinae/*.py
15 | - pylint src/machinae/sites/*.py
16 |
17 | deploy:
18 | provider: pypi
19 | user: billfordx
20 | password:
21 | secure: PyOibJ0cErm9yCOfgWvToefrnCrwt3iw7H4eU7hdg4x73DXyqVRNHaJDTvfiVWJyJSNRxPe2r80v7VzUKO24Lqgp7FEpf+4dNbEJtJJEis93vYxOerYXthO/VUIh3yk7ULq9YIAn+65XgNRUk/YllebvOpHLnwNh8FQn63HesDVkCrcuiNFjALqC3SNKcg8vQxrBJzXo+f36a45BgZiQ20qZ8czechXKhi1UVWdQ8ezS/+4YAZcdudD3A0+qnfPd0ve0zfpIrm7ZsyQ9jyDXtnWw7QlOLOuQcT3o4OH9WHrtxjrFONtjg4zZnT9gygxUycgWz2NNVqVWx57ZkZImjAVaf8p7Ym/0DKLuMix2f+K5iMtVlKtYnb8ZKCj4UuaNrNmHrbDj7PasckezKbQF+TwMW9UoG54qh3q1fa+l13rZ3kTcjxg1Wn5RXv6/aw/i+3TGHW2hO0eWxAjgRl741NAzZDVuh0PAenYK8DETT2ZUIlU3VnzCbzi6jTunwV8UsToERHyla3GuiykTlmIOb/3THYIs+n7kffH89b1GlOj/+joLWL12AY5dG4zrhv2VYqt+erJ65K34/nJLk19S+KPqIpYKn/dj1cGzE3y2awiADR4nJbDH87BioqjTQ1fV8bxwPmyl0bGEzOoH9DQnFy/hAc6E9RNWkDIJKUUOEH0=
22 | on:
23 | tags: true
24 | branch: master
25 |
--------------------------------------------------------------------------------
/Code_Of_Conduct.md:
--------------------------------------------------------------------------------
1 | # Contributor Covenant Code of Conduct
2 |
3 | ## Our Pledge
4 |
5 | In the interest of fostering an open and welcoming environment, we as
6 | contributors and maintainers pledge to making participation in our project and
7 | our community a harassment-free experience for everyone, regardless of age, body
8 | size, disability, ethnicity, sex characteristics, gender identity and expression,
9 | level of experience, education, socio-economic status, nationality, personal
10 | appearance, race, religion, or sexual identity and orientation.
11 |
12 | ## Our Standards
13 |
14 | Examples of behavior that contributes to creating a positive environment
15 | include:
16 |
17 | * Using welcoming and inclusive language
18 | * Being respectful of differing viewpoints and experiences
19 | * Gracefully accepting constructive criticism
20 | * Focusing on what is best for the community
21 | * Showing empathy towards other community members
22 |
23 | Examples of unacceptable behavior by participants include:
24 |
25 | * The use of sexualized language or imagery and unwelcome sexual attention or
26 | advances
27 | * Trolling, insulting/derogatory comments, and personal or political attacks
28 | * Public or private harassment
29 | * Publishing others' private information, such as a physical or electronic
30 | address, without explicit permission
31 | * Other conduct which could reasonably be considered inappropriate in a
32 | professional setting
33 |
34 | ## Our Responsibilities
35 |
36 | Project maintainers are responsible for clarifying the standards of acceptable
37 | behavior and are expected to take appropriate and fair corrective action in
38 | response to any instances of unacceptable behavior.
39 |
40 | Project maintainers have the right and responsibility to remove, edit, or
41 | reject comments, commits, code, wiki edits, issues, and other contributions
42 | that are not aligned to this Code of Conduct, or to ban temporarily or
43 | permanently any contributor for other behaviors that they deem inappropriate,
44 | threatening, offensive, or harmful.
45 |
46 | ## Scope
47 |
48 | This Code of Conduct applies both within project spaces and in public spaces
49 | when an individual is representing the project or its community. Examples of
50 | representing a project or community include using an official project e-mail
51 | address, posting via an official social media account, or acting as an appointed
52 | representative at an online or offline event. Representation of a project may be
53 | further defined and clarified by project maintainers.
54 |
55 | ## Enforcement
56 |
57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be
58 | reported by contacting the project team at billford+gitmach@billford.com. All
59 | complaints will be reviewed and investigated and will result in a response that
60 | is deemed necessary and appropriate to the circumstances. The project team is
61 | obligated to maintain confidentiality with regard to the reporter of an incident.
62 | Further details of specific enforcement policies may be posted separately.
63 |
64 | Project maintainers who do not follow or enforce the Code of Conduct in good
65 | faith may face temporary or permanent repercussions as determined by other
66 | members of the project's leadership.
67 |
68 | ## Attribution
69 |
70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
71 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html
72 |
73 | [homepage]: https://www.contributor-covenant.org
74 |
75 | For answers to common questions about this code of conduct, see
76 | https://www.contributor-covenant.org/faq
77 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM python:3
2 |
3 | RUN pip3 install machinae
4 |
5 | # Make sure you have a machinae.yml file to build with
6 | COPY machinae.yml /etc
7 |
8 | ENTRYPOINT ["/usr/local/bin/machinae"]
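# Hypothetical build-and-run sequence (image tag and target are illustrative):
#   docker build -t machinae .
#   docker run --rm machinae 8.8.8.8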
9 |
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 |
3 | Copyright (c) 2015 Hurricane Labs
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in
13 | all copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | THE SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | [Build Status](https://travis-ci.org/HurricaneLabs/machinae)
2 | [CII Best Practices](https://bestpractices.coreinfrastructure.org/projects/2344)
3 | [SonarCloud](https://sonarcloud.io/dashboard?id=HurricaneLabs_machinae)
4 |
5 | 
6 |
7 |
8 | Machinae Security Intelligence Collector
9 | ========================================
10 |
11 | Machinae is a tool for collecting intelligence from public sites/feeds about
12 | various security-related pieces of data: IP addresses, domain names, URLs,
13 | email addresses, file hashes and SSL fingerprints. It was inspired by
14 | [Automater][1], another excellent tool for collecting information. The Machinae
15 | project was born from a desire to improve Automater in four areas:
16 |
17 | 1. Codebase - Bring Automater to python3 compatibility while making the code
18 | more pythonic
19 | 2. Configuration - Use a more human readable configuration format (YAML)
20 | 3. Inputs - Support JSON parsing out-of-the-box without the need to write
21 | regular expressions, but still support regex scraping when needed
22 | 4. Outputs - Support additional output types, including JSON, while making
23 | extraneous output optional
24 |
25 |
26 | Installation
27 | ------------
28 |
29 | Machinae can be installed using pip3:
30 |
31 | pip3 install machinae
32 |
33 | Or, if you're feeling adventurous, it can be installed directly from GitHub:
34 |
35 | pip3 install git+https://github.com/HurricaneLabs/machinae.git
36 |
37 | You will need to have whatever dependencies are required on your system for
38 | compiling Python modules (on Debian based systems, `python3-dev`), as well as
39 | the libyaml development package (on Debian based systems, `libyaml-dev`).
40 |
41 | You'll also want to grab the [latest configuration file][2] and place it in
42 | `/etc/machinae.yml`.
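
If `curl` is available, something like this will fetch it (adjust the
destination path to taste):

    curl -L -o /etc/machinae.yml https://github.com/HurricaneLabs/machinae/raw/master/machinae.yml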
43 |
44 |
45 | Configuration File
46 | ------------------
47 |
48 | Machinae supports a simple configuration merging system to allow you to make
49 | adjustments to the configuration without modifying the machinae.yml we provide
50 | you, making configuration updates a snap. This is done by finding a system-wide
51 | default configuration (default `/etc/machinae.yml`), merging into that a
52 | system-wide local configuration (`/etc/machinae.local.yml`) and finally a
53 | per-user local configuration (`~/.machinae.yml`). The system-wide configuration
54 | can also be located in the current working directory, can be set using the
55 | `MACHINAE_CONFIG` environment variable, or of course by using the `-c` or
56 | `--config` command line options. Configuration merging can be disabled by
57 | passing the `--nomerge` option, which will cause Machinae to only load the
58 | default system-wide configuration (or the one passed on the command line).
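
For instance, either of the following (the path and target are illustrative)
points Machinae at an alternate configuration:

    MACHINAE_CONFIG=~/machinae.yml machinae example.com
    machinae -c ~/machinae.yml --nomerge example.com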
59 |
60 | As an example of this, say you'd like to enable the Fortinet Category site,
61 | which is disabled by default. You could modify `/etc/machinae.yml`, but these
62 | changes would be overwritten by an update. Instead, you can put the following
63 | in either `/etc/machinae.local.yml` or `~/.machinae.yml`:
64 |
65 | fortinet_classify:
66 | default: true
67 |
68 | Or, conversely, to disable a site, such as Virus Total pDNS:
69 |
70 | vt_ip:
71 | default: false
72 | vt_domain:
73 | default: false
74 |
75 |
76 | Usage
77 | -----
78 |
79 | Machinae usage is very similar to Automater:
80 |
81 | usage: machinae [-h] [-c CONFIG] [--nomerge] [-d DELAY] [-f FILE] [-i INFILE] [-v]
82 | [-o {D,J,N,S}] [-O {ipv4,ipv6,fqdn,email,sslfp,hash,url}] [-q]
83 | [-s SITES] [-a AUTH] [-H HTTP_PROXY]
84 | [--dump-config | --detect-otype]
85 | ...
86 |
87 | - See above for details on the `-c`/`--config` and `--nomerge` options.
88 |
89 | - Machinae supports a `-d`/`--delay` option, like Automater. However, Machinae
90 | uses 0 by default.
91 |
92 | - Machinae output is controlled by two arguments:
93 | - `-o` controls the output format, and can be followed by a single character
94 | to indicate the desired type of output:
95 | - *N* is the default output ("Normal")
96 | - *D* is the default output, but dot characters are replaced
97 | - *J* is JSON output
- *S* is "short" output, which simply reports yes/no/error for each site
98 | - `-f`/`--file` specifies the file where output should be written. The default
99 | is "-" for stdout.
100 |
101 | - Machinae will attempt to auto-detect the type of target passed in (Machinae
102 | refers to targets as "observables" and the type as "otype"). This detection can
103 | be overridden with the `-O`/`--otype` option. The choices are listed in the
104 | usage message above.
105 |
106 | - By default, Machinae operates in verbose mode. In this mode, it will output
107 | status information about the services it is querying on the console as they are
108 | queried. This output will always be written to stdout, regardless of the output
109 | setting. To disable verbose mode, use `-q`.
110 |
111 | - By default, Machinae will run through all services in the configuration that
112 | apply to each target's otype *and* are not marked as "default: false". To modify
113 | this behavior, you can:
114 | - Pass a comma-separated list of sites to run via `-s` (use the top level key from the
115 | configuration).
116 | - Pass the special keyword `all` to run through all services *including* those
117 | marked as "default: false"
118 |
119 | Note that in both cases, `otype` validation is still applied.
120 |
121 | - Machinae supports passing an HTTP proxy on the command line using the
122 | `-H`/`--http-proxy` argument. If no proxy is specified, machinae will search the
123 | standard `HTTP_PROXY` and `HTTPS_PROXY` environment variables, as well as the
124 | less standard `http_proxy` and `https_proxy` environment variables.
125 |
126 | - Lastly, a list of targets should be passed. All arguments other than the
127 | options listed above will be interpreted as targets.
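
Putting these together, a hypothetical invocation that quietly queries two
specific sites and writes JSON results to a file might look like:

    machinae -q -s ipwhois,sans -o J -f results.json 198.51.100.7

(`ipwhois` and `sans` are top-level keys from the bundled machinae.yml; the
IP address is a placeholder.)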
128 |
129 |
130 | Out-of-the-Box Data Sources
131 | ---------------------------
132 |
133 | Machinae comes with out-of-the-box support for the following data sources:
134 |
135 | - IPVoid
136 | - URLVoid
137 | - URL Unshortener (http://www.toolsvoid.com/unshorten-url)
138 | - Malc0de
139 | - SANS
140 | - FreeGeoIP (freegeoip.io)
141 | - Fortinet Category
142 | - VirusTotal pDNS (via web scrape - commented out)
143 | - VirusTotal pDNS (via JSON API)
144 | - VirusTotal URL Report (via JSON API)
145 | - VirusTotal File Report (via JSON API)
146 | - Reputation Authority
147 | - ThreatExpert
148 | - VxVault
149 | - ProjectHoneypot
150 | - McAfee Threat Intelligence
151 | - StopForumSpam
152 | - Cymru MHR
153 | - ICSI Certificate Notary
154 | - TotalHash (disabled by default)
155 | - DomainTools Parsed Whois (Requires API key)
156 | - DomainTools Reverse Whois (Requires API key)
157 | - DomainTools Reputation
158 | - IP WHOIS (Using RIR REST interfaces)
159 | - Hacked IP
160 | - Metadefender Cloud (Requires API key)
161 | - GreyNoise (Requires API key)
162 | - IBM XForce (Requires API key)
163 |
164 | With additional data sources on the way.
165 |
166 | HTTP Basic Authentication and Configuration
167 | -------------------------------------------
168 |
169 | Machinae supports HTTP Basic Auth for sites that require it through the `--auth/-a`
170 | flag. You will need to create a YAML file with your credentials, keyed by the
171 | site that requires them; each value is a two-item list of username, then
172 | password or API key. For example, for the included PassiveTotal site this might
173 | look like:
174 |
175 | passivetotal: ['myemail@example.com', 'my_api_key']
176 |
177 | Inside the site configuration under `request` you will see a key such as:
178 |
179 | json:
180 | request:
181 | url: '...'
182 | auth: passivetotal
183 |
184 | The `auth: passivetotal` points to the key inside the authentication config passed
185 | via the command line.
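
For example, a hypothetical run against one of the PassiveTotal sites using
such a credentials file (the path is illustrative):

    machinae -a ~/machinae-auth.yml -s passivetotal_pdns example.com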
186 |
187 | ### Disabled by default
188 |
189 | The following sites are disabled by default:
190 |
191 | - Fortinet Category (`fortinet_classify`)
192 | - Telize Geo IP (`telize`)
193 | - TotalHash (`totalhash_ip`)
194 | - DomainTools Parsed Whois (`domaintools_parsed_whois`)
195 | - DomainTools Reverse Whois (`domaintools_reverse_whois`)
196 | - DomainTools Reputation (`domaintools_reputation`)
197 | - PassiveTotal Passive DNS (`passivetotal_pdns`)
198 | - PassiveTotal Whois (`passivetotal_whois`)
199 | - PassiveTotal SSL Certificate History (`passivetotal_sslcert`)
200 | - PassiveTotal Host Attribute Components (`passivetotal_components`)
201 | - PassiveTotal Host Attribute Trackers (`passivetotal_trackers`)
202 | - MaxMind GeoIP2 Passive Insight (`maxmind`)
203 | - FraudGuard (`fraudguard`)
204 | - Shodan (`shodan`)
205 | - Hacked IP
206 | - Metadefender Cloud (Requires API key)
207 | - GreyNoise (Requires API key)
208 | - IBM XForce (Requires API key)
209 |
210 | Output Formats
211 | --------------
212 |
213 | Machinae comes with a limited set of output formats: normal, normal with dot
214 | escaping, and JSON. We plan to add additional output formats in the future.
215 |
216 |
217 | Adding additional sites
218 | -----------------------
219 |
220 | *** COMING SOON ***
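
In the meantime, the bundled machinae.yml is the best reference. A minimal
site definition follows this general shape (a sketch only; the site name,
URL, and result keys below are placeholders):

    my_site:
      name: My Site
      default: true
      otypes:
        - ipv4
      json:
        request:
          url: 'https://api.example.com/lookup/{target}'
          method: get
        results:
          - key: risk_score
            pretty_name: Example Risk Score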
221 |
222 |
223 | Known Issues
224 | ------------
225 |
226 | - Some ISPs on IPVoid contain double-encoded HTML entities, which are not
227 | double-decoded
228 |
229 |
230 | Upcoming Features
231 | -----------------
232 |
233 | - Add IDS rule search functionality (VRT/ET)
234 | - Add "More info" link for sites
235 | - Add "dedup" option to parser settings
236 | - Add option for per-otype request settings
237 | - Add custom per-site output for error codes
238 |
239 |
240 | Version History
241 | ---------------
242 |
243 | ### Version 1.4.9 (2020-11-25) ###
244 | - Fix bug in JSON as_time processing when time is an epoch time, but str type
245 |
246 | ### Version 1.4.1 (2018-08-31) ###
247 | - New Features
248 | - Automatically Defangs output
249 | - MISP Support (example added to machinae.yml)
250 |
251 | ### Version 1.4.0 (2016-04-20) ###
252 | - New features
253 | - "-a"/"--auth" option for passing an auth config file
254 | - Thanks johannestaas for the submission
255 | - "-H"/"--http-proxy" option, and environment support, for HTTP proxies
256 | - New sites
257 | - Passivetotal (various forms, thanks johannestaas)
258 | - MaxMind
259 | - FraudGuard
260 | - Shodan
261 | - Updated sites
262 | - FreeGeoIP (replaced freegeoip.net with freegeoip.io)
263 |
264 | ### Version 1.3.4 (2016-04-01) ###
265 | - Bug fixes
266 | - Convert exceptions to str when outputting to JSON
267 | - Should actually close #14
268 |
269 | ### Version 1.3.3 (2016-03-28) ###
270 | - Bug fixes
271 | - Correctly handle error results when outputting to JSON
272 | - Closes #14
273 | - Thanks Den1al for the bug report
274 |
275 | ### Version 1.3.2 (2016-03-10) ###
276 | - New features
277 | - "Short" output mode - simply output yes/no/error for each site
278 | - "-i"/"--infile" option for passing a file with list of targets
279 |
280 | ### Version 1.3.1 (2016-03-08) ###
281 |
282 | - New features
283 | - Prepend "http://" to URL targets when not starting with http:// or https://
284 |
285 | ### Version 1.3.0 (2016-03-07) ###
286 |
287 | - New sites
288 | - Cymon.io - Threat intel aggregator/tracker by eSentire
289 | - New features
290 | - Support simple paginated responses
291 | - Support url encoding 'target' in request URL
292 | - Support url decoding values in results
293 |
294 | ### Version 1.2.0 (2016-02-16) ###
295 |
296 | - New features
297 | - Support for sites returning multiple JSON documents
298 | - Ability to specify time format for relative time parameters
299 | - Ability to parse Unix timestamps in results and display in ISO-8601 format
300 | - Ability to specify status codes to ignore per-API
301 | - New sites
302 | - DNSDB - Farsight Security Passive DNS database (premium)
303 |
304 | ### Version 1.1.2 (2015-11-26) ###
305 |
306 | - New sites
307 | - Telize - GeoIP site (premium)
308 | - Freegeoip - GeoIP site (free)
309 | - CIF - CIFv2 API support, from csirtgadgets.org
310 | - New features
311 | - Ability to specify labels for single-line multimatch JSON outputs
312 | - Ability to specify relative time parameters using relatime library
313 |
314 | ### Version 1.0.1 (2015-10-13) ###
315 |
316 | - Fixed a false-positive bug with Spamhaus (GitHub #10)
317 |
318 | ### Version 1.0.0 (2015-07-02) ###
319 |
320 | - Initial release
321 |
322 |
323 | License Info
324 | ------------
325 |
326 | The MIT License (MIT)
327 |
328 | Copyright (c) 2015 Hurricane Labs LLC
329 |
330 | Permission is hereby granted, free of charge, to any person obtaining a copy
331 | of this software and associated documentation files (the "Software"), to deal
332 | in the Software without restriction, including without limitation the rights
333 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
334 | copies of the Software, and to permit persons to whom the Software is
335 | furnished to do so, subject to the following conditions:
336 |
337 | The above copyright notice and this permission notice shall be included in
338 | all copies or substantial portions of the Software.
339 |
340 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
341 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
342 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
343 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
344 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
345 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
346 | THE SOFTWARE.
347 |
348 |
349 | [1]: https://github.com/1aN0rmus/TekDefense-Automater
350 | [2]: https://github.com/HurricaneLabs/machinae/raw/master/machinae.yml
351 |
--------------------------------------------------------------------------------
/images/README.md:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/images/machinae-square.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HurricaneLabs/machinae/9ef3e6ce1a8d4ad00107ca206e72bf8dc09878f1/images/machinae-square.jpg
--------------------------------------------------------------------------------
/images/machinae.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HurricaneLabs/machinae/9ef3e6ce1a8d4ad00107ca206e72bf8dc09878f1/images/machinae.jpg
--------------------------------------------------------------------------------
/images/robot-plainer.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HurricaneLabs/machinae/9ef3e6ce1a8d4ad00107ca206e72bf8dc09878f1/images/robot-plainer.jpg
--------------------------------------------------------------------------------
/images/t-machinae.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HurricaneLabs/machinae/9ef3e6ce1a8d4ad00107ca206e72bf8dc09878f1/images/t-machinae.jpg
--------------------------------------------------------------------------------
/machinae.yml:
--------------------------------------------------------------------------------
1 | ipwhois:
2 | name: IP Whois
3 | otypes:
4 | - ipv4
5 | ipwhois:
6 | results:
7 | - key: '@'
8 | multi_match:
9 | keys:
10 | - asn
11 | - asn_cidr
12 | - asn_date
13 | - asn_registry
14 | - asn_country_code
15 | pretty_name: ASN Information
16 | - key: nets
17 | multi_match:
18 | keys:
19 | - cidr
20 | - handle
21 | - name
22 | - range
23 | pretty_name: Network Information
24 | - key: nets
25 | multi_match:
26 | keys:
27 | - description
28 | - key: created
29 | regex: '(\d+-\d+-\d+)T'
30 | - key: updated
31 | regex: '(\d+-\d+-\d+)T'
32 | pretty_name: Registration Info
33 | - key: nets
34 | multi_match:
35 | keys:
36 | - city
37 | - state
38 | - postal_code
39 | - country
40 | pretty_name: Registration Locality
41 | # For when we use RWS
42 | - key: nets
43 | multi_match:
44 | keys:
45 | - key: abuse_emails
46 | split: "\n"
47 | pretty_name: Abuse Email
48 | - key: nets
49 | multi_match:
50 | keys:
51 | - key: tech_emails
52 | split: "\n"
53 | pretty_name: Tech Email
54 | # For when we fall back to regular whois
55 | - key: nets
56 | multi_match:
57 | keys:
58 | - key: emails
59 | split: "\n"
60 | pretty_name: Contacts
61 | spamhaus_ip:
62 | name: Spamhaus Zen BL
63 | default: False
64 | otypes:
65 | - ipv4
66 | webscraper:
67 | request:
68 | url: 'http://www.spamhaus.org/query/ip/{target}'
69 | method: get
70 | strip_comments: true
71 | results:
72 | - regex: '\S+ is (listed in the \w+)'
73 | values:
74 | - spamhaus_zenbl
75 | pretty_name: Spamhaus Zen BL
76 | spamhaus_domain:
77 | name: Spamhaus Domain BL
78 | default: False
79 | otypes:
80 | - fqdn
81 | webscraper:
82 | request:
83 | url: 'http://www.spamhaus.org/query/domain/{target}'
84 | method: get
85 | results:
86 | - regex: '\S+ is (listed in the \w+)'
87 | values:
88 | - spamhaus_dbl
89 | pretty_name: Spamhaus DBL
90 | ipvoid:
91 | name: IPVoid
92 | default: False
93 | otypes:
94 | - ipv4
95 | json:
96 | request:
97 | url: 'https://endpoint.apivoid.com/iprep/v1/pay-as-you-go/'
98 | params:
99 | key:
100 | ip: '{target}'
101 | method: get
102 | results:
103 | - key: data.report.blacklists.detections
104 | pretty_name: Number of detections
105 | - key: data.report.blacklists.detection_rate
106 | pretty_name: IP Void Detection Rate
107 | - key: data.report.blacklists.engines
108 | pretty_name: Engines
109 | multi_match:
110 | keys:
111 | - engine
112 | - reference
113 | onlyif: detected
114 |
115 | urlvoid:
116 | name: URLVoid
117 | otypes:
118 | - fqdn
119 | webscraper:
120 | request:
121 | url: 'http://www.urlvoid.com/scan/{target}'
122 | method: get
123 | results:
124 | - regex: 'Analysis Date<\/td>(.+?)<\/td>'
125 | values: urlvoid_analysis_date
126 | pretty_name: Last Analysis
127 | - regex: '(\d{1,3}.\d{1,3}.\d{1,3}.\d{1,3}).{5,30}Find\swebsites\shosted\shere'
128 | values: urlvoid_ip
129 | pretty_name: IP from URLVoid
130 | - regex: '\/>(.+?)<\/td> | <\/i>'
131 | values: urlvoid_blacklist
132 | pretty_name: Blacklist from URL Void
133 | - regex: 'Domain\s1st\sRegistered.+\ | (.+)\<\/td\>'
134 | values: urlvoid_domain_age
135 | pretty_name: Domain Age from URL Void
136 | - regex: 'latitude\s/\slongitude.+\ | (.+)\<\/td\>'
137 | values: urlvoid_location
138 | pretty_name: Geo Coordinates from URLVoid
139 | - regex: 'alt="flag"\s/>\s\(\w+\)\s+([\w\s]+) | '
140 | values: urlvoid_country_code
141 | pretty_name: Country from URLVoid
142 | unshorten:
143 | name: URL Unshorten
144 | otypes:
145 | - fqdn
146 | - url
147 | webscraper:
148 | request:
149 | url: http://www.toolsvoid.com/unshorten-url
150 | method: post
151 | data:
152 | urladdr: '{target}'
153 | results:
154 | - regex: 'class="myarea">(.*?))\d{1,3})'
205 | values:
206 | - AbuseIPReports
207 | pretty_name: 'AbuseIPDB reports'
208 |
209 | - regex: '((?<=most\srecent\sreport\swas\s)\d{1,3}\s\w+\s\w+)'
210 | values:
211 | - Last_seen
212 | pretty_name: 'Last seen'
213 |
214 | RansomwareTracker:
215 | name: RansomwareTracker
216 | otypes:
217 | - ipv4
218 | webscraper:
219 | request:
220 | url: 'https://ransomwaretracker.abuse.ch/host/{target}'
221 | method: get
222 | results:
223 | - regex: '((?<=Host\sStatus:)\w+)'
224 | values:
225 | - Active
226 | pretty_name: 'Host Status'
227 | - regex: '((?<= | \d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2} | )\d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2})'
228 | values:
229 | - Last_seen
230 | pretty_name: 'Last seen'
231 | - regex: '((?<=Malware: | )\w+)'
232 | values:
233 | - ransomwareType
234 | pretty_name: 'Ransomware Type'
235 |
236 | sans:
237 | name: SANS
238 | otypes:
239 | - ipv4
240 | webscraper:
241 | request:
242 | url: 'https://isc.sans.edu/api/ip/{target}'
243 | method: get
244 | results:
245 | - regex: 'attacks>(\d+)<'
246 | values:
247 | - sans_attacks
248 | pretty_name: SANS attacks
249 | - regex: 'count>(\d+)<'
250 | values:
251 | - sans_count
252 | pretty_name: SANS count
253 | - regex: 'count>(\d+)<'
254 | values:
255 | - sans_count
256 | pretty_name: SANS count
257 | - regex: 'maxdate>(\d{4}-\d{2}-\d{2})<'
258 | values:
259 | - sans_maxdate
260 | pretty_name: SANS maxdate
261 | - regex: 'mindate>(\d{4}-\d{2}-\d{2})<'
262 | values:
263 | - sans_mindate
264 | pretty_name: SANS mindate
265 | telize:
266 | name: Telize GeoIP
267 | default: False
268 | otypes:
269 | - ipv4
270 | json:
271 | request:
272 | url: 'https://telize-v1.p.rapidapi.com/location/{target}'
273 | method: get
274 | headers:
275 | x-rapidapi-host: telize-v1.p.rapidapi.com
276 | x-rapidapi-key:
277 | Accept: application/json
278 | results:
279 | - key: continent_code
280 | pretty_name: GeoIP Continent Code
281 | - key: country_code
282 | pretty_name: GeoIP Country Code
283 | - key: country
284 | pretty_name: GeoIP Country
285 | - key: region_code
286 | pretty_name: GeoIP Region Code
287 | - key: region
288 | pretty_name: GeoIP Region
289 | - key: city
290 | pretty_name: GeoIP City
291 | - key: postal_code
292 | pretty_name: GeoIP Zip Code
293 | - key: latitude
294 | pretty_name: GeoIP Latitude
295 | - key: longitude
296 | pretty_name: GeoIP Longitude
297 | - key: timezone
298 | pretty_name: GeoIP Timezone
299 | - key: offset
300 | pretty_name: GeoIP UTC Offset
301 | - key: asn
302 | pretty_name: GeoIP ASN
303 | - key: isp
304 | pretty_name: GeoIP ISP
305 | maxmind:
306 | name: MaxMind GeoIP2 Precision
307 | default: False
308 | otypes:
309 | - ipv4
310 | json:
311 | request:
312 | url: https://geoip.maxmind.com/geoip/v2.1/insights/{target}
313 | auth: maxmind
314 | results:
315 | - key: country.iso_code
316 | pretty_name: MaxMind Country Code
317 | - key: country.names.en
318 | pretty_name: MaxMind Country
319 | - key: subdivisions
320 | multi_match:
321 | keys:
322 | - iso_code
323 | pretty_name: MaxMind Region Code
324 | - key: subdivisions
325 | multi_match:
326 | keys:
327 | - names.en
328 | pretty_name: MaxMind Region
329 | - key: city.names.en
330 | pretty_name: MaxMind City
331 | - key: postal.code
332 | pretty_name: MaxMind Zip Code
333 | - key: location.latitude
334 | pretty_name: MaxMind Latitude
335 | - key: location.longitude
336 | pretty_name: MaxMind Longitude
337 | - key: location.time_zone
338 | pretty_name: MaxMind Timezone
339 | freegeoip:
340 | name: freegeoip.io
341 | default: true
342 | otypes:
343 | - ipv4
344 | # - fqdn
345 | json:
346 | request:
347 | url: https://freegeoip.io/json/{target}
348 | results:
349 | - key: country_code
350 | pretty_name: GeoIP Country Code
351 | - key: country_name
352 | pretty_name: GeoIP Country
353 | # - key: region_code
354 | # pretty_name: GeoIP Region Code
355 | # - key: region_name
356 | # pretty_name: GeoIP Region
357 | - key: city
358 | pretty_name: GeoIP City
359 | # - key: zip_code
360 | # pretty_name: GeoIP Zip Code
361 | # - key: latitude
362 | # pretty_name: GeoIP Latitude
363 | # - key: longitude
364 | # pretty_name: GeoIP Longitude
365 | # - key: time_zone
366 | # pretty_name: GeoIP Timezone
367 | fortinet_classify:
368 | name: Fortinet Category
369 | default: True
370 | otypes:
371 | - ipv4
372 | - fqdn
373 | - url
374 | webscraper:
375 | request:
376 | url: 'https://www.fortiguard.com/webfilter?q={target}'
377 | method: get
378 | results:
379 | - regex: 'Category:\s(.+)<\/h4>\s'
380 | values:
381 | - fortinet_category
382 | pretty_name: Fortinet URL Category
383 | vt_ip:
384 | name: VirusTotal pDNS
385 | otypes:
386 | - ipv4
387 | json:
388 | request:
389 | url: https://www.virustotal.com/vtapi/v2/ip-address/report
390 | params:
391 | ip: '{target}'
392 | apikey: 308211ef74a1044ea98134424b3d20769451d25beda0b808a8b61036badc0ea1
393 | method: get
394 | results:
395 | - key: resolutions
396 | multi_match:
397 | keys:
398 | - key: last_resolved
399 | regex: '(\d{4}\-\d{1,2}\-\d{1,2})'
400 | - hostname
401 | onlyif:
402 | key: last_resolved
403 | maxage: '-30d'
404 | pretty_name: pDNS data from VirusTotal
405 | - key: detected_urls
406 | multi_match:
407 | keys:
408 | - key: scan_date
409 | regex: '(\d{4}\-\d{1,2}\-\d{1,2})'
410 | - key: url
411 | regex: '(http.{1,70}/)'
412 | onlyif:
413 | key: scan_date
414 | maxage: '-30d'
415 | pretty_name: pDNS malicious URLs from VirusTotal
416 | # vt_ip:
417 | # name: VirusTotal pDNS
418 | # otypes:
419 | # - ip
420 | # webscraper:
421 | # request:
422 | # url: 'https://www.virustotal.com/en/ip-address/{target}/information/'
423 | # method: get
424 | # headers:
425 | # Accept: 'text/html, application/xhtml+xml, */*'
426 | # Accept-Language: 'en-US'
427 | # Accept-Encoding: 'gzip, deflate'
428 | # DNT: 1
429 | # Connection: 'Keep-Alive'
430 | # results:
431 | # - regex: '(\d{4}\-\d{1,2}\-\d{1,2})\s+<.{30,70}/en/domain/(.{1,80})/information'
432 | # values:
433 | # - vt_pdns_date
434 | # - vt_pdns_domain
435 | # pretty_name: 'pDNS data from VirusTotal'
436 | # - regex: '(\d{4}\-\d{1,2}\-\d{1,2}).{1,20}\s+<.{10,80}/en/url/.{1,100}/analysis/.{1,5}\s+(http.{1,70}/)'
437 | # values:
438 | # - vt_pdns_date
439 | # - vt_pdns_url
440 | # pretty_name: 'pDNS malicious URLs from VirusTotal'
441 | vt_domain:
442 | name: VirusTotal pDNS
443 | otypes:
444 | - fqdn
445 | json:
446 | request:
447 | url: https://www.virustotal.com/vtapi/v2/domain/report
448 | params:
449 | domain: '{target}'
450 | apikey: 308211ef74a1044ea98134424b3d20769451d25beda0b808a8b61036badc0ea1
451 | method: get
452 | results:
453 | - key: resolutions
454 | multi_match:
455 | keys:
456 | - key: last_resolved
457 | regex: '(\d{4}\-\d{1,2}\-\d{1,2})'
458 | - ip_address
459 | pretty_name: pDNS data from VirusTotal
460 | - key: Websense ThreatSeeker category
461 | pretty_name: Websense ThreatSeeker category
462 | - key: Webutation domain info.Safety score
463 | pretty_name: Webutation Safety score
464 | # vt_domain:
465 | # name: VirusTotal pDNS
466 | # otypes:
467 | # - fqdn
468 | # webscraper:
469 | # request:
470 | # url: 'https://www.virustotal.com/en/domain/{target}/information/'
471 | # method: get
472 | # headers:
473 | # Accept: 'text/html, application/xhtml+xml, */*'
474 | # Accept-Language: 'en-US'
475 | # Accept-Encoding: 'gzip, deflate'
476 | # DNT: 1
477 | # Connection: 'Keep-Alive'
478 | # results:
479 | # - regex: '(\d{4}\-\d{1,2}\-\d{1,2})\s+<.{30,70}/en/ip-address/(.{1,80})/information'
480 | # values:
481 | # - vt_pdns_date
482 | # - vt_pdns_ip
483 | # pretty_name: 'pDNS data from VirusTotal'
484 | # - regex: '(\d{4}\-\d{1,2}\-\d{1,2}).{1,20}\s+<.{10,80}/en/url/.{1,100}/analysis/.{1,5}\s+(http.{1,70}/)'
485 | # values:
486 | # - vt_pdns_date
487 | # - vt_pdns_url
488 | # pretty_name: 'pDNS malicious URLs from VirusTotal'
489 | vt_url:
490 | name: VirusTotal URL Report
491 | otypes:
492 | - url
493 | json:
494 | request:
495 | url: https://www.virustotal.com/vtapi/v2/url/report
496 | method: get
497 | params:
498 | apikey: 308211ef74a1044ea98134424b3d20769451d25beda0b808a8b61036badc0ea1
499 | resource: '{target}'
500 | results:
501 | - key: scan_date
502 | pretty_name: Date submitted
503 | - key: positives
504 | pretty_name: Detected scanners
505 | - key: total
506 | pretty_name: Total scanners
507 | - key: scans
508 | pretty_name: URL Scanner
509 | multi_match:
510 | keys:
511 | - '@'
512 | - result
513 | onlyif: detected
514 | vt_hash:
515 | name: VirusTotal File Report
516 | otypes:
517 | - hash
518 | - hash.sha1
519 | - 'hash.sha256'
520 | json:
521 | request:
522 | url: https://www.virustotal.com/vtapi/v2/file/report
523 | method: get
524 | params:
525 | apikey: 308211ef74a1044ea98134424b3d20769451d25beda0b808a8b61036badc0ea1
526 | resource: '{target}'
527 | results:
528 | - key: scan_date
529 | pretty_name: Date submitted
530 | - key: positives
531 | pretty_name: Detected engines
532 | - key: total
533 | pretty_name: Total engines
534 | - key: scans
535 | pretty_name: Scans
536 | multi_match:
537 | keys:
538 | - '@'
539 | - result
540 | onlyif: detected
541 | reputation_authority:
542 | name: Reputation Authority
543 | otypes:
544 | - fqdn
545 | - ipv4
546 | webscraper:
547 | request:
548 | url: 'http://www.reputationauthority.org/lookup.php?ip={target}'
549 | method: get
550 | results:
551 | - regex: '>(\d{1,3}\/\d{1,3})'
552 | values:
553 | - ra_score
554 | pretty_name: Reputation Authority Score
555 | threatexpert:
556 | name: ThreatExpert
557 | otypes:
558 | - hash
559 | webscraper:
560 | request:
561 | url: 'http://www.threatexpert.com/report.aspx?md5={target}'
562 | method: get
563 | results:
564 | - regex: 'Submission\sreceived.\s(.+)'
565 | values:
566 | - threatexpert_date
567 | pretty_name: Hash found at ThreatExpert
568 | - regex: '1">(.{5,100})\s* | (\d+-\d+)\s*\[D\]\s*(.*?)\s*\s*(.*?)
582 | - regex: '>(\d{2}\-\d{2})<'
583 | values:
584 | - vxvault_date
585 | pretty_name: Date found at VXVault
586 | - regex: '\[D\].{2,40}\Wphp\?id.{2,10}>(.{5,100})([a-zA-Z\s]+)'
601 | values:
602 | - php_activity_type
603 | pretty_name: ProjectHoneyPot activity type
604 | - regex: '>First Received From.+[\n\r\t\s]+.+[\n\r\t\s]+([a-zA-Z0-9,\s]+[a-zA-Z])[a-zA-Z0-9><"&:,()=;\s\t/]+Number Received'
605 | values:
606 | - php_first_mail
607 | pretty_name: ProjectHoneyPot first mail received
608 | - regex: '>Last Received From.+[\n\r\t\s]+.+[\n\r\t\s]+([a-zA-Z0-9,\s]+[a-zA-Z])[a-zA-Z0-9><":,()=;\s\t/]+Number Received'
609 | values:
610 | - php_last_mail
611 | pretty_name: ProjectHoneyPot last mail received
612 | - regex: '>Number Received.+[\n\r\t\s]+.+[\n\r\t\s]+([a-zA-Z0-9,\s\(\)]+[a-zA-Z\)])'
613 | values:
614 | - php_total_mail
615 | pretty_name: ProjectHoneyPot total mail received
616 | - regex: '>Spider First Seen.+[\n\r\t\s]+.+[\n\r\t\s]+([a-zA-Z0-9,\s]+[a-zA-Z])'
617 | values:
618 | - php_first_spider
619 | pretty_name: ProjectHoneyPot spider first seen
620 | - regex: '>Spider Last Seen.+[\n\r\t\s]+.+[\n\r\t\s]+([a-zA-Z0-9,\s\(\)]+[a-zA-Z])'
621 | values:
622 | - php_last_spider
623 | pretty_name: ProjectHoneyPot spider last seen
624 | - regex: '>Spider Sightings.+[\n\r\t\s]+.+[\n\r\t\s]+([a-zA-Z0-9,\s\(]+[a-zA-Z\)])'
625 | values:
626 | - php_spider_sightings
627 | pretty_name: ProjectHoneyPot total spider sightings
628 | - regex: '>User-Agents.+[\n\r\t\s]+.+[\n\r\t\s]+([a-zA-Z0-9\-\(\),\s]+[a-zA-Z\)])'
629 | values:
630 | - php_user_agents
631 | pretty_name: ProjectHoneyPot user-agent sightings
632 | - regex: '>First Post On.+[\n\r\t\s]+.+[\n\r\t\s]+([a-zA-Z0-9,\s]+[a-zA-Z])'
633 | values:
634 | - php_first_post
635 | pretty_name: ProjectHoneyPot first form post
636 | - regex: '>Last Post On.+[\n\r\t\s]+.+[\n\r\t\s]+([a-zA-Z0-9,\s]+[a-zA-Z])'
637 | values:
638 | - php_last_post
639 | pretty_name: ProjectHoneyPot last form post
640 | - regex: '>Form Posts.+[\n\r\t\s]+.+[\n\r\t\s]+([a-zA-Z0-9,\s\(\)]+[a-zA-Z\)])'
641 | values:
642 | - php_form_posts
643 | pretty_name: ProjectHoneyPot total form posts
644 | - regex: '>First Rule-Break On.+[\n\r\t\s]+.+[\n\r\t\s]+([a-zA-Z0-9,\s]+[a-zA-Z])'
645 | values:
646 | - php_first_rulebreak
647 | pretty_name: ProjectHoneyPot first rule break
648 | - regex: '>Last Rule-Break On.+[\n\r\t\s]+.+[\n\r\t\s]+([a-zA-Z0-9,\s]+[a-zA-Z])'
649 | values:
650 | - php_last_rulebreak
651 | pretty_name: ProjectHoneyPot last rule break
652 | - regex: '>Rule Breaks.+[\n\r\t\s]+.+[\n\r\t\s]+([a-zA-Z0-9,\s\(\)]+[a-zA-Z\)])'
653 | values:
654 | - php_total_rulebreaks
655 | pretty_name: ProjectHoneyPot total rule breaks
656 | - regex: 'Dictionary Attacks[a-zA-Z0-9><":,()=;\s\t/]+>First Received From.+[\n\r\t\s]+.+[\n\r\t\s]+([a-zA-Z0-9,\s]+[a-zA-Z])'
657 | values:
658 | - php_first_dictionary_attack
659 | pretty_name: ProjectHoneyPot first dictionary attack
660 | - regex: 'Dictionary Attacks[a-zA-Z0-9><"&:,()=;\s\t/]+>Last Received From.+[\n\r\t\s]+.+[\n\r\t\s]+([a-zA-Z0-9,\s]+[a-zA-Z])'
661 | values:
662 | - php_last_dictionary_attack
663 | pretty_name: ProjectHoneyPot last dictionary attack
664 | - regex: '>Dictionary Attacks.+[\n\r\t\s]+.+[\n\r\t\s]+([a-zA-Z0-9,\s\(\)]+[a-zA-Z\)])'
665 | values:
666 | - php_total_dictionary_attacks
667 | pretty_name: ProjectHoneyPot total dictionary attacks
668 | - regex: '>First Bad Host Appearance.+[\n\r\t\s]+.+[\n\r\t\s]+([a-zA-Z0-9,\s]+[a-zA-Z])'
669 | values:
670 | - php_first_bad_host
671 | pretty_name: ProjectHoneyPot first bad host
672 | - regex: '>Last Bad Host Appearance.+[\n\r\t\s]+.+[\n\r\t\s]+([a-zA-Z0-9,\s]+[a-zA-Z])'
673 | values:
674 | - php_last_bad_host
675 | pretty_name: ProjectHoneyPot last bad host
676 | - regex: '>Bad Host Appearances.+[\n\r\t\s]+.+[\n\r\t\s]+([a-zA-Z0-9,\s\(\)\-]+[a-zA-Z\)])'
677 | values:
678 | - php_total_bad_host
679 | pretty_name: ProjectHoneyPot total bad hosts
680 | - regex: '>Harvester First Seen.+[\n\r\t\s]+.+[\n\r\t\s]+([a-zA-Z0-9,\s]+[a-zA-Z])'
681 | values:
682 | - php_first_harvester
683 | pretty_name: ProjectHoneyPot harvester first seen
684 | - regex: '>Harvester Last Seen.+[\n\r\t\s]+.+[\n\r\t\s]+([a-zA-Z0-9,\s\(\)]+[a-zA-Z])'
685 | values:
686 | - php_last_harvester
687 | pretty_name: ProjectHoneyPot harvester last seen
688 | - regex: '>Harvester Sightings.+[\n\r\t\s]+.+[\n\r\t\s]+([a-zA-Z0-9,\(\s]+[a-zA-Z\)])'
689 | values:
690 | - php_total_harvester
691 | pretty_name: ProjectHoneyPot total harvester sightings
692 | - regex: '(?:>Harvester Results(?:.+[\n\s].+[\n\s]+)\s{2,}|(?:))(?!\s)([0-9a-zA-Z.\s:,()-]+)\s{2,}'
693 | values:
694 | - php_harvester_results
695 | pretty_name: ProjectHoneyPot harvester results
696 | mcafee_threat_domain:
697 | name: McAfee Threat
698 | otypes:
699 | - fqdn
700 | webscraper:
701 | request:
702 | url: 'https://www.mcafee.com/threat-intelligence/domain/default.aspx?domain={target}'
703 | method: get
704 | results:
705 | - regex: 'ctl00_breadcrumbContent_imgRisk"[^\r\n]+title="([A-Za-z]+)"'
706 | values:
707 | - mcafee_risk
708 | pretty_name: McAfee Web Risk
709 | - regex: '[\n\s]*Web\sCategory:[\n\s]*([A-Z][A-Za-z\s/,]+?)[\n\s]*'
710 | values:
711 | - mcafee_category
712 | pretty_name: McAfee Web Category
713 | - regex: '[\n\s]*Last\sSeen:[\n\s]*([0-9\-]+)[\n\s]*'
714 | values:
715 | - mcafee_last_seen
716 | pretty_name: McAfee Last Seen
717 | mcafee_threat_ip:
718 | name: McAfee Threat
719 | otypes:
720 | - ipv4
721 | webscraper:
722 | request:
723 | url: 'https://www.mcafee.com/threat-intelligence/ip/default.aspx?ip={target}'
724 | method: get
725 | results:
726 | - regex: 'ctl00_breadcrumbContent_imgRisk"[^\r\n]+src="/img/Threat_IP/rep_([a-z]+)\.png"'
727 | values:
728 | - mcafee_risk
729 | pretty_name: McAfee Web Risk
730 | - regex: 'ctl00_breadcrumbContent_imgRisk1"[^\r\n]+src="/img/Threat_IP/rep_([a-z]+)\.png"'
731 | values:
732 | - mcafee_risk
733 | pretty_name: McAfee Email Risk
734 | - regex: 'ctl00_breadcrumbContent_imgRisk2"[^\r\n]+src="/img/Threat_IP/rep_([a-z]+)\.png"'
735 | values:
736 | - mcafee_risk
737 | pretty_name: McAfee Network Risk
738 | - regex: '[\n\s]*Web\sCategory:[\n\s]*([A-Z][A-Za-z\s/,]+?)[\n\s]*'
739 | values:
740 | - mcafee_category
741 | pretty_name: McAfee Web Category
742 | stopforumspam:
743 | name: StopForumSpam
744 | otypes:
745 | - email
746 | webscraper:
747 | request:
748 | url: 'http://www.stopforumspam.com/search/{target}'
749 | method: get
750 | results:
751 | - regex: '>Found (0*[1-9]\d*) entries'
752 | values:
753 | - sfs_spam_count
754 | pretty_name: Spam email count
755 | cymru_mhr:
756 | name: Cymru MHR
757 | otypes:
758 | - hash
759 | - hash.sha1
760 | webscraper:
761 | request:
762 | url: 'https://hash.cymru.com/cgi-bin/bulkmhr.cgi'
763 | method: post
764 | data:
765 | action: do_whois
766 | bulk_paste: '{target}'
767 | submit_paste: Submit
768 | results:
769 | - regex: '[a-f0-9]+\s(\d+)\s(\d+)'
770 | values:
771 | - cymru_mhr_detect_time
772 | - cymru_mhr_detect_pct
773 | pretty_name: Cymru MHR Detection Percent
774 | icsi_notary:
775 | name: ICSI Certificate Notary
776 | otypes:
777 | - sslfp
778 | dns:
779 | request:
780 | query: '{target_stripped}.notary.icsi.berkeley.edu'
781 | rrtype: txt
782 | results:
783 | - regex: 'version=1 first_seen=(\d+) last_seen=(\d+) times_seen=(\d+) validated=(\d+)'
784 | values:
785 | - icsi_first_seen
786 | - icsi_last_seen
787 | - icsi_times_seen
788 | - icsi_validated
789 | pretty_name: ICSI Notary Results
790 | totalhash_ip:
791 | name: TotalHash
792 | default: false
793 | otypes:
794 | - ip
795 | webscraper:
796 | request:
797 | url: 'https://totalhash.com/network/dnsrr:*{target}*%20or%20ip:{target}'
798 | method: get
799 | results:
800 | - regex: '/analysis/(\w{40}).+(\d{4}\-\d{1,2}\-\d{1,2}\s\d{1,2}:\d{1,2}:\d{1,2})'
801 | values:
802 | - thip_hash
803 | - thip_date
804 | pretty_name: Totalhash
805 | domaintools_parsed_whois:
806 | name: DomainTools Whois
807 | default: false
808 | otypes:
809 | - fqdn
810 | json:
811 | request:
812 | url: 'https://api.domaintools.com/v1/{target}/whois/parsed'
813 | method: get
814 | params:
815 | api_username:
816 | api_key:
817 | results:
818 | - key: response.parsed_whois.contacts
819 | multi_match:
820 | keys:
821 | - '@'
822 | - name
823 | - country
824 | - email
825 | onlyif: name
826 | pretty_name: Whois Contacts
827 | - key: response.parsed_whois.created_date
828 | pretty_name: Domain registered
829 | regex: '(\d{4}\-\d{1,2}\-\d{1,2})'
830 | - key: response.parsed_whois.updated_date
831 | pretty_name: Whois updated
832 | regex: '(\d{4}\-\d{1,2}\-\d{1,2})'
833 | - key: response.parsed_whois.expired_date
834 | pretty_name: Domain expiration
835 | regex: '(\d{4}\-\d{1,2}\-\d{1,2})'
836 | - key: response.parsed_whois.name_servers
837 | pretty_name: Name Servers
838 | #match_all: true
839 | - key: response.parsed_whois.registrar
840 | pretty_name: Registrar Info
841 | multi_match:
842 | keys:
843 | - name
844 | - abuse_contact_phone
845 | - abuse_contact_email
846 | - url
847 | domaintools_reverse_whois:
848 | name: DomainTools Reverse Whois
849 | default: false
850 | otypes:
851 | - email
852 | json:
853 | request:
854 | url: 'https://api.domaintools.com/v1/reverse-whois/'
855 | method: get
856 | params:
857 | terms: '{target}'
858 | mode: purchase
859 | api_username:
860 | api_key:
861 | results:
862 | - key: response.domains
863 | match_all: true
864 | pretty_name: Registered domain
865 | - key: response.domain_count.current
866 | pretty_name: Currently active registered domains
867 | - key: response.domain_count.historic
868 | pretty_name: All registered domains
869 | domaintools_reputation:
870 | name: DomainTools Reputation
871 | default: false
872 | otypes:
873 | - fqdn
874 | json:
875 | request:
876 | url: 'https://api.domaintools.com/v1/reputation/'
877 | method: get
878 | params:
879 | domain: '{target}'
880 | include_reasons: 'true'
881 | api_username:
882 | api_key:
883 | results:
884 | - key: response.risk_score
885 | pretty_name: Risk Score
886 | - key: response.reasons
887 | pretty_name: Reasons
888 | dnsdb_ip:
889 | name: Farsight DNSDB
890 | default: False
891 | otypes:
892 | - ipv4
893 | - ipv6
894 | json:
895 | multi_json: true
896 | request:
897 | url: 'https://api.dnsdb.info/lookup/rdata/ip/{target}'
898 | method: get
899 | headers:
900 | Accept: application/json
901 | X-Api-Key:
902 | results:
903 | - key: '@'
904 | multi_match:
905 | keys:
906 | - rrname
907 | - rrtype
908 | - key: time_first
909 | format: as_time
910 | - key: time_last
911 | format: as_time
912 | labels:
913 | - Record Name
914 | - Record Type
915 | - First Seen
916 | - Last Seen
917 | dnsdb_fqdn:
918 | name: Farsight DNSDB
919 | default: False
920 | otypes:
921 | - fqdn
922 | json:
923 | multi_json: true
924 | request:
925 | url: 'https://api.dnsdb.info/lookup/rrset/name/{target}'
926 | method: get
927 | ignored_status_codes:
928 | - 404
929 | params:
930 | time_last_after:
931 | relatime: '-7d'
932 | timezone: UTC
933 | format: as_epoch
934 | headers:
935 | Accept: application/json
936 | X-Api-Key:
937 | results:
938 | - key: '@'
939 | multi_match:
940 | keys:
941 | - rrtype
942 | - key: rdata
943 | # format: as_list
944 | - key: time_last
945 | format: as_time
946 | labels:
947 | - Record Type
948 | - Record Data
949 | - Last Seen
950 | onlyif:
951 | key: rrtype
952 | regex: "^(A|AAAA|MX|SPF|TXT)$"
953 | cif:
954 | name: Collective Intelligence Framework
955 | default: false
956 | otypes:
957 | - ipv4
958 | - fqdn
959 | - email
960 | - hash
961 | json:
962 | request:
963 | url: 'https://cif/observables'
964 | method: get
965 | params:
966 | nolog: 1
967 | confidence: 75
968 | observable: '{target}'
969 | reporttime:
970 | relatime: '-2d'
971 | timezone: UTC
972 | reporttimeend:
973 | relatime: 'now'
974 | timezone: UTC
975 | headers:
976 | Accept: application/vnd.cif.v2+json
977 | Authorization: Token token=
978 | verify_ssl: False
979 | results:
980 | - key: '@'
981 | multi_match:
982 | keys:
983 | - asn
984 | - cc
985 | labels:
986 | - AS Number
987 | - Country Code
988 | - key: '@'
989 | multi_match:
990 | keys:
991 | - key: reporttime
992 | regex: '^(\d+-\d+-\d+)T'
993 | - confidence
994 | - key: tags
995 | format: as_list
996 | - provider
997 | - description
998 | labels:
999 | - Report Date
1000 | - Confidence
1001 | - Tags
1002 | - Provider
1003 | - Description
1004 |
1005 | threatcrowd_ip_report:
1006 | name: ThreatCrowd IP Report
1007 | default: True
1008 | otypes:
1009 | - ipv4
1010 | json:
1011 | paginated: false
1012 | request:
1013 | url: 'https://www.threatcrowd.org/searchApi/v2/ip/report/?ip={target}'
1014 | method: get
1015 | ignored_status_codes:
1016 | - 404
1017 | results:
1018 | - key: 'resolutions'
1019 | pretty_name: Passive DNS
1020 | multi_match:
1021 | keys:
1022 | - domain
1023 | - last_resolved
1024 | labels:
1025 | - Domain
1026 | - Last Resolved
1027 | onlyif:
1028 | key: last_resolved
1029 | maxage: '-30d'
1030 | - key: 'hashes'
1031 | pretty_name: Known Malware Hash
1032 | match_all: true
1033 |
1034 | passivetotal_pdns:
1035 | name: PassiveTotal Passive DNS
1036 | default: False
1037 | otypes:
1038 | - fqdn
1039 | - ipv4
1040 | json:
1041 | request:
1042 | url: 'https://api.passivetotal.org/v2/dns/passive'
1043 | auth: passivetotal
1044 | params:
1045 | query: '{target}'
1046 | method: get
1047 | headers:
1048 | Accept: application/json
1049 | ignored_status_codes:
1050 | - 401
1051 | results:
1052 | - key: results
1053 | format: as_list
1054 | pretty_name: Results
1055 | multi_match:
1056 | keys:
1057 | - key: resolve
1058 | - key: queryValue
1059 | pretty_name: Query Value
1060 |
1061 | passivetotal_whois:
1062 | name: PassiveTotal Whois
1063 | default: False
1064 | otypes:
1065 | - fqdn
1066 | json:
1067 | request:
1068 | url: 'https://api.passivetotal.org/v2/whois'
1069 | auth: passivetotal
1070 | params:
1071 | query: '{target}'
1072 | method: get
1073 | headers:
1074 | Accept: application/json
1075 | ignored_status_codes:
1076 | - 401
1077 | results:
1078 | - key: registryUpdatedAt
1079 | pretty_name: Registry Updated At
1080 | - key: domain
1081 | pretty_name: Domain
1082 | - key: billing
1083 | pretty_name: Billing
1084 | - key: zone
1085 | pretty_name: Zone
1086 | - key: nameServers
1087 | pretty_name: Name Servers
1088 | - key: registered
1089 | pretty_name: Registered
1090 | - key: lastLoadedAt
1091 | pretty_name: Last Loaded At
1092 | - key: whoisServer
1093 | pretty_name: Whois Server
1094 | - key: contactEmail
1095 | pretty_name: Contact Email
1096 | - key: admin
1097 | pretty_name: Admin
1098 | - key: expiresAt
1099 | pretty_name: Expires At
1100 | - key: registrar
1101 | pretty_name: Registrar
1102 | - key: tech
1103 | pretty_name: Tech
1104 | - key: registrant
1105 | pretty_name: Registrant
1106 |
1107 | passivetotal_sslcert:
1108 | name: PassiveTotal SSL Certificate History
1109 | default: False
1110 | otypes:
1111 | - ipv4
1112 | json:
1113 | request:
1114 | url: 'https://api.passivetotal.org/v2/ssl-certificate/history'
1115 | auth: passivetotal
1116 | params:
1117 | query: '{target}'
1118 | method: get
1119 | headers:
1120 | Accept: application/json
1121 | ignored_status_codes:
1122 | - 401
1123 | results:
1124 | - key: results
1125 | multi_match:
1126 | keys:
1127 | - key: sha1
1128 | pretty_name: Sha1
1129 | - key: firstSeen
1130 | pretty_name: First Seen
1131 | - key: ipAddresses
1132 | pretty_name: Ip Addresses
1133 | - key: lastSeen
1134 | pretty_name: Last Seen
1135 | pretty_name: Results
1136 |
1137 | passivetotal_components:
1138 | name: PassiveTotal Components
1139 | default: False
1140 | otypes:
1141 | - fqdn
1142 | json:
1143 | request:
1144 | url: 'https://api.passivetotal.org/v2/host-attributes/components'
1145 | auth: passivetotal
1146 | params:
1147 | query: '{target}'
1148 | method: get
1149 | headers:
1150 | Accept: application/json
1151 | ignored_status_codes:
1152 | - 401
1153 | results:
1154 | - key: results
1155 | multi_match:
1156 | keys:
1157 | - key: category
1158 | pretty_name: Category
1159 | - key: hostname
1160 | pretty_name: Hostname
1161 | - key: lastSeen
1162 | pretty_name: Last Seen
1163 | - key: firstSeen
1164 | pretty_name: First Seen
1165 | - key: label
1166 | pretty_name: Label
1167 | pretty_name: Results
1168 |
1169 | passivetotal_trackers:
1170 | name: PassiveTotal Trackers
1171 | default: False
1172 | otypes:
1173 | - fqdn
1174 | json:
1175 | request:
1176 | url: 'https://api.passivetotal.org/v2/host-attributes/trackers'
1177 | auth: passivetotal
1178 | params:
1179 | query: '{target}'
1180 | method: get
1181 | headers:
1182 | Accept: application/json
1183 | ignored_status_codes:
1184 | - 401
1185 | results:
1186 | - key: results
1187 | multi_match:
1188 | keys:
1189 | - key: hostname
1190 | pretty_name: Hostname
1191 | - key: attributeType
1192 | pretty_name: Type
1193 | - key: attributeValue
1194 | pretty_name: Value
1195 | - key: lastSeen
1196 | pretty_name: Last Seen
1197 | - key: firstSeen
1198 | pretty_name: First Seen
1199 | pretty_name: Results
1200 | fraudguard:
1201 | name: FraudGuard
1202 | default: False
1203 | otypes:
1204 | - ipv4
1205 | json:
1206 | request:
1207 | url: https://api.fraudguard.io/ip/{target}
1208 | auth: fraudguard
1209 | results:
1210 | - key: isocode
1211 | pretty_name: FraudGuard Country Code
1212 | - key: country
1213 | pretty_name: FraudGuard Country
1214 | - key: state
1215 | pretty_name: FraudGuard State
1216 | - key: city
1217 | pretty_name: FraudGuard City
1218 | - key: discover_date
1219 | pretty_name: FraudGuard Discovery Date
1220 | - key: threat
1221 | pretty_name: FraudGuard Threat Type
1222 | - key: risk_level
1223 | pretty_name: FraudGuard Risk Level
1224 | shodan:
1225 | name: Shodan
1226 | default: False
1227 | otypes:
1228 | - ipv4
1229 | json:
1230 | request:
1231 | url: https://api.shodan.io/shodan/host/{target}
1232 | params:
1233 | key:
1234 | results:
1235 | - key: '@'
1236 | multi_match:
1237 | keys:
1238 | - asn
1239 | - org
1240 | - city
1241 | - region
1242 | - country_code
1243 | - postal_code
1244 | pretty_name: Shodan Organization
1245 | - key: hostnames
1246 | match_all: true
1247 | pretty_name: Shodan Hostnames
1248 | - key: isp
1249 | pretty_name: Shodan ISP
1250 | - key: data
1251 | multi_match:
1252 | keys:
1253 | - timestamp
1254 | - transport
1255 | - port
1256 | - product
1257 | - version
1258 | pretty_name: Shodan Ports
1259 | - key: data
1260 | multi_match:
1261 | keys:
1262 | - transport
1263 | - port
1264 | - ssl.versions
1265 | onlyif: ssl.versions
1266 | pretty_name: Shodan SSL Versions
1267 | - key: data
1268 | multi_match:
1269 | keys:
1270 | - transport
1271 | - port
1272 | - ssl.cert.subject.CN
1273 | - ssl.cert.fingerprint.sha256
1274 | onlyif: ssl.cert.fingerprint.sha256
1275 | pretty_name: Shodan SSL Certs
1276 | ipinfoio:
1277 | name: ipinfo.io
1278 | default: False
1279 | otypes:
1280 | - ipv4
1281 | - ipv6
1282 | json:
1283 | request:
1284 | url: https://ipinfo.io/{target}
1285 | headers:
1286 | Accept: application/json
1287 | results:
1288 | - key: hostname
1289 | pretty_name: ipinfo.io hostname
1290 | - key: city
1291 | pretty_name: ipinfo.io city
1292 | - key: region
1293 | pretty_name: ipinfo.io region
1294 | - key: country
1295 | pretty_name: ipinfo.io country
1296 | - key: loc
1297 | pretty_name: ipinfo.io geolocation
1298 | - key: org
1299 | pretty_name: ipinfo.io organization
1300 | - key: postal
1301 | pretty_name: ipinfo.io postal code
1302 | xforce-malware:
1303 | name: IBM XForce Malware Report
1304 | default: False
1305 | otypes:
1306 | - ipv4
1307 | json:
1308 | request:
1309 | url: https://api.xforce.ibmcloud.com/ipr/malware/{target}
1310 | auth: xforce
1311 | results:
1312 | - key: type
1313 | pretty_name: malware type
1314 | - key: md5
1315 | pretty_name: md5
1316 | - key: domain
1317 | pretty_name: domain name
1318 | - key: firstseen
1319 | pretty_name: first seen
1320 | - key: lastseen
1321 | pretty_name: last seen
1322 | hackedip:
1323 | name: Hacked IP
1324 | default: False
1325 | otypes:
1326 | - ipv4
1327 | json:
1328 | request:
1329 | url: http://www.hackedip.com/api.php?ip={target}
1330 | results:
1331 | - key: '@'
1332 | format: as_list
1333 | pretty_name: Hacked IP Threat List
1334 | metadefender_hash:
1335 | name: MetaDefender File Report
1336 | default: False
1337 | otypes:
1338 | - hash
1339 | - hash.sha1
1340 | - hash.sha256
1341 | json:
1342 | request:
1343 | url: https://api.metadefender.com/v2/hash/{target}
1344 | method: get
1345 | headers:
1346 | apikey:
1347 | results:
1348 | - key: scan_results.start_time
1349 | pretty_name: Date submitted
1350 | - key: scan_results.total_detected_avs
1351 | pretty_name: Detected engines
1352 | - key: scan_results.total_avs
1353 | pretty_name: Total engines
1354 | - key: scan_results.scan_details
1355 | pretty_name: Scans
1356 | multi_match:
1357 | keys:
1358 | - '@'
1359 | - threat_found
1360 | onlyif: scan_result_i
1361 | # misp:
1362 | # name: MISP
1363 | # default: true
1364 | # otypes:
1365 | # - ipv4
1366 | # - url
1367 | # - email
1368 | # - fqdn
1369 | # - hash
1370 | # - hash.sha1
1371 | # - hash.sha256
1372 | # json:
1373 | # request:
1374 | # url: https://***YOUR_MISP_HERE***/events/restSearch/download/{target}/null/null/null/null/7
1375 | # method: get
1376 | # headers:
1377 | # Authorization: ***YOUR_APIKEY_HERE***
1378 | # results:
1379 | # - key: response
1380 | # pretty_name: MISP Events
1381 | # multi_match:
1382 | # keys:
1383 | # - Event.date
1384 | # - Event.id
1385 | # - Event.info
1386 | greynoise:
1387 | # This entry is for the GreyNoise *community* API
1388 | name: GreyNoise
1389 | otypes:
1390 | - ipv4
1391 | json:
1392 | request:
1393 | url: https://api.greynoise.io/v3/community/{target}
1394 | # headers:
1395 | # key: ***YOUR_APIKEY_HERE***
1396 | # you can get this from https://viz.greynoise.io/account/
1397 | ignored_status_codes:
1398 | - 404
1399 | results:
1400 | - key: noise
1401 | pretty_name: GreyNoise Known Scanner
1402 | - key: riot
1403 | pretty_name: GreyNoise Rule-It-OuT
1404 | - key: classification
1405 | pretty_name: GreyNoise Classification
1406 | - key: name
1407 | pretty_name: GreyNoise Name
1408 | greynoise_ent:
1409 | # This entry is for the GreyNoise *enterprise* API
1410 | name: GreyNoise
1411 | default: False
1412 | otypes:
1413 | - ipv4
1414 | json:
1415 | request:
1416 | url: https://enterprise.api.greynoise.io/v2/noise/context/{target}
1417 | headers:
1418 | key: YOUR_APIKEY_HERE
1419 | ignored_status_codes:
1420 | - 404
1421 | results:
1422 | - key: seen
1423 | pretty_name: GreyNoise Known Scanner
1424 | - key: actor
1425 | pretty_name: GreyNoise Actor
1426 | - key: tags
1427 | pretty_name: GreyNoise Reason
1428 | - key: metadata.category
1429 | pretty_name: GreyNoise Category
1430 | - key: first_seen
1431 | pretty_name: GreyNoise First Seen
1432 | - key: last_seen
1433 | pretty_name: GreyNoise Last Seen
1434 | - key: raw_data.web.useragents
1435 | pretty_name: GreyNoise User-agent
1436 | - key: raw_data.scan
1437 | multi_match:
1438 | keys:
1439 | - port
1440 | - protocol
1441 | pretty_name: GreyNoise Observations
1442 | macvendors:
1443 | name: MACVendors
1444 | default: true
1445 | otypes:
1446 | - mac
1447 | webscraper:
1448 | request:
1449 | url: 'https://api.macvendors.com/{target}'
1450 | method: get
1451 | results:
1452 | - regex: '(.+)'
1453 | values:
1454 | - vendor
1455 | pretty_name: Mac Address Vendor
1456 |
--------------------------------------------------------------------------------
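
The YAML above is the heart of machinae: every top-level key defines one site, with `otypes` naming the observable types it accepts, a fetcher block (`json`, `webscraper`, `tablescraper`, `csv`, `rss`, or `ipwhois`) describing the request, and `results` describing how to pull fields out of the response. A minimal sketch of loading and filtering one such definition, using a hypothetical `demo_site` entry:

    import yaml

    # Hypothetical site definition in the same shape as the entries above
    doc = """
    demo_site:
      name: Demo Site
      default: true
      otypes:
        - ipv4
      json:
        request:
          url: 'https://example.invalid/ip/{target}'
        results:
          - key: risk
            pretty_name: Demo Risk
    """
    conf = yaml.safe_load(doc)
    # Mirrors the otype filter applied per-target in cmd.py
    otypes = [o.lower() for o in conf["demo_site"]["otypes"]]
    print("ipv4" in otypes)  # True
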
/pylintrc:
--------------------------------------------------------------------------------
1 | [MASTER]
2 |
3 | # A comma-separated list of package or module names from where C extensions may
4 | # be loaded. Extensions are loaded into the active Python interpreter and may
5 | # run arbitrary code.
6 | extension-pkg-whitelist=
7 |
8 | # Add files or directories to the blacklist. They should be base names, not
9 | # paths.
10 | ignore=CVS
11 |
12 | # Add files or directories matching the regex patterns to the blacklist. The
13 | # regex matches against base names, not paths.
14 | ignore-patterns=
15 |
16 | # Python code to execute, usually for sys.path manipulation such as
17 | # pygtk.require().
18 | #init-hook=
19 |
20 | # Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the
21 | # number of processors available to use.
22 | jobs=1
23 |
24 | # Control the amount of potential inferred values when inferring a single
25 | # object. This can help the performance when dealing with large functions or
26 | # complex, nested conditions.
27 | limit-inference-results=100
28 |
29 | # List of plugins (as comma separated values of python modules names) to load,
30 | # usually to register additional checkers.
31 | load-plugins=
32 |
33 | # Pickle collected data for later comparisons.
34 | persistent=yes
35 |
36 | # Specify a configuration file.
37 | #rcfile=
38 |
39 | # When enabled, pylint would attempt to guess common misconfiguration and emit
40 | # user-friendly hints instead of false-positive error messages.
41 | suggestion-mode=yes
42 |
43 | # Allow loading of arbitrary C extensions. Extensions are imported into the
44 | # active Python interpreter and may run arbitrary code.
45 | unsafe-load-any-extension=no
46 |
47 |
48 | [MESSAGES CONTROL]
49 |
50 | # Only show warnings with the listed confidence levels. Leave empty to show
51 | # all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED.
52 | confidence=
53 |
54 | # Disable the message, report, category or checker with the given id(s). You
55 | # can either give multiple identifiers separated by comma (,) or put this
56 | # option multiple times (only on the command line, not in the configuration
57 | # file where it should appear only once). You can also use "--disable=all" to
58 | # disable everything first and then reenable specific checks. For example, if
59 | # you want to run only the similarities checker, you can use "--disable=all
60 | # --enable=similarities". If you want to run only the classes checker, but have
61 | # no Warning level messages displayed, use "--disable=all --enable=classes
62 | # --disable=W".
63 | disable=print-statement,
64 | parameter-unpacking,
65 | unpacking-in-except,
66 | old-raise-syntax,
67 | backtick,
68 | long-suffix,
69 | old-ne-operator,
70 | old-octal-literal,
71 | import-star-module-level,
72 | non-ascii-bytes-literal,
73 | raw-checker-failed,
74 | bad-inline-option,
75 | locally-disabled,
76 | locally-enabled,
77 | file-ignored,
78 | suppressed-message,
79 | useless-suppression,
80 | deprecated-pragma,
81 | use-symbolic-message-instead,
82 | apply-builtin,
83 | basestring-builtin,
84 | buffer-builtin,
85 | cmp-builtin,
86 | coerce-builtin,
87 | execfile-builtin,
88 | file-builtin,
89 | long-builtin,
90 | raw_input-builtin,
91 | reduce-builtin,
92 | standarderror-builtin,
93 | unicode-builtin,
94 | xrange-builtin,
95 | coerce-method,
96 | delslice-method,
97 | getslice-method,
98 | setslice-method,
99 | no-absolute-import,
100 | old-division,
101 | dict-iter-method,
102 | dict-view-method,
103 | next-method-called,
104 | metaclass-assignment,
105 | indexing-exception,
106 | raising-string,
107 | reload-builtin,
108 | oct-method,
109 | hex-method,
110 | nonzero-method,
111 | cmp-method,
112 | input-builtin,
113 | round-builtin,
114 | intern-builtin,
115 | unichr-builtin,
116 | map-builtin-not-iterating,
117 | zip-builtin-not-iterating,
118 | range-builtin-not-iterating,
119 | filter-builtin-not-iterating,
120 | using-cmp-argument,
121 | eq-without-hash,
122 | div-method,
123 | idiv-method,
124 | rdiv-method,
125 | exception-message-attribute,
126 | invalid-str-codec,
127 | sys-max-int,
128 | bad-python3-import,
129 | deprecated-string-function,
130 | deprecated-str-translate-call,
131 | deprecated-itertools-function,
132 | deprecated-types-field,
133 | next-method-defined,
134 | dict-items-not-iterating,
135 | dict-keys-not-iterating,
136 | dict-values-not-iterating,
137 | deprecated-operator-function,
138 | deprecated-urllib-function,
139 | xreadlines-attribute,
140 | deprecated-sys-function,
141 | exception-escape,
142 | comprehension-escape,
143 | line-too-long,
144 | missing-docstring,
145 | invalid-name,
146 | unused-argument,
147 | inconsistent-return-statements,
148 | arguments-differ,
149 | protected-access,
150 | too-many-locals,
151 | too-many-branches,
152 | not-context-manager,
153 | unexpected-keyword-arg,
154 | no-member,
155 | cyclic-import,
156 | anomalous-backslash-in-string,
157 | import-outside-toplevel,
158 | no-else-continue,
159 | super-with-arguments
160 |
161 |
162 | # Enable the message, report, category or checker with the given id(s). You can
163 | # either give multiple identifiers separated by commas (,) or put this option
164 | # multiple times (only on the command line, not in the configuration file where
165 | # it should appear only once). See also the "--disable" option for examples.
166 | enable=c-extension-no-member
167 |
168 |
169 | [REPORTS]
170 |
171 | # Python expression which should return a note less than 10 (10 is the highest
172 | # note). You have access to the variables errors, warning, and statement, which
173 | # respectively contain the number of errors / warnings messages and the total
174 | # number of statements analyzed. This is used by the global evaluation report
175 | # (RP0004).
176 | evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)
177 |
178 | # Template used to display messages. This is a python new-style format string
179 | # used to format the message information. See doc for all details.
180 | #msg-template=
181 |
182 | # Set the output format. Available formats are text, parseable, colorized, json
183 | # and msvs (visual studio). You can also give a reporter class, e.g.
184 | # mypackage.mymodule.MyReporterClass.
185 | output-format=text
186 |
187 | # Tells whether to display a full report or only the messages.
188 | reports=no
189 |
190 | # Activate the evaluation score.
191 | score=yes
192 |
193 |
194 | [REFACTORING]
195 |
196 | # Maximum number of nested blocks for function / method body
197 | max-nested-blocks=5
198 |
199 | # Complete name of functions that never returns. When checking for
200 | # inconsistent-return-statements if a never returning function is called then
201 | # it will be considered as an explicit return statement and no message will be
202 | # printed.
203 | never-returning-functions=sys.exit
204 |
205 |
206 | [BASIC]
207 |
208 | # Naming style matching correct argument names.
209 | argument-naming-style=snake_case
210 |
211 | # Regular expression matching correct argument names. Overrides argument-
212 | # naming-style.
213 | #argument-rgx=
214 |
215 | # Naming style matching correct attribute names.
216 | attr-naming-style=snake_case
217 |
218 | # Regular expression matching correct attribute names. Overrides attr-naming-
219 | # style.
220 | #attr-rgx=
221 |
222 | # Bad variable names which should always be refused, separated by a comma.
223 | bad-names=foo,
224 | bar,
225 | baz,
226 | toto,
227 | tutu,
228 | tata
229 |
230 | # Naming style matching correct class attribute names.
231 | class-attribute-naming-style=any
232 |
233 | # Regular expression matching correct class attribute names. Overrides class-
234 | # attribute-naming-style.
235 | #class-attribute-rgx=
236 |
237 | # Naming style matching correct class names.
238 | class-naming-style=PascalCase
239 |
240 | # Regular expression matching correct class names. Overrides class-naming-
241 | # style.
242 | #class-rgx=
243 |
244 | # Naming style matching correct constant names.
245 | const-naming-style=UPPER_CASE
246 |
247 | # Regular expression matching correct constant names. Overrides const-naming-
248 | # style.
249 | #const-rgx=
250 |
251 | # Minimum line length for functions/classes that require docstrings; shorter
252 | # ones are exempt.
253 | docstring-min-length=-1
254 |
255 | # Naming style matching correct function names.
256 | function-naming-style=snake_case
257 |
258 | # Regular expression matching correct function names. Overrides function-
259 | # naming-style.
260 | #function-rgx=
261 |
262 | # Good variable names which should always be accepted, separated by a comma.
263 | good-names=i,
264 | j,
265 | k,
266 | ex,
267 | Run,
268 | _
269 |
270 | # Include a hint for the correct naming format with invalid-name.
271 | include-naming-hint=no
272 |
273 | # Naming style matching correct inline iteration names.
274 | inlinevar-naming-style=any
275 |
276 | # Regular expression matching correct inline iteration names. Overrides
277 | # inlinevar-naming-style.
278 | #inlinevar-rgx=
279 |
280 | # Naming style matching correct method names.
281 | method-naming-style=snake_case
282 |
283 | # Regular expression matching correct method names. Overrides method-naming-
284 | # style.
285 | #method-rgx=
286 |
287 | # Naming style matching correct module names.
288 | module-naming-style=snake_case
289 |
290 | # Regular expression matching correct module names. Overrides module-naming-
291 | # style.
292 | #module-rgx=
293 |
294 | # Colon-delimited sets of names that determine each other's naming style when
295 | # the name regexes allow several styles.
296 | name-group=
297 |
298 | # Regular expression which should only match function or class names that do
299 | # not require a docstring.
300 | no-docstring-rgx=^_
301 |
302 | # List of decorators that produce properties, such as abc.abstractproperty. Add
303 | # to this list to register other decorators that produce valid properties.
304 | # These decorators are taken into consideration only for invalid-name.
305 | property-classes=abc.abstractproperty
306 |
307 | # Naming style matching correct variable names.
308 | variable-naming-style=snake_case
309 |
310 | # Regular expression matching correct variable names. Overrides variable-
311 | # naming-style.
312 | #variable-rgx=
313 |
314 |
315 | [FORMAT]
316 |
317 | # Expected format of line ending, e.g. empty (any line ending), LF or CRLF.
318 | expected-line-ending-format=
319 |
320 | # Regexp for a line that is allowed to be longer than the limit.
321 | ignore-long-lines=^\s*(# )?<?https?://\S+>?$
322 |
323 | # Number of spaces of indent required inside a hanging or continued line.
324 | indent-after-paren=4
325 |
326 | # String used as indentation unit. This is usually " " (4 spaces) or "\t" (1
327 | # tab).
328 | indent-string=' '
329 |
330 | # Maximum number of characters on a single line.
331 | max-line-length=100
332 |
333 | # Maximum number of lines in a module.
334 | max-module-lines=1000
335 |
336 | # List of optional constructs for which whitespace checking is disabled. `dict-
337 | # separator` is used to allow tabulation in dicts, etc.: {1 : 1,\n222: 2}.
338 | # `trailing-comma` allows a space between comma and closing bracket: (a, ).
339 | # `empty-line` allows space-only lines.
340 | no-space-check=trailing-comma,
341 | dict-separator
342 |
343 | # Allow the body of a class to be on the same line as the declaration if body
344 | # contains single statement.
345 | single-line-class-stmt=no
346 |
347 | # Allow the body of an if to be on the same line as the test if there is no
348 | # else.
349 | single-line-if-stmt=no
350 |
351 |
352 | [LOGGING]
353 |
354 | # Logging modules to check that the string format arguments are in logging
355 | # function parameter format.
356 | logging-modules=logging
357 |
358 |
359 | [VARIABLES]
360 |
361 | # List of additional names supposed to be defined in builtins. Remember that
362 | # you should avoid defining new builtins when possible.
363 | additional-builtins=
364 |
365 | # Tells whether unused global variables should be treated as a violation.
366 | allow-global-unused-variables=yes
367 |
368 | # List of strings which can identify a callback function by name. A callback
369 | # name must start or end with one of those strings.
370 | callbacks=cb_,
371 | _cb
372 |
373 | # A regular expression matching the name of dummy variables (i.e. expected to
374 | # not be used).
375 | dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_
376 |
377 | # Argument names that match this expression will be ignored. Default to name
378 | # with leading underscore.
379 | ignored-argument-names=_.*|^ignored_|^unused_
380 |
381 | # Tells whether we should check for unused import in __init__ files.
382 | init-import=no
383 |
384 | # List of qualified module names which can have objects that can redefine
385 | # builtins.
386 | redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io
387 |
388 |
389 | [TYPECHECK]
390 |
391 | # List of decorators that produce context managers, such as
392 | # contextlib.contextmanager. Add to this list to register other decorators that
393 | # produce valid context managers.
394 | contextmanager-decorators=contextlib.contextmanager
395 |
396 | # List of members which are set dynamically and missed by pylint inference
397 | # system, and so shouldn't trigger E1101 when accessed. Python regular
398 | # expressions are accepted.
399 | generated-members=
400 |
401 | # Tells whether missing members accessed in mixin class should be ignored. A
402 | # mixin class is detected if its name ends with "mixin" (case insensitive).
403 | ignore-mixin-members=yes
404 |
405 | # Tells whether to warn about missing members when the owner of the attribute
406 | # is inferred to be None.
407 | ignore-none=yes
408 |
409 | # This flag controls whether pylint should warn about no-member and similar
410 | # checks whenever an opaque object is returned when inferring. The inference
411 | # can return multiple potential results while evaluating a Python object, but
412 | # some branches might not be evaluated, which results in partial inference. In
413 | # that case, it might be useful to still emit no-member and other checks for
414 | # the rest of the inferred objects.
415 | ignore-on-opaque-inference=yes
416 |
417 | # List of class names for which member attributes should not be checked (useful
418 | # for classes with dynamically set attributes). This supports the use of
419 | # qualified names.
420 | ignored-classes=optparse.Values,thread._local,_thread._local
421 |
422 | # List of module names for which member attributes should not be checked
423 | # (useful for modules/projects where namespaces are manipulated during runtime
424 | # and thus existing member attributes cannot be deduced by static analysis). It
425 | # supports qualified module names, as well as Unix pattern matching.
426 | ignored-modules=
427 |
428 | # Show a hint with possible names when a member name was not found. The aspect
429 | # of finding the hint is based on edit distance.
430 | missing-member-hint=yes
431 |
432 | # The minimum edit distance a name should have in order to be considered a
433 | # similar match for a missing member name.
434 | missing-member-hint-distance=1
435 |
436 | # The total number of similar names that should be taken into consideration when
437 | # showing a hint for a missing member.
438 | missing-member-max-choices=1
439 |
440 |
441 | [SIMILARITIES]
442 |
443 | # Ignore comments when computing similarities.
444 | ignore-comments=yes
445 |
446 | # Ignore docstrings when computing similarities.
447 | ignore-docstrings=yes
448 |
449 | # Ignore imports when computing similarities.
450 | ignore-imports=no
451 |
452 | # Minimum lines number of a similarity.
453 | min-similarity-lines=4
454 |
455 |
456 | [MISCELLANEOUS]
457 |
458 | # List of note tags to take into consideration, separated by a comma.
459 | notes=FIXME,
460 | XXX,
461 | TODO
462 |
463 |
464 | [SPELLING]
465 |
466 | # Limits count of emitted suggestions for spelling mistakes.
467 | max-spelling-suggestions=4
468 |
469 | # Spelling dictionary name. Available dictionaries: none. To make it work,
470 | # install the python-enchant package.
471 | spelling-dict=
472 |
473 | # List of comma separated words that should not be checked.
474 | spelling-ignore-words=
475 |
476 | # A path to a file that contains private dictionary; one word per line.
477 | spelling-private-dict-file=
478 |
479 | # Tells whether to store unknown words to indicated private dictionary in
480 | # --spelling-private-dict-file option instead of raising a message.
481 | spelling-store-unknown-words=no
482 |
483 |
484 | [IMPORTS]
485 |
486 | # Allow wildcard imports from modules that define __all__.
487 | allow-wildcard-with-all=no
488 |
489 | # Analyse import fallback blocks. This can be used to support both Python 2 and
490 | # 3 compatible code, which means that the block might have code that exists
491 | # only in one or another interpreter, leading to false positives when analysed.
492 | analyse-fallback-blocks=no
493 |
494 | # Deprecated modules which should not be used, separated by a comma.
495 | deprecated-modules=optparse,tkinter.tix
496 |
497 | # Create a graph of external dependencies in the given file (report RP0402 must
498 | # not be disabled).
499 | ext-import-graph=
500 |
501 | # Create a graph of every (i.e. internal and external) dependencies in the
502 | # given file (report RP0402 must not be disabled).
503 | import-graph=
504 |
505 | # Create a graph of internal dependencies in the given file (report RP0402 must
506 | # not be disabled).
507 | int-import-graph=
508 |
509 | # Force import order to recognize a module as part of the standard
510 | # compatibility libraries.
511 | known-standard-library=
512 |
513 | # Force import order to recognize a module as part of a third party library.
514 | known-third-party=enchant
515 |
516 |
517 | [DESIGN]
518 |
519 | # Maximum number of arguments for function / method.
520 | max-args=5
521 |
522 | # Maximum number of attributes for a class (see R0902).
523 | max-attributes=7
524 |
525 | # Maximum number of boolean expressions in an if statement.
526 | max-bool-expr=5
527 |
528 | # Maximum number of branch for function / method body.
529 | max-branches=12
530 |
531 | # Maximum number of locals for function / method body.
532 | max-locals=15
533 |
534 | # Maximum number of parents for a class (see R0901).
535 | max-parents=10
536 |
537 | # Maximum number of public methods for a class (see R0904).
538 | max-public-methods=20
539 |
540 | # Maximum number of return / yield for function / method body.
541 | max-returns=6
542 |
543 | # Maximum number of statements in function / method body.
544 | max-statements=50
545 |
546 | # Minimum number of public methods for a class (see R0903).
547 | min-public-methods=0
548 |
549 |
550 | [CLASSES]
551 |
552 | # List of method names used to declare (i.e. assign) instance attributes.
553 | defining-attr-methods=__init__,
554 | __new__,
555 | setUp
556 |
557 | # List of member names, which should be excluded from the protected access
558 | # warning.
559 | exclude-protected=_asdict,
560 | _fields,
561 | _replace,
562 | _source,
563 | _make
564 |
565 | # List of valid names for the first argument in a class method.
566 | valid-classmethod-first-arg=cls
567 |
568 | # List of valid names for the first argument in a metaclass class method.
569 | valid-metaclass-classmethod-first-arg=cls
570 |
571 |
572 | [EXCEPTIONS]
573 |
574 | # Exceptions that will emit a warning when being caught. Defaults to
575 | # "Exception".
576 | overgeneral-exceptions=Exception
577 |
--------------------------------------------------------------------------------
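
Nothing exotic in the pylintrc: the long disable list is mostly Python 2/3 compatibility checks that no longer apply, plus a handful of warnings the codebase also suppresses inline with `#pylint` comments. Running it should just be a matter of something like `pylint --rcfile=pylintrc src/machinae` from the repo root (pylint also picks up a root-level pylintrc automatically).
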
/requirements.txt:
--------------------------------------------------------------------------------
1 | defang
2 | relatime
3 | urllib3
4 | stopit
5 | feedparser
6 | beautifulsoup4
7 | python_dateutil
8 | python_magic
9 | PyYAML
10 | requests
11 | tzlocal
12 | pytz
13 | dnspython3
14 | ipwhois
15 | html5lib
16 |
17 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from os import path
2 | from setuptools import setup, find_packages
3 |
4 |
5 | # Read README.md as the long description so the PyPI page stays up to date
6 | readMeDir = path.abspath(path.dirname(__file__))
7 | with open(path.join(readMeDir, 'README.md'), encoding='utf-8') as readFile:
8 | long_desc = readFile.read()
9 |
10 |
11 | VERSION = '1.4.11'
12 |
13 | setup(
14 | name='machinae',
15 | version=VERSION,
16 | author='Steve McMaster',
17 | author_email='mcmaster@hurricanelabs.com',
18 | package_dir={'': 'src'},
19 | packages=find_packages('src'),
20 | include_package_data=True,
21 | zip_safe=False,
22 | url='http://hurricanelabs.github.io/machinae/',
23 | description='Machinae Security Intelligence Collector',
24 | long_description=long_desc,
25 | long_description_content_type='text/markdown',
26 | install_requires=[
27 | 'dnspython3',
28 | 'ipwhois<0.11',
29 | 'requests',
30 | 'stopit',
31 | 'pyyaml',
32 | 'beautifulsoup4',
33 | 'html5lib',
34 | 'relatime',
35 | 'tzlocal',
36 | 'python-magic',
37 | 'feedparser',
38 | 'defang',
39 | ],
40 | entry_points={
41 | 'console_scripts': [
42 | 'machinae = machinae.cmd:main',
43 | ]
44 | },
45 | classifiers=[
46 | 'License :: OSI Approved :: MIT License',
47 | 'Programming Language :: Python :: 3 :: Only',
48 | 'Development Status :: 5 - Production/Stable',
49 | ],
50 | bugtrack_url='https://github.com/HurricaneLabs/machinae/issues',
51 | )
52 |
--------------------------------------------------------------------------------
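
Two details in setup.py are worth noting. The console_scripts entry point is what creates the `machinae` command on install, so `pip install -e .` followed by `machinae --help` is a quick way to verify the wiring. And the `ipwhois<0.11` pin matters: the IpWhois site class further down calls `lookup_rws()`, which later ipwhois releases dropped (the code also catches AttributeError and falls back to a plain `lookup()`). The `__version__` in src/machinae/__init__.py should track VERSION here, since `machinae --version` reports the former.
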
/src/machinae/__init__.py:
--------------------------------------------------------------------------------
1 | import collections
2 | import re
3 | import socket
4 | import ipaddress
5 |
6 | __version__ = "1.4.11"  # keep in sync with VERSION in setup.py
7 | TargetInfo = collections.namedtuple("TargetInfo", ("target", "otype", "otype_detected"))
8 | ErrorResult = collections.namedtuple("ErrorResult", ("target_info", "site_info", "error_info"))
9 | ResultSet = collections.namedtuple("ResultSet", ("target_info", "results"))
10 | SiteResults = collections.namedtuple("SiteResults", ("site_info", "resultset"))
11 | Result = collections.namedtuple("Result", ("value", "pretty_name"))
12 |
13 | #pylint: disable=no-else-return,too-many-return-statements
14 | def get_target_type(target):
15 | try:
16 | getVer = ipaddress.ip_address(target)
17 | if getVer.version == 4:
18 | return "ipv4"
19 | elif getVer.version == 6:
20 | return "ipv6"
21 | except ValueError:
22 | pass
23 |
24 | #pylint: disable=no-else-return
25 | # Hashes
26 | if re.match("^[a-f0-9]{32}$", target, re.I):
27 | # MD5
28 | return "hash"
29 | elif re.match("^[a-f0-9]{40}$", target, re.I):
30 | # SHA-1
31 | return "hash.sha1"
32 | elif re.match("^[a-f0-9]{64}$", target, re.I):
33 | # SHA-256
34 | return "hash.sha256"
35 | elif re.match("^[a-f0-9]{128}$", target, re.I):
36 | # SHA-512
37 | return "hash.sha512"
38 |
39 | # URL
40 | elif re.match("^https?://", target, re.I):
41 | return "url"
42 |
43 | # Email Addresses
44 | elif re.match("^.*?@.*?$", target, re.I):
45 | return "email"
46 |
47 | # SSL fingerprints
48 | elif re.match("^(?:[a-f0-9]{2}:){19}[a-f0-9]{2}$", target, flags=re.I):
49 | return "sslfp"
50 |
51 | # Mac Addresses
52 |     elif re.match(r"^([0-9a-fA-F][0-9a-fA-F][-:\.]){5}([0-9a-fA-F][0-9a-fA-F])$", target, re.I):
53 | return "mac"
54 |
55 | return "fqdn"
56 |
57 |
58 | # d2 takes precedence
59 | def dict_merge(d1, d2):
60 | d3 = d1.copy()
61 | for key in d2:
62 | if key in d3 and hasattr(d3[key], "items") and hasattr(d2[key], "items"):
63 | d3[key] = dict_merge(d3[key], d2[key])
64 | elif hasattr(d2[key], "items"):
65 | d3[key] = d2[key].copy()
66 | else:
67 | d3[key] = d2[key]
68 | return d3
69 |
--------------------------------------------------------------------------------
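
get_target_type classifies an observable purely by shape, falling through to "fqdn" when nothing else matches, and dict_merge recursively overlays its second argument onto its first. Both are easy to exercise directly (outputs shown in comments follow from the code above):

    from machinae import dict_merge, get_target_type

    print(get_target_type("8.8.8.8"))                           # ipv4
    print(get_target_type("d41d8cd98f00b204e9800998ecf8427e"))  # hash (MD5, by length)
    print(get_target_type("https://example.com/x"))             # url
    print(get_target_type("example.com"))                       # fqdn (the fallback)

    print(dict_merge({"a": 1, "b": {"c": 1}}, {"b": {"c": 2}}))
    # {'a': 1, 'b': {'c': 2}} -- d2 wins on conflicts, merged recursively
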
/src/machinae/cmd.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import copy
3 | import os
4 | import sys
5 | from collections import OrderedDict
6 | import stopit
7 | from machinae import __version__
8 |
9 | from . import dict_merge, get_target_type, outputs, utils
10 | from . import ErrorResult, Result, ResultSet, SiteResults, TargetInfo
11 | from .sites import Site
12 |
13 |
14 | default_config_locations = (
15 | "machinae.yml",
16 | "/etc/machinae.yml",
17 | os.path.expanduser(os.getenv("MACHINAE_CONFIG", "")),
18 | )
19 |
20 |
21 | class MachinaeCommand:
22 | _conf = None
23 | _sites = None
24 |
25 | def __init__(self, args=None):
26 | if args is None:
27 | ap = argparse.ArgumentParser()
28 | ap.add_argument("-c", "--config", default=None)
29 | ap.add_argument("--nomerge", default=False, action="store_true")
30 |
31 | ap.add_argument("-d", "--delay", default=0)
32 | ap.add_argument("-f", "--file", default="-")
33 | ap.add_argument("-i", "--infile", default=None)
34 | ap.add_argument("-o", dest="output", default="N", choices=("D", "J", "N", "S"))
35 | ap.add_argument("-O", "--otype",
36 | choices=("ipv4", "ipv6", "fqdn", "email", "sslfp", "hash", "url", "mac")
37 | )
38 | ap.add_argument("-q", "--quiet", dest="verbose", default=True, action="store_false")
39 | ap.add_argument("-s", "--sites", default="default")
40 | ap.add_argument("-a", "--auth")
41 | ap.add_argument("-H", "--http-proxy", dest="http_proxy")
42 | ap.add_argument("targets", nargs=argparse.REMAINDER)
43 | ap.add_argument("-v", "--version", action="version", version="%(prog)s "+ __version__)
44 |
45 | modes = ap.add_mutually_exclusive_group()
46 | modes.add_argument("--dump-config", dest="mode",
47 | action="store_const", const="dump_config")
48 | modes.add_argument("--detect-otype", dest="mode",
49 | action="store_const", const="detect_otype")
50 | modes.add_argument("--list-sites", dest="mode",
51 | action="store_const", const="list_sites")
52 | args = ap.parse_args()
53 | self.args = args
54 |
55 | @property
56 | def conf(self):
57 | if self._conf is None:
58 | path = None
59 | if self.args.config:
60 | path = self.args.config
61 | else:
62 | for possible_path in default_config_locations:
63 | if possible_path is None:
64 | continue
65 | if os.path.exists(possible_path):
66 | path = possible_path
67 | break
68 |
69 | if path:
70 | with open(path, "r") as f:
71 | conf = utils.safe_load(f)
72 | else:
73 | conf = {}
74 |
75 | if not self.args.nomerge:
76 | local_path = "/etc/machinae.local.yml"
77 | if os.path.exists(local_path):
78 | with open(local_path, "r") as f:
79 | local_conf = utils.safe_load(f)
80 | conf = dict_merge(conf, local_conf)
81 |
82 | local_path = os.path.expanduser("~/.machinae.yml")
83 | if os.path.exists(local_path):
84 | with open(local_path, "r") as f:
85 | local_conf = utils.safe_load(f)
86 | conf = dict_merge(conf, local_conf)
87 |
88 | self._conf = conf
89 | return self._conf
90 |
91 | @property
92 | #pylint: disable=too-many-locals, too-many-branches
93 | def results(self):
94 | creds = None
95 | if self.args.auth and os.path.isfile(self.args.auth):
96 | with open(self.args.auth) as auth_f:
97 | creds = utils.safe_load(auth_f.read())
98 |
99 | proxies = {}
100 | if self.args.http_proxy:
101 | proxies["http"] = self.args.http_proxy
102 | proxies["https"] = self.args.http_proxy
103 | else:
104 | if "HTTP_PROXY" in os.environ:
105 | proxies["http"] = os.environ["HTTP_PROXY"]
106 | elif "http_proxy" in os.environ:
107 | proxies["http"] = os.environ["http_proxy"]
108 | if "HTTPS_PROXY" in os.environ:
109 | proxies["https"] = os.environ["HTTPS_PROXY"]
110 | elif "https_proxy" in os.environ:
111 | proxies["https"] = os.environ["https_proxy"]
112 |
113 | if "http" in proxies:
114 | print("HTTP Proxy: {http}".format(**proxies), file=sys.stderr)
115 | if "https" in proxies:
116 | print("HTTPS Proxy: {https}".format(**proxies), file=sys.stderr)
117 |
118 | for target_info in self.targets:
119 | (target, otype, _) = target_info
120 |
121 | target_results = list()
122 | #pylint: disable=unused-variable
123 | for (site_name, site_conf) in self.sites.items():
124 | if otype.lower() not in map(lambda x: x.lower(), site_conf["otypes"]):
125 | continue
126 |
127 | site_conf["target"] = target
128 | site_conf["verbose"] = self.args.verbose
129 | scraper = Site.from_conf(site_conf, creds=creds, proxies=proxies) # , verbose=self.verbose)
130 |
131 | try:
132 | with stopit.SignalTimeout(15, swallow_exc=False):
133 | run_results = list()
134 | for r in scraper.run():
135 | if "value" not in r:
136 | r = {"value": r, "pretty_name": None}
137 | run_results.append(Result(r["value"], r["pretty_name"]))
138 | except stopit.TimeoutException:
139 | target_results.append(ErrorResult(target_info, site_conf, "Timeout"))
140 | #pylint: disable=broad-except
141 | #Will be cleaned up in upcoming refactor
142 | except Exception as e:
143 | target_results.append(ErrorResult(target_info, site_conf, e))
144 | else:
145 | target_results.append(SiteResults(site_conf, run_results))
146 |
147 | yield ResultSet(target_info, target_results)
148 |
149 | @property
150 | def sites(self):
151 | if self._sites is None:
152 | if self.args.sites.lower() == "all":
153 |             sites = self.conf.keys()
154 | elif self.args.sites.lower() == "default":
155 | sites = [k for (k, v) in self.conf.items() if v.get("default", True)]
156 | else:
157 | sites = self.args.sites.lower().split(",")
158 | self._sites = OrderedDict([(k, v) for (k, v) in self.conf.items() if k in sites])
159 | return copy.deepcopy(self._sites)
160 |
161 | @property
162 | def targets(self):
163 | targets = list()
164 | if self.args.infile:
165 | with open(self.args.infile, "r") as f:
166 | targets.extend([line.strip() for line in f.readlines()])
167 |
168 | targets.extend(self.args.targets)
169 |
170 | for target in targets:
171 | (otype, otype_detected) = self.detect_otype(target)
172 | if otype == "url" and not (target.startswith("http://") or target.startswith("https://")):
173 | target = "http://{0}".format(target)
174 | yield TargetInfo(target, otype, otype_detected)
175 |
176 | def detect_otype(self, target):
177 | if self.args.otype:
178 | return (self.args.otype, False)
179 | return (get_target_type(target), True)
180 |
181 | def run(self):
182 | fmt = self.args.output.upper()
183 | dest = self.args.file
184 |
185 | if not self.conf:
186 | sys.stderr.write("Warning: operating without a config file. This is probably not what "
187 | "you want. To correct this, fetch a copy of the default "
188 | "configuration file from https://github.com/hurricanelabs/machinae "
189 | "and place it in /etc/machinae.yml or ~/.machinae.yml and run again."
190 | "\n")
191 |
192 | if self.args.mode == "dump_config":
193 | output = utils.dump(self.conf)
194 | elif self.args.mode == "detect_otype":
195 | target_dict = OrderedDict()
196 | for target_info in self.targets:
197 | target_dict.update({target_info.target: target_info.otype})
198 | output = utils.dump(target_dict)
199 | elif self.args.mode == "list_sites":
200 | output = utils.listsites(self.conf)
201 | else:
202 | output = outputs.MachinaeOutput.get_formatter(fmt).run(self.results)
203 |
204 | if dest == "-":
205 | ofile = sys.stdout
206 | else:
207 | ofile = open(dest, "w")
208 |
209 | ofile.write(output)
210 |
211 | if dest != "-":
212 | ofile.close()
213 |
214 |
215 | def main():
216 | try:
217 | cmd = MachinaeCommand()
218 | cmd.run()
219 | except KeyboardInterrupt:
220 | pass
221 |
222 |
223 | if __name__ == "__main__":
224 | main()
225 |
--------------------------------------------------------------------------------
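
Config resolution in MachinaeCommand.conf is worth spelling out: the first existing path among ./machinae.yml, /etc/machinae.yml, and $MACHINAE_CONFIG is loaded, then (unless --nomerge is given) /etc/machinae.local.yml and ~/.machinae.yml are overlaid via dict_merge, so local files win on conflicting keys. A hypothetical two-line ~/.machinae.yml that just flips one site on behaves like this:

    from machinae import dict_merge

    # Equivalent of a ~/.machinae.yml containing:
    #   fraudguard:
    #     default: true
    base = {"fraudguard": {"name": "FraudGuard", "default": False}}
    override = {"fraudguard": {"default": True}}
    print(dict_merge(base, override))
    # {'fraudguard': {'name': 'FraudGuard', 'default': True}}
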
/src/machinae/outputs.py:
--------------------------------------------------------------------------------
1 | import io
2 | import json
3 | from defang import defang
4 |
5 | class MachinaeOutput:
6 | @staticmethod
7 | #pylint: disable=no-else-return, redefined-builtin, inconsistent-return-statements
8 | #Will be cleaned up in upcoming refactor
9 | def get_formatter(format):
10 | if format.upper() == "N":
11 | return NormalOutput()
12 | elif format.upper() == "J":
13 | return JsonOutput()
14 | elif format.upper() == "D":
15 | return DotEscapedOutput()
16 | elif format.upper() == "S":
17 | return ShortOutput()
18 |
19 | @staticmethod
20 | def escape(text):
21 | return str(text)
22 |
23 | #pylint: disable=attribute-defined-outside-init
24 | #Will be cleaned up in upcoming refactor
25 | def init_buffer(self):
26 | self._buffer = io.StringIO()
27 |
28 | def print(self, line, lf=True):
29 | self._buffer.write(line)
30 | if lf:
31 | self._buffer.write("\n")
32 |
33 |
34 | class NormalOutput(MachinaeOutput):
35 | def output_header(self, target, otype, otype_detected):
36 | self.print("*" * 80)
37 | self.print("* Information for {0}".format(self.escape(target)))
38 | self.print("* Observable type: {0} (Auto-detected: {1})".format(otype, otype_detected))
39 | self.print("*" * 80)
40 | #This needs to be refactored so the site from args is available here. No time currently, will do though
41 | self.print("Not seeing what you expect? Likely not a valid site. Try running with --list-sites")
42 |
43 | def run(self, result_sets: object):
44 | self.init_buffer()
45 | #pylint: disable=too-many-nested-blocks
46 | for row in result_sets:
47 | (target, otype, otype_detected) = row.target_info
48 |
49 | self.output_header(target, otype, otype_detected)
50 | self.print("")
51 |
52 | for item in row.results:
53 | site = item.site_info
54 | if hasattr(item, "error_info"):
55 | self.print("[!] Error from {0}: {1}".format(site["name"], item.error_info))
56 | continue
57 |
58 | if not item.resultset:
59 | self.print("[-] No {0} Results".format(site["name"]))
60 | else:
61 | self.print("[+] {0} Results".format(site["name"]))
62 | for result in item.resultset:
63 | labels = getattr(result[0], "labels", None)
64 | if len(result[0].values()) > 1 or labels is not None:
65 | values = map(repr, result[0].values())
66 | values = map(self.escape, values)
67 | if labels is not None:
68 | values = zip(labels, values)
69 | values = ["{0}: {1}".format(label, value) for (label, value) in values]
70 | output = ", ".join(values)
71 |
72 | if result[1] is not None:
73 | output = "({0})".format(", ".join(values))
74 | output = defang(output)
75 | else:
76 | output = self.escape(list(result[0].values())[0])
77 | output = defang(output)
78 | if result[1] is not None:
79 | output = "{1}: {0}".format(output, result[1])
80 | output = defang(output)
81 | self.print(" [-] {0}".format(output))
82 |
83 | return self._buffer.getvalue()
84 |
85 |
86 | class DotEscapedOutput(NormalOutput):
87 | escapes = {
88 | # ".": "\u2024",
89 | # ".": "",
90 | # ".": " DOT ",
91 | ".": "[.]",
92 | "@": " AT ",
93 | "http://": "hxxp://",
94 | "https://": "hxxps://",
95 | }
96 |
97 | def output_header(self, target, otype, otype_detected):
98 | super().output_header(target, otype, otype_detected)
99 | self.print("* These characters are escaped in the output below:")
100 | for (find, replace) in self.escapes.items():
101 | self.print("* '{0}' replaced with '{1}'".format(find, replace))
102 | self.print("* Do not click any links you find below")
103 | self.print("*" * 80)
104 |
105 | @classmethod
106 | def escape(cls, text):
107 | text = super(DotEscapedOutput, cls).escape(text)
108 | for (find, replace) in cls.escapes.items():
109 | text = text.replace(find, replace)
110 | return text
111 |
112 | #pylint: disable=no-self-use, unused-variable
113 | #Will be cleaned up in upcoming refactor
114 | class JsonGenerator(MachinaeOutput):
115 | def run(self, result_sets):
116 | records = list()
117 | for row in result_sets:
118 | (target, otype, otype_detected) = row.target_info
119 |
120 | for item in row.results:
121 | output = dict()
122 | output["site"] = item.site_info["name"]
123 | output["results"] = dict()
124 | output["observable"] = target
125 | output["observable type"] = otype
126 | output["observable type detected"] = otype_detected
127 |
128 | if hasattr(item, "error_info"):
129 | output["results"] = {"error_info": str(item.error_info)}
130 | elif item.resultset:
131 | for result in item.resultset:
132 | if result.pretty_name not in output["results"]:
133 | output["results"][result.pretty_name] = list()
134 | values = list(result.value.values())
135 | if len(values) == 1:
136 | output["results"][result.pretty_name].append(values[0])
137 | elif len(values) > 1:
138 | output["results"][result.pretty_name].append(values)
139 | for (k, v) in output["results"].items():
140 | if len(v) == 1:
141 | output["results"][k] = v[0]
142 | records.append(output)
143 | return records
144 |
145 |
146 | class JsonOutput(JsonGenerator):
147 | def run(self, result_sets):
148 | self.init_buffer()
149 |
150 | for record in super().run(result_sets):
151 | self.print(json.dumps(record))
152 |
153 | return self._buffer.getvalue()
154 |
155 |
156 | class ShortOutput(MachinaeOutput):
157 | def run(self, result_sets):
158 | self.init_buffer()
159 |
160 | for row in result_sets:
161 | (target, otype, otype_detected) = row.target_info
162 | self.print("[+] {0}".format(target))
163 |
164 | for item in row.results:
165 | site = item.site_info
166 | if hasattr(item, "error_info"):
167 | self.print(" {0}: Error".format(site["name"]))
168 | elif not item.resultset:
169 | self.print(" {0}: No".format(site["name"]))
170 | else:
171 | self.print(" {0}: Yes".format(site["name"]))
172 |
173 | return self._buffer.getvalue()
174 |
--------------------------------------------------------------------------------
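
Because the formatters only consume the namedtuples defined in __init__.py, the output layer can be exercised without any network traffic. A minimal sketch (the site name and result values are made up):

    from machinae import Result, ResultSet, SiteResults, TargetInfo
    from machinae.outputs import MachinaeOutput

    ti = TargetInfo("198.51.100.7", "ipv4", True)
    res = Result({"country": "US"}, "GeoIP Country")  # hypothetical result
    rs = ResultSet(ti, [SiteResults({"name": "Demo Site"}, [res])])

    print(MachinaeOutput.get_formatter("J").run([rs]))
    # {"site": "Demo Site", "results": {"GeoIP Country": "US"}, "observable": "198.51.100.7", ...}
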
/src/machinae/sites/__init__.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 |
3 | import urllib.parse
4 |
5 | #pylint: disable=useless-object-inheritance
6 | class Site(object):
7 | _session = None
8 | _kwargs = None
9 |
10 | def __init__(self, conf, creds=None, proxies=None):
11 | self.conf = conf
12 | self.creds = creds
13 | self.proxies = proxies
14 |
15 | def kwargs_getter(self):
16 | return self._kwargs
17 |
18 | def kwargs_setter(self, kwargs):
19 | if "target" in kwargs:
20 | target = kwargs.pop("target")
21 | if "target" in self.conf.get("request", {}):
22 | target_conf = self.conf["request"]["target"]
23 |
24 | # PTR-style
25 | ptr_style = str(target_conf.get("ptr", False)).lower()
26 | if ptr_style in ("1", "yes", "true"):
27 | target = ".".join(reversed(target.split(".")))
28 |
29 | urlencode = str(target_conf.get("urlencode", False)).lower()
30 | if urlencode in ("1", "yes", "true"):
31 | target = urllib.parse.quote(target)
32 | elif urlencode == "twice":
33 | target = urllib.parse.quote(
34 | urllib.parse.quote(target, safe="")
35 | )
36 |
37 | if "format" in target_conf:
38 | target = target_conf["format"] % (target,)
39 |
40 | kwargs["target"] = target
41 |
42 | self._kwargs = kwargs
43 |
44 | kwargs = property(kwargs_getter, kwargs_setter)
45 |
46 | @staticmethod
47 | def from_conf(conf, *args, **kwargs):
48 | from . import csv, html, rss, json, ipwhois
49 | if "webscraper" in conf:
50 | site_conf = conf.pop("webscraper")
51 | scraper = html.Webscraper(site_conf, *args, **kwargs)
52 | elif "tablescraper" in conf:
53 | site_conf = conf.pop("tablescraper")
54 | scraper = html.TableScraper(site_conf, *args, **kwargs)
55 | elif "json" in conf:
56 | site_conf = conf.pop("json")
57 | scraper = json.JsonApi(site_conf, *args, **kwargs)
58 | elif "csv" in conf:
59 | site_conf = conf.pop("csv")
60 | scraper = csv.CsvSite(site_conf, *args, **kwargs)
61 | elif "rss" in conf:
62 | site_conf = conf.pop("rss")
63 | scraper = rss.RssSite(site_conf, *args, **kwargs)
64 | elif "ipwhois" in conf:
65 | site_conf = conf.pop("ipwhois")
66 | scraper = ipwhois.IpWhois(site_conf, *args, **kwargs)
67 | # elif "dns" in conf:
68 | # scraper = DnsSite(conf["dns"], *args, **kwargs)
69 | # elif "ipwhois" in conf:
70 | # scraper = IpWhois(conf["ipwhois"], *args, **kwargs)
71 | else:
72 | raise NotImplementedError(conf.keys())
73 | scraper.kwargs = conf.copy()
74 | return scraper
75 |
76 | def get_content(self):
77 | raise NotImplementedError
78 | #pylint: disable=no-member
79 | def __iter__(self):
80 | for _ in self.run():
81 | yield _
82 |
--------------------------------------------------------------------------------
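
Site.from_conf pops the fetcher key to pick the scraper class, then assigns the leftover keys (target, verbose, ...) to scraper.kwargs, whose setter applies the optional ptr/urlencode/format target transforms. Building one by hand, with a deliberately unreachable placeholder URL:

    from machinae.sites import Site

    conf = {
        "json": {"request": {"url": "https://example.invalid/ip/{target}"}},
        "target": "1.2.3.4",
    }
    scraper = Site.from_conf(conf)   # a sites.json.JsonApi instance
    print(scraper.kwargs["target"])  # 1.2.3.4 (no transforms configured)
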
/src/machinae/sites/base.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 |
3 | import gzip
4 | import io
5 | import warnings
6 | import zipfile
7 | from collections import OrderedDict
8 |
9 | import magic
10 | import pytz
11 | import relatime
12 | import requests
13 | from tzlocal import get_localzone
14 | try:
15 | #pylint: disable=ungrouped-imports
16 | from requests.packages.urllib3 import exceptions
17 | except ImportError:
18 | # Apparently, some linux distros strip the packages out of requests
19 | # I'm not going to tell you what I think of that, just going to deal with it
20 | from urllib3 import exceptions
21 |
22 | from . import Site
23 |
24 |
25 | class HttpSite(Site):
26 | @property
27 | def url(self):
28 |         # Render the request URL template with the collected kwargs
29 | return self.conf["request"]["url"].format(**self.kwargs)
30 |
31 | @property
32 | def session(self):
33 | if self._session is None:
34 | self._session = requests.Session()
35 | self._session.headers.update({"User-Agent": "Vor/1.0 (Like CIF/2.0)"})
36 | if self.proxies:
37 | self._session.proxies = self.proxies
38 | return self._session
39 |
40 | @staticmethod
41 | def unzip_content(r, *args, **kwargs):
42 | content = r.content
43 |
44 | mime = magic.from_buffer(content, mime=True)
45 |
46 | if mime == "application/zip":
47 | zip_buffer = io.BytesIO(content)
48 | with zipfile.ZipFile(zip_buffer) as zf:
49 | fn = zf.namelist()[0]
50 | with zf.open(fn) as f:
51 | r._content = f.read()
52 | elif mime == "application/x-gzip":
53 | gz_buffer = io.BytesIO(content)
54 | with gzip.GzipFile(fileobj=gz_buffer) as gz:
55 | r._content = gz.read()
56 | else:
57 | r._content = content
58 |
59 | return r
60 |
61 | def _req(self, conf, url=None):
62 | if url is None:
63 | url = conf.get("url", "")
64 | if url == "":
65 | return
66 | url = url.format(**self.kwargs)
67 | method = conf.get("method", "get").upper()
68 |
69 | kwargs = dict()
70 | headers = conf.get("headers", {})
71 | if headers:
72 | kwargs["headers"] = headers
73 | verify_ssl = conf.get("verify_ssl", True)
74 |
75 | # GET params
76 | params = conf.get("params", {}).copy()
77 |         for (k, v) in list(params.items()):  # snapshot; we pop inside the loop
78 |             if hasattr(v, "items"):
79 |                 param_conf = params.pop(k)  # don't clobber the request conf
80 |                 if "relatime" in param_conf:
81 |                     dt = relatime.timeParser(param_conf["relatime"], timezone=str(get_localzone()))
82 |                     target_tz = pytz.timezone(param_conf.get("timezone", "UTC"))
83 |                     dt = dt.astimezone(target_tz)
84 |                     dt = dt.replace(tzinfo=None)
85 |                     time_format = param_conf.get("format", "%Y-%m-%dT%H:%M:%S.%fZ")
86 |                     if time_format.lower() == "as_epoch":
87 |                         params[k] = str(int(dt.timestamp()))
88 |                     else:
89 |                         params[k] = dt.strftime(time_format)
90 |             else:
91 |                 params[k] = str(v).format(**self.kwargs)
92 | if params:
93 | kwargs["params"] = params
94 |
95 | # POST data
96 | data = conf.get("data", {})
97 | for (k, v) in data.items():
98 | data[k] = v.format(**self.kwargs)
99 | if data:
100 | kwargs["data"] = data
101 |
102 | # HTTP Basic Auth
103 | if conf.get("auth") and self.creds and self.creds.get(conf["auth"]):
104 | kwargs["auth"] = tuple(self.creds[conf["auth"]])
105 |
106 | # Auto decompress
107 | if conf.get("decompress", False):
108 | kwargs["hooks"] = {"response": self.unzip_content}
109 |
110 | raw_req = requests.Request(method, url, **kwargs)
111 | req = self.session.prepare_request(raw_req)
112 | if self.kwargs.get("verbose", False):
113 | print("[.] Requesting {0} ({1})".format(req.url, req.method))
114 | with warnings.catch_warnings():
115 | if not verify_ssl:
116 | warnings.simplefilter("ignore", exceptions.InsecureRequestWarning)
117 | return self.session.send(req, verify=verify_ssl)
118 |
119 | def get_content(self, conf=None, url=None):
120 | if conf is None:
121 | conf = self.conf["request"]
122 |
123 | r = self._req(conf, url)
124 | ignored_status_codes = [int(sc) for sc in conf.get("ignored_status_codes", [])]
125 | if r.status_code not in ignored_status_codes:
126 | r.raise_for_status()
127 | return r
128 | #pylint: disable=no-self-use
129 | #Will be cleaned up in upcoming refactor
130 | def build_result(self, parser, result_dict):
131 | defaults_dict = parser.get("defaults", {})
132 |
133 | result = OrderedDict()
134 | result.update(defaults_dict)
135 | result.update(result_dict)
136 |
137 | result.pop(None, None)
138 |
139 | if "map" in parser:
140 | for (old, new) in parser["map"].items():
141 | if new is None:
142 | result.pop(old)
143 | elif old in result:
144 | result[new] = result.pop(old)
145 |
146 | # fmt = dict()
147 | # for (k, v) in result.items():
148 | # fk = "<{0}>".format(k)
149 | # fmt[fk] = str(v)
150 | #
151 | # for (k, v) in result.items():
152 | # for (find, replace) in fmt.items():
153 | # try:
154 | # result[k] = v.replace(find, replace)
155 | # except AttributeError:
156 | # pass
157 |
158 | if "defaults" in parser:
159 | for (k, v) in parser["defaults"].items():
160 | result[k] = v
161 |
162 | if "pretty_name" in parser:
163 | result = OrderedDict([
164 | ("value", result),
165 | ("pretty_name", parser["pretty_name"])
166 | ])
167 |
168 | if hasattr(result_dict, "labels"):
169 | result.labels = result_dict.labels
170 |
171 | return result
172 |
--------------------------------------------------------------------------------
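
The auth handling in HttpSite._req is the piece that ties back to the -a credentials file: a site's `auth` name is looked up in that mapping and the pair becomes HTTP Basic Auth, so the file just maps names to [user, key] pairs. The lookup in isolation (values are placeholders):

    # What `kwargs["auth"] = tuple(self.creds[conf["auth"]])` does:
    creds = {"passivetotal": ["analyst@example.invalid", "API_KEY_HERE"]}  # from the -a YAML file
    request_conf = {"auth": "passivetotal"}
    print(tuple(creds[request_conf["auth"]]))
    # ('analyst@example.invalid', 'API_KEY_HERE')
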
/src/machinae/sites/csv.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 |
3 | import csv
4 | import io
5 | import re
6 |
7 | from .base import HttpSite
8 |
9 |
10 | class CsvSite(HttpSite):
11 | _delim = None
12 |
13 | @property
14 | def dialect(self):
15 | if "pattern" not in self.conf:
16 | return "excel"
17 |
18 | class DelimDialect(csv.excel):
19 | delimiter = str(self.delim)
20 | skipinitialspace = True
21 |
22 | return DelimDialect()
23 |
24 | @property
25 | def delim(self):
26 | return self._delim or self.conf.get("pattern", ",")
27 |
28 | def get_content(self):
29 | r = super(CsvSite, self).get_content()
30 | body = r.text
31 |
32 | if len(self.delim) > 1:
33 | body = re.sub(self.conf["pattern"], "|", body)
34 | self._delim = "|"
35 |
36 | buf = io.StringIO(body)
37 | csvfile = csv.reader(buf, dialect=self.dialect)
38 |
39 | return csvfile
40 |
41 | def run(self):
42 | r = self._req(self.conf["request"])
43 |
44 | body = r.text
45 | if len(self.delim) > 1:
46 | body = re.sub(self.conf["pattern"], "|", body)
47 | self._delim = "|"
48 |
49 | buf = io.StringIO(body)
50 | csvfile = csv.reader(buf, dialect=self.dialect)
51 |
52 | for (lineno, row) in enumerate(csvfile):
53 | for parser in self.conf["results"]:
54 | start = parser.get("start", 1)
55 | stop = parser.get("end", None)
56 |
57 | # raise ValueError(start, stop)
58 | #pylint: disable=len-as-condition
59 | if lineno < start or len(row) == 0 or row[0].startswith("#"):
60 | continue
61 | elif stop is not None and lineno > stop:
62 | break
63 |
64 | if "match" in parser:
65 | rex = re.compile(parser["match"]["regex"])
66 | col = int(parser["match"]["column"])
67 | if not rex.search(row[col]):
68 | continue
69 |
70 | row = [item.strip() for item in row]
71 | result_dict = dict(zip(parser["values"], row))
72 | yield self.build_result(parser, result_dict)
73 |
--------------------------------------------------------------------------------
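
CsvSite treats a multi-character `pattern` as a regex: every match in the body is rewritten to a literal "|", and the csv dialect then splits on that. The transformation in isolation (pattern and row are made up):

    import re

    pattern = r"\s*;\s*"  # hypothetical multi-character delimiter pattern
    row = "198.51.100.7  ;  botnet ; 2021-01-01"
    print(re.sub(pattern, "|", row))  # 198.51.100.7|botnet|2021-01-01
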
/src/machinae/sites/html.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 |
3 | import re
4 | from collections import Counter
5 |
6 | from bs4 import BeautifulSoup, Comment
7 |
8 | from .base import HttpSite
9 |
10 |
11 | def html_unescape(content):
12 | try:
13 | import html
14 | return html.unescape(content)
15 | except ImportError:
16 | import HTMLParser
17 | html_parser = HTMLParser.HTMLParser()
18 | return html_parser.unescape(content)
19 |
20 |
21 | class HtmlSite(HttpSite):
22 | def get_html(self):
23 | r = super(HtmlSite, self).get_content()
24 | body = r.text
25 |
26 | cleanup = self.conf["request"].get("cleanup", {})
27 |
28 | strip_comments = str(cleanup.get("strip_comments", False)).lower()
29 | if strip_comments in ("1", "yes", "true"):
30 | soup = BeautifulSoup(r.text, "html5lib")
31 | for comment in soup.find_all(text=lambda _: isinstance(_, Comment)):
32 | comment.extract()
33 | body = str(soup)
34 |
35 | return html_unescape(body)
36 |
37 |
38 | class TableScraper(HtmlSite):
39 | @staticmethod
40 | def compare_rows(row1, row2):
41 | row1 = [cell.strip().lower() for cell in row1]
42 | row2 = [cell.strip().lower() for cell in row2]
43 | #pylint: disable=superfluous-parens
44 | #I believe these to actually be unnecessary,er,superfluous but will have to test
45 | return (Counter(row1) == Counter(row2))
46 |
47 | @staticmethod
48 | def get_row_contents(row):
49 | return [cell.get_text().strip() for cell in row.find_all(["td", "th"])]
50 |
51 | @classmethod
52 | def find_table(cls, html, headers):
53 | soup = BeautifulSoup(html, "html5lib")
54 | for table in soup.find_all("table"):
55 | cells = cls.get_row_contents(table.find("tr"))
56 | if cls.compare_rows(cells, headers):
57 | return (table, cells)
58 | raise ValueError("No matching table found")
59 |
60 | def run(self):
61 | body = self.get_html()
62 |
63 | for parser in self.conf["results"]:
64 | (table, columns) = self.find_table(body, parser["map"].keys())
65 | for row in table.find_all("tr"):
66 | cells = self.get_row_contents(row)
67 | if self.compare_rows(cells, columns):
68 | continue
69 | result_dict = dict(zip(columns, cells))
70 | yield self.build_result(parser, result_dict)
71 |
72 |
73 | class Webscraper(HtmlSite):
74 | def run(self):
75 | body = self.get_html()
76 |
77 | if "results" not in self.conf:
78 | raise Exception("No parsing configuration found")
79 | for parser in self.conf["results"]:
80 | rex = re.compile(parser["regex"], flags=re.I)
81 | for match in rex.finditer(body):
82 | result_dict = dict()
83 | for (k, v) in zip(parser["values"], match.groups()):
84 | result_dict[k] = v
85 | yield self.build_result(parser, result_dict)
86 |
--------------------------------------------------------------------------------
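
TableScraper.find_table matches a table by comparing the text of its first row against the configured headers as a case-insensitive multiset, so column order doesn't matter. It can be exercised on a literal snippet (requires beautifulsoup4 and html5lib, both in requirements.txt):

    from machinae.sites.html import TableScraper

    html = ("<table><tr><th>IP</th><th>Risk</th></tr>"
            "<tr><td>198.51.100.7</td><td>high</td></tr></table>")
    table, columns = TableScraper.find_table(html, ["risk", "ip"])
    print(columns)  # ['IP', 'Risk']
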
/src/machinae/sites/ipwhois.py:
--------------------------------------------------------------------------------
1 | from ipaddress import ip_address, summarize_address_range
2 |
3 | import ipwhois
4 |
5 | from .json import JsonApi
6 |
7 |
8 | class IpWhois(JsonApi):
9 | @staticmethod
10 | def get_cidr(network):
11 | networks = [str(net) for net in summarize_address_range(
12 | ip_address(network["start_address"]),
13 | ip_address(network["end_address"])
14 | )]
15 | if len(networks) == 1:
16 | networks = networks[0]
17 | return networks
18 |
19 | def get_json(self):
20 | obj = ipwhois.IPWhois(self.kwargs["target"])
21 |         try:
22 |             # lookup_rws is only present in older releases of the
23 |             # ipwhois library; it was removed upstream
24 |             return obj.lookup_rws()
25 |         except AttributeError:
26 |             # Fall back to a plain whois lookup rather than parsing
27 |             # the more cumbersome RDAP output
28 |             return obj.lookup()
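29 | 
30 | # Example: get_cidr() collapses a registry range into CIDR notation;
31 | # for {"start_address": "10.0.0.0", "end_address": "10.0.1.255"},
32 | # summarize_address_range yields exactly one network, so the helper
33 | # returns the bare string "10.0.0.0/23" rather than a one-element list.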
--------------------------------------------------------------------------------
/src/machinae/sites/json.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 |
3 | import datetime
4 | import json
5 | import re
6 | import urllib.parse
7 | from collections import OrderedDict
8 |
9 | from dateutil.parser import parse
10 |
11 | from relatime import timeParser
12 |
13 | from .base import HttpSite
14 |
15 | class JsonApi(HttpSite):
16 | @staticmethod
17 | def get_value(data, key, default=None):
18 | if key == "@" or data is None:
19 | return data
20 | ret = data
21 | key_parts = key.split(".")
22 | for key_part in key_parts:
23 | if key_part not in ret:
24 | return default
25 | ret = ret[key_part]
26 | return ret
27 |
28 | def get_json(self, url=None):
29 | r = self.get_content(url=url)
30 |
31 | ignored_status_codes = [int(sc) for sc in self.conf["request"].get("ignored_status_codes", [])]
32 | if r.status_code in ignored_status_codes:
33 | return []
34 |
35 | if not self.conf.get("multi_json", False):
36 | return r.json()
37 |
38 |         # multi_json responses are newline-delimited JSON, one
39 |         # document per line
40 |
41 | results = list()
42 | for json_line in r.text.split("\n"):
43 |             if not json_line:  # skip blank lines instead of truncating
44 |                 continue
45 | results.append(json.loads(json_line))
46 | return results
47 |
48 | def run(self):
49 | data = self.get_json()
50 |
51 | if hasattr(data, "items"):
52 | next_url = None
53 | if self.conf.get("paginated", False):
54 | next_url = data.get("next", None)
55 |
56 | data = [data]
57 | while next_url:
58 | next_data = self.get_json(url=next_url)
59 | next_url = next_data.get("next", None)
60 | data.append(next_data)
61 |
62 | if "results" not in self.conf:
63 | return
64 |
65 | for row in data:
66 | for parser in self.conf["results"]:
67 | for _ in self.parse_dict(row, parser):
68 | yield _
69 |
70 | @classmethod
71 | def get_result_dicts(cls, data, parser, mm_key=None, onlyif=None):
72 | if not hasattr(parser, "items"):
73 | parser = {"key": parser}
74 |
75 | if "key" not in parser:
76 | yield data
77 | return
78 |
79 | key = parser["key"]
80 | rex = None
81 | if "regex" in parser:
82 | rex = re.compile(parser["regex"], flags=re.I)
83 |
84 | if key == "@" and mm_key is not None:
85 | yield {key: mm_key}
86 | return
87 |
88 | values = cls.get_value(data, key)
89 | if values is None:
90 | return
91 |
92 | if not parser.get("match_all", False):
93 | values = [values]
94 |
95 | for val in values:
96 | result_dict = OrderedDict()
97 |
98 | if rex:
99 | m = rex.search(val)
100 | if not m:
101 | return
102 | #pylint: disable=len-as-condition
103 | if len(m.groups()) > 0:
104 | val = m.groups()
105 | if len(val) == 1:
106 | val = val[0]
107 |
108 | urldecode = str(parser.get("urldecode", False)).lower()
109 | if urldecode in ("1", "yes", "true"):
110 | val = urllib.parse.unquote(val)
111 | elif urldecode == "twice":
112 | val = urllib.parse.unquote(
113 | urllib.parse.unquote(val)
114 | )
115 |
116 | if "format" in parser:
117 | if parser["format"] == "as_list":
118 | val = ", ".join(map(str, val))
119 | elif parser["format"] == "as_time":
120 | try:
121 | dt = datetime.datetime.fromtimestamp(float(val))
122 |                         #pylint: disable=bare-except
123 |                         #Fall back to dateutil for non-epoch values; narrow this in a future refactor
124 | except:
125 | dt = parse(val)
126 | val = dt.isoformat()
127 | result_dict[key] = val
128 |
129 | yield result_dict
130 |
131 | @classmethod
132 | def multi_match_generator(cls, data, parser, mm_key):
133 | if not hasattr(data, "items"):
134 | # Is a list, process each list item
135 | for item in data:
136 | for _ in cls.multi_match_generator(item, parser, mm_key="@"):
137 | yield _
138 |
139 | return
140 |
141 | onlyif = parser.get("onlyif", None)
142 | if onlyif is not None and not hasattr(onlyif, "items"):
143 | onlyif = {"key": onlyif}
144 |
145 |         # Decide how to iterate over the data. Options:
146 |         #  - one result_dict per matching key in the dict, or
147 |         #  - a single result_dict for the whole dict (when mm_key is
148 |         #    "@" or match_all is set)
149 | if mm_key == "@" or parser.get("match_all", False):
150 | # Treat the entire data as a single match
151 | # Returns a single result_dict
152 | data = [(None, data)]
153 | else:
154 | # Each matching key is a separate result_dict
155 | data = data.items()
156 |
157 | for (k, v) in data:
158 | if onlyif is not None:
159 |                 # onlyif was already normalized to a dict above, so
160 |                 # its "key" entry is safe to read here
161 | value = cls.get_value(v, onlyif["key"], None)
162 |
163 | if value is None:
164 | continue
165 | elif "regex" in onlyif:
166 | rex = re.compile(onlyif["regex"], re.I)
167 | if not rex.search(value):
168 | continue
169 | # Check for maxage key in onlyif. If it exists, parse it as Splunk relative time syntax and compare to parsed input "value"
170 | elif "maxage" in onlyif:
171 | age = parse(value)
172 | if not onlyif["maxage"].startswith("-"): # Assume we want dates in the past
173 |                         print('\033[91m' + 'WARNING: maxage must be prefixed with "-". Please correct this in your configuration file.' + '\033[0m')
174 | onlyif["maxage"] = "-%s" % onlyif["maxage"]
175 | ageout = timeParser(onlyif["maxage"]).replace(tzinfo=None)
176 | if age < ageout:
177 | continue
178 | else:
179 | if not bool(value):
180 | continue
181 | result_dict = OrderedDict()
182 | for mm_parser in parser["keys"]:
183 | for mm_result_dict in cls.get_result_dicts(v, mm_parser, mm_key=k, onlyif=onlyif):
184 | result_dict.update(mm_result_dict)
185 |
186 | if result_dict:
187 | result_dict.labels = parser.get("labels", None)
188 | yield result_dict
189 |
190 | def parse_dict(self, data, parser):
191 | if not hasattr(parser, "items"):
192 | parser = {"key": parser}
193 |
194 | if "multi_match" in parser:
195 | target = self.get_value(data, parser["key"])
196 | if target is None:
197 | return
198 | result_iter = self.multi_match_generator(target, parser["multi_match"], parser["key"])
199 | else:
200 | result_iter = self.get_result_dicts(data, parser)
201 |
202 | for result_dict in result_iter:
203 | yield self.build_result(parser, result_dict)
204 |
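205 | # Example (hypothetical data): get_value() walks dotted paths, so with
206 | #   data = {"whois": {"registrar": {"name": "Example Corp"}}}
207 | # get_value(data, "whois.registrar.name") returns "Example Corp", and a
208 | # missing segment falls back to the supplied default. A parser such as
209 | #   {"key": "answers", "multi_match": {"keys": ["ip", "ttl"]}}
210 | # emits one merged result dict per element of data["answers"].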
--------------------------------------------------------------------------------
/src/machinae/sites/rss.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 |
3 | import re
4 |
5 | import feedparser
6 |
7 | from .base import HttpSite
8 |
9 |
10 | class RssSite(HttpSite):
11 | def get_content(self):
12 | r = super(RssSite, self).get_content()
13 | return feedparser.parse(r.text)
14 |
15 | def run(self):
16 | r = self._req(self.conf["request"])
17 | body = r.text
18 | rss = feedparser.parse(body)
19 |         parser = None  # holds the last field parser for build_result below
20 |
21 | for entry in rss.entries:
22 | for parser1 in self.conf["results"]:
23 | result_dict = dict()
24 | for (key, parser) in parser1.items():
25 |                 # each key names an RSS entry attribute to parse
26 | rex = re.compile(parser["regex"])
27 | fieldnames = parser["values"]
28 | if not isinstance(fieldnames, list):
29 | fieldnames = [fieldnames]
30 | rss_value = getattr(entry, key)
31 | m = rex.search(rss_value)
32 | if m:
33 | result_dict.update(dict(zip(fieldnames, m.groups())))
34 | else:
35 | result_dict = None
36 | break
37 |
38 | if result_dict is None:
39 | continue
40 |
41 | yield self.build_result(parser, result_dict)
42 |
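43 | # Example (hypothetical results entry):
44 | #   - title:
45 | #       regex: '(\S+) is listed as (malware|phishing)'
46 | #       values: [url, threat_type]
47 | # Each feed entry's "title" attribute is matched against the regex and
48 | # the captured groups are zipped into "values"; entries where any
49 | # configured field fails to match are skipped entirely.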
--------------------------------------------------------------------------------
/src/machinae/utils.py:
--------------------------------------------------------------------------------
1 | from collections import OrderedDict
2 | import yaml
3 |
4 | class MachinaeLoader(yaml.SafeLoader):
5 | #pylint: disable=arguments-differ,too-many-ancestors
6 | def construct_mapping(self, node):
7 | self.flatten_mapping(node)
8 | return OrderedDict(self.construct_pairs(node))
9 |
10 |
11 | MachinaeLoader.add_constructor(
12 | yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG,
13 | MachinaeLoader.construct_mapping)
14 |
15 |
16 | #pylint: disable=too-many-ancestors
17 | class MachinaeDumper(yaml.Dumper):
18 | def represent_dict(self, data):
19 | return self.represent_mapping('tag:yaml.org,2002:map', data, False)
20 |
21 | def represent_list(self, data):
22 | return self.represent_sequence('tag:yaml.org,2002:seq', data, False)
23 |
24 |
25 | MachinaeDumper.add_representer(
26 | OrderedDict,
27 | MachinaeDumper.represent_dict)
28 |
29 | MachinaeDumper.add_representer(
30 | list,
31 | MachinaeDumper.represent_list)
32 |
33 | #Override the built-in PyYAML safe_load so site configs and results
34 | #load as OrderedDicts and preserve key order
35 | def safe_load(*args, **kwargs):
36 | kwargs["Loader"] = MachinaeLoader
37 | return yaml.load(*args, **kwargs)
38 |
39 |
40 | def dump(*args, **kwargs):
41 | kwargs["Dumper"] = MachinaeDumper
42 | return yaml.dump(*args, **kwargs)
43 |
44 |
45 | def listsites(conf):
46 | rstr = '{0:40}{1:40}{2:40}{3}'.format('SITE', 'NAME', 'OTYPES', 'DEFAULT')
47 | rstr += '\n'
48 | for key in conf:
49 | d = 'True'
50 | if "default" in conf[key].keys():
51 | d = str(conf[key]["default"])
52 | rstr += '{0:40}{1:40}{2:40}{3}'.format(key,
53 | conf[key]["name"],
54 | ', '.join(conf[key]["otypes"]),
55 | d)
56 | rstr += '\n'
57 | return rstr
58 |
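59 | # Example: these wrappers keep YAML mappings ordered end to end:
60 | #   conf = safe_load(open("machinae.yml"))  # every mapping is an OrderedDict
61 | #   print(dump(conf))                       # block style, key order preserved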
--------------------------------------------------------------------------------