├── tests
│   ├── __init__.py
│   └── test_inquisitor.py
├── inquisitor
│   ├── extractors
│   │   ├── __init__.py
│   │   └── emails.py
│   ├── sources
│   │   ├── __init__.py
│   │   ├── google_search.py
│   │   └── shodan_search.py
│   ├── assets
│   │   ├── __init__.py
│   │   ├── registrant.py
│   │   ├── email.py
│   │   ├── linkedin.py
│   │   ├── block.py
│   │   └── host.py
│   └── __init__.py
├── .gitignore
├── setup.py
├── report
│   └── index.html
├── inq
└── README.md
/tests/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/inquisitor/extractors/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/inquisitor/sources/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | *.bat
3 | *.json
4 | *.db
--------------------------------------------------------------------------------
/inquisitor/extractors/emails.py:
--------------------------------------------------------------------------------
1 | import re
2 |
# Candidate address: runs of characters that are neither whitespace, '@' nor
# angle brackets on both sides of a single '@', with at least one dot in the
# right-hand part, ending on a word boundary.
REGEX = re.compile(r'[^\s@<>]+@[^\s@<>]+\.[^\s@<>]+\b')


def extract(string):
    """Return the set of distinct e-mail-like substrings found in ``string``.

    The pattern has no capture groups, so every element is the full text of
    one match. No validation beyond the pattern is performed here.
    """
    return {match.group(0) for match in REGEX.finditer(string)}
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
# Packaging script for inquisitor.
#
# setuptools is preferred. When it is not installed we fall back to
# distutils, which has no ``find_packages``; a minimal stand-in is defined
# so that building the ``packages`` entry below does not raise NameError.
try:
    from setuptools import setup, find_packages
except ImportError:
    import os

    from distutils.core import setup

    def find_packages(where='.'):
        """Return dotted names of every package found under ``where``.

        A directory counts as a package when it contains ``__init__.py``;
        directories that are not packages are not descended into.
        """
        packages = []
        pending = [(where, '')]
        while pending:
            directory, prefix = pending.pop()
            for entry in sorted(os.listdir(directory)):
                path = os.path.join(directory, entry)
                # Names containing a dot cannot be imported as packages.
                if '.' in entry:
                    continue
                if not os.path.isfile(os.path.join(path, '__init__.py')):
                    continue
                packages.append(prefix + entry)
                pending.append((path, prefix + entry + '.'))
        return packages

config = {
    'description': 'Opinionated organisation-centric OSINT inspired from recon-ng and Maltego',
    'author': 'John Lawrence M. Penafiel',
    'url': 'https://github.com/penafieljlm/inquisitor',
    'download_url': 'https://github.com/penafieljlm/inquisitor',
    'author_email': 'penafieljlm@gmail.com',
    'version': '0.1',
    'install_requires': [
        'google-api-python-client',
        'ipwhois',
        'netaddr',
        'nose',
        'python-whois',
        'shodan',
        'tabulate',
        'tld',
        'unidecode',
        'unqlite',
        'validate_email',
    ],
    'packages': find_packages(),
    'scripts': ['inq'],
    'name': 'inquisitor'
}

setup(**config)
--------------------------------------------------------------------------------
/inquisitor/assets/__init__.py:
--------------------------------------------------------------------------------
1 | import sys
2 |
class Asset(object):
    """Base class for every asset type; holds ownership and a transform cache.

    ``transforms`` maps a source name to a list of ``[module_name, object_id]``
    pairs describing the assets that source produced for this asset.
    """

    def __init__(self, owned=None):
        self.owned = owned
        self.transforms = dict()

    def cache_transform_store(self, source, assets):
        """Record ``assets`` as the cached result of ``source`` for this asset.

        Each asset is reduced to ``[module_name, identifier]`` where the
        identifier attribute name comes from the ``OBJECT_ID`` constant of the
        module defining the asset's class. Duplicate pairs are kept only once,
        in first-seen order. Any earlier entry for ``source`` is replaced.
        """
        remembered = []
        for item in assets:
            owner_module = item.__class__.__module__
            id_attribute = sys.modules[owner_module].OBJECT_ID
            pair = [owner_module, getattr(item, id_attribute)]
            if pair in remembered:
                continue
            remembered.append(pair)
        self.transforms[source] = remembered

    def cache_transform_get(self, source, repo):
        """Rebuild the assets cached for ``source`` by looking them up in ``repo``.

        Returns an empty set when nothing has been cached for ``source``.
        Otherwise each cached pair is resolved through
        ``repo.get_asset_string(..., create=True)`` using the ``ASSET_CLASS``
        of the recorded module.
        """
        found = set()
        if source in self.transforms:
            for owner_module, object_id in self.transforms[source]:
                asset_class = sys.modules[owner_module].ASSET_CLASS
                lookup = repo.get_asset_string(
                    asset_class,
                    object_id,
                    create=True,
                )
                found.add(lookup[1])
        return found
33 |
--------------------------------------------------------------------------------
/report/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
31 |
32 |
33 |
--------------------------------------------------------------------------------
/inquisitor/sources/google_search.py:
--------------------------------------------------------------------------------
1 | import googleapiclient.discovery
2 | import inquisitor.assets.email
3 | import inquisitor.assets.host
4 | import inquisitor.assets.linkedin
5 | import inquisitor.assets.registrant
6 | import inquisitor.extractors.emails
7 | import logging
8 | import urlparse
9 |
class GoogleAPI:
    """Google Custom Search source that turns search results into assets."""

    def __init__(self, dev_key, cse_id, limit=None):
        """Build the Custom Search client.

        dev_key -- developer key passed to the API client.
        cse_id  -- custom search engine id sent as ``cx`` on every query.
        limit   -- maximum number of result pages to fetch; falsy = no limit.
        """
        self.dev_key = dev_key
        self.cse_id = cse_id
        self.limit = limit
        self.service = googleapiclient.discovery.build(
            "customsearch", "v1",
            developerKey=self.dev_key,
        )

    def search(self, query):
        """Return the result items for ``query``, fetched page by page.

        Paging stops when the page limit is reached, when the API raises an
        ``HttpError``, or when a page comes back without items. The last
        condition guarantees termination even if the API keeps answering
        successfully with empty pages.
        """
        items = list()
        page = 1
        start = 1
        while not (self.limit and page > self.limit):
            try:
                results = self.service.cse().list(
                    q=query,
                    cx=self.cse_id,
                    start=start,
                ).execute()
            except googleapiclient.errors.HttpError:
                break
            batch = results.get('items')
            if not batch:
                # Nothing more to read; do not request further pages.
                break
            items.extend(batch)
            start += 10
            page += 1
        return items

    @staticmethod
    def _person_org(item):
        """Return ``pagemap.person[0].org`` of a result item, or None."""
        pagemap = item.get('pagemap') or {}
        people = pagemap.get('person') or [{}]
        return people[0].get('org')

    def transform(self, repository, query):
        """Search for ``query`` and return the set of assets found.

        For each result item this extracts the host of the link, any e-mail
        addresses in the snippet and, for linkedin.com links, a LinkedIn
        account. Values that fail validation are logged and skipped.
        """
        assets = set()
        for item in self.search(query):
            parsed_link = urlparse.urlparse(item['link'])
            # Extract Host
            try:
                assets.add(repository.get_asset_string(
                    inquisitor.assets.host.Host,
                    parsed_link.netloc,
                    create=True,
                )[1])
            except inquisitor.assets.host.HostValidateException as e:
                logging.error(e.message)
            # Extract Emails (a result item may carry no snippet at all)
            snippet = item.get('snippet') or ''
            for email in inquisitor.extractors.emails.extract(snippet):
                try:
                    assets.add(repository.get_asset_string(
                        inquisitor.assets.email.Email,
                        email,
                        create=True,
                    )[1])
                except inquisitor.assets.email.EmailValidateException as e:
                    logging.error(e.message)
            # Extract LinkedIn Accounts
            if parsed_link.netloc.endswith('.linkedin.com'):
                try:
                    asset = repository.get_asset_string(
                        inquisitor.assets.linkedin.LinkedIn,
                        item['link'],
                        create=True,
                    )[1]
                except inquisitor.assets.linkedin.LinkedInValidateException as e:
                    logging.error(e.message)
                else:
                    # Work around for acquiring the corporation: take it
                    # from the search result's page map when present.
                    org = self._person_org(item)
                    if org:
                        try:
                            asset.corporation = (
                                inquisitor.assets.registrant.canonicalize(org)
                            )
                        except inquisitor.assets.registrant.RegistrantValidateException as e:
                            # Keep the account even if its organisation
                            # cannot be canonicalised.
                            logging.error(e.message)
                    assets.add(asset)
            # TODO: extract accounts for other social media networks
        return assets
--------------------------------------------------------------------------------
/inquisitor/sources/shodan_search.py:
--------------------------------------------------------------------------------
1 | import inquisitor.assets.host
2 | import inquisitor.assets.registrant
3 | import logging
4 | import shodan
5 |
class ShodanAPI:
    """Shodan source that turns search matches into assets."""

    def __init__(self, api_key, limit=None):
        """Create the Shodan client.

        api_key -- key handed to ``shodan.Shodan``.
        limit   -- maximum number of result pages to fetch; falsy = no limit.
        """
        self.api_key = api_key
        self.service = shodan.Shodan(self.api_key)
        self.limit = limit

    def search(self, query):
        """Return all matches for ``query``, fetched page by page.

        Paging stops when the page limit is reached, when a page has no
        matches, or when at least ``total`` matches have been collected.
        Stopping on an empty page prevents an endless loop when the service
        reports a total larger than what it actually returns.
        """
        page = 1
        items = list()
        while not (self.limit and page > self.limit):
            results = self.service.search(query, page=page)
            matches = results.get('matches')
            if not matches:
                break
            items.extend(matches)
            if len(items) >= results.get('total', 0):
                break
            page += 1
        return items

    @staticmethod
    def _collect(assets, repository, asset_type, identifier, error_type):
        """Add the asset for ``identifier`` to ``assets``; log validation errors."""
        try:
            assets.add(repository.get_asset_string(
                asset_type,
                identifier,
                create=True,
            )[1])
        except error_type as e:
            logging.error(e.message)

    def transform(self, repository, query):
        """Search for ``query`` and return the set of assets found.

        Each match contributes its ISP and organisation as registrants and
        every host name it mentions (scan options, HTTP host, ``hostnames``
        and ``domains``) as hosts. Missing fields are tolerated; values that
        fail validation are logged and skipped.
        """
        registrant_type = inquisitor.assets.registrant.Registrant
        registrant_error = inquisitor.assets.registrant.RegistrantValidateException
        host_type = inquisitor.assets.host.Host
        host_error = inquisitor.assets.host.HostValidateException

        assets = set()
        for item in self.search(query):
            # Extract ISP and Organization Registrants
            for field in ('isp', 'org'):
                self._collect(
                    assets, repository,
                    registrant_type, item.get(field), registrant_error,
                )
            # Gather every host name the match mentions, in the same order
            # the individual extractions were originally performed.
            hosts = []
            options = (item.get('_shodan') or {}).get('options') or {}
            if options.get('hostname'):
                hosts.append(options['hostname'])
            http = item.get('http') or {}
            if http.get('host'):
                hosts.append(http['host'])
            hosts.extend(item.get('hostnames') or [])
            hosts.extend(item.get('domains') or [])
            for host in hosts:
                self._collect(assets, repository, host_type, host, host_error)
        return assets
--------------------------------------------------------------------------------
/inquisitor/__init__.py:
--------------------------------------------------------------------------------
1 | import inquisitor.assets.block
2 | import inquisitor.assets.email
3 | import inquisitor.assets.host
4 | import inquisitor.assets.linkedin
5 | import inquisitor.assets.registrant
6 | import sys
7 | import unqlite
8 |
# Asset modules known to the repository. Each module is read for its
# REPOSITORY and ASSET_CLASS constants; the list order is the order in which
# collections are created and scanned.
ASSET_MODULES = [
    inquisitor.assets.registrant,
    inquisitor.assets.block,
    inquisitor.assets.host,
    inquisitor.assets.email,
    inquisitor.assets.linkedin,
]
16 |
class IntelligenceRepository:
    """Asset store backed by an UnQLite database, one collection per asset type.

    Asset modules describe themselves through three module-level constants:
    ``REPOSITORY`` (collection name), ``ASSET_CLASS`` and ``OBJECT_ID`` (name
    of the attribute that identifies an asset).
    """

    def __init__(self, path):
        """Open the database at ``path`` and create a collection per asset module."""
        self.database = unqlite.UnQLite(path)
        self.repositories = dict()
        for asset_module in ASSET_MODULES:
            identifier = asset_module.REPOSITORY
            repository = self.database.collection(identifier)
            repository.create()
            self.repositories[identifier] = repository

    @staticmethod
    def _hydrate(asset_class, data):
        """Rebuild an asset from its stored attributes without calling __init__."""
        obj = asset_class.__new__(asset_class)
        for name, value in data.items():
            setattr(obj, name, value)
        # Work on a plain dict copy of the stored transform cache.
        obj.transforms = dict(obj.transforms)
        return obj

    def get_asset_data(self, asset):
        """Return the stored record whose identifier matches ``asset``, or None."""
        module = sys.modules[asset.__class__.__module__]
        repository = self.repositories[module.REPOSITORY]
        identifier = module.OBJECT_ID
        query = getattr(asset, identifier)
        results = repository.filter(lambda a: a['data'][identifier] == query)
        return results[0] if results else None

    def get_asset_object(self, asset, create=False, store=False):
        """Look up ``asset`` and return ``(record_id, asset_object)``.

        When the asset is stored, the object is rebuilt from the record.
        When it is not stored and ``create`` is true, a fresh object is
        constructed from the identifier; with ``store`` it is also saved and
        its new record id returned, otherwise the id is None. Returns None
        when the asset is not stored and ``create`` is false.
        """
        result = self.get_asset_data(asset)
        asset_type = asset.__class__
        if result:
            return (result['__id'], self._hydrate(asset_type, result['data']))
        if not create:
            return None
        asset_module = sys.modules[asset_type.__module__]
        asset = asset_type(getattr(asset, asset_module.OBJECT_ID))
        # Build the tuple in one go: tuples cannot be assigned into afterwards.
        asset_id = self.put_asset_object(asset) if store else None
        return (asset_id, asset)

    def get_asset_string(
        self,
        asset_type,
        identifier,
        create=False,
        store=False
    ):
        """Same as ``get_asset_object`` but addressed by type and identifier."""
        # The probe bypasses __init__, so the identifier is used as given.
        query = asset_type.__new__(asset_type)
        module = sys.modules[asset_type.__module__]
        setattr(query, module.OBJECT_ID, identifier)
        return self.get_asset_object(query, create=create, store=store)

    def get_assets(self, include, limit=None):
        """Return the set of stored assets accepted by ``include(obj, data)``.

        ``limit``, when truthy, caps the number of accepted assets taken from
        each collection separately.
        """
        results = set()
        for asset_module in ASSET_MODULES:
            asset_class = asset_module.ASSET_CLASS
            repository = self.repositories[asset_module.REPOSITORY]
            index = 0
            for record in repository.all():
                data = record['data']
                obj = self._hydrate(asset_class, data)
                if include(obj, data):
                    results.add(obj)
                    index += 1
                if limit and index >= limit:
                    break
        return results

    def put_asset_object(self, asset, overwrite=False):
        """Store ``asset`` and its related assets; return the record id.

        A new asset is inserted. An existing one is updated only when
        ``overwrite`` is true; otherwise nothing is written and None is
        returned. Related assets are stored without overwriting.
        """
        result = None
        module = sys.modules[asset.__class__.__module__]
        repository = self.repositories[module.REPOSITORY]
        exists = self.get_asset_data(asset)
        if not exists:
            result = repository.store({'data': asset.__dict__})
        elif overwrite:
            repository.update(exists['__id'], {'data': asset.__dict__})
            result = exists['__id']
        if not exists or overwrite:
            for related in asset.related(self):
                self.put_asset_object(related, overwrite=False)
        return result

    def put_asset_string(
        self,
        asset_type,
        identifier,
        owned=None,
        overwrite=False
    ):
        """Construct ``asset_type(identifier, owned=owned)`` and store it.

        Returns what ``put_asset_object`` returns for the new object.
        """
        asset = asset_type(identifier, owned=owned)
        return self.put_asset_object(asset, overwrite=overwrite)
111 |
--------------------------------------------------------------------------------
/inquisitor/assets/registrant.py:
--------------------------------------------------------------------------------
1 | import inquisitor.assets
2 | import unidecode
3 | import urlparse
4 |
class RegistrantValidateException(Exception):
    """Raised when a value cannot be accepted as a registrant name."""
7 |
def canonicalize(registrant):
    """Return the canonical form of a registrant name.

    The name is stripped, transliterated with ``unidecode`` and upper-cased.
    Raises RegistrantValidateException for falsy or non-string input.
    """
    if not registrant:
        raise RegistrantValidateException('Registrants cannot be None')
    if not (isinstance(registrant, str) or isinstance(registrant, unicode)):
        raise RegistrantValidateException('Registrants must be strings')
    trimmed = unicode(registrant.strip())
    return unidecode.unidecode(trimmed).upper()
15 |
def main_classify_args(parser):
    """Add the accept/unmark/reject registrant options to ``parser``.

    Every option takes one or more values, canonicalises each of them and
    collects them in ``registrants_accepted``, ``registrants_unmarked`` or
    ``registrants_rejected`` (an empty list when the option is not given).
    """
    options = (
        ('-ar', '--accept-registrant', 'accepted'),
        ('-ur', '--unmark-registrant', 'unmarked'),
        ('-rr', '--reject-registrant', 'rejected'),
    )
    for short_flag, long_flag, verdict in options:
        parser.add_argument(
            short_flag, long_flag,
            metavar='REGISTRANT',
            type=canonicalize,
            nargs='+',
            help='Specifies a registrant to classify as {}.'.format(verdict),
            dest='registrants_{}'.format(verdict),
            default=list(),
        )
44 |
def main_classify_canonicalize(args):
    """Return ``(accepted, unmarked, rejected)`` registrant sets from ``args``.

    Raises ValueError when the same registrant is given more than one
    classification. Conflicts are checked pairwise: a three-way intersection
    would only catch a value present in all three lists and let, for
    example, an accepted-and-rejected registrant through.
    """
    accepted = set(args.registrants_accepted)
    unmarked = set(args.registrants_unmarked)
    rejected = set(args.registrants_rejected)
    redundant = (
        (accepted & unmarked) | (accepted & rejected) | (unmarked & rejected)
    )
    if redundant:
        raise ValueError(
            ('Conflicting classifications for registrants '
             ': {}').format(sorted(redundant))
        )
    accepted = set([canonicalize(a) for a in accepted])
    unmarked = set([canonicalize(a) for a in unmarked])
    rejected = set([canonicalize(a) for a in rejected])
    return (accepted, unmarked, rejected)
59 |
class Registrant(inquisitor.assets.Asset):
    """Asset representing an organisation name, stored in canonical form."""

    def __init__(self, registrant, owned=None):
        # Name the class explicitly: super(self.__class__, ...) recurses
        # forever as soon as this class is subclassed.
        super(Registrant, self).__init__(owned=owned)
        self.registrant = canonicalize(registrant)

    def __eq__(self, other):
        if not isinstance(other, self.__class__):
            return False
        return self.registrant == other.registrant

    def __ne__(self, other):
        # Python 2 does not derive != from ==.
        return not self.__eq__(other)

    def __hash__(self):
        # Keep hashing consistent with __eq__ so sets de-duplicate equal
        # registrants.
        return hash(self.registrant)

    def related(self, repo):
        """Return assets implied by this one; registrants imply none."""
        return set()

    def transform(self, repo, sources):
        """Return the assets discovered for this registrant.

        ``sources`` maps source names to API objects. For each of 'google'
        and 'shodan' that is present, cached results are reused when there
        are any; otherwise the source is queried and the result cached on
        this object.
        """
        assets = set()
        # Google Transforms
        if sources.get('google'):
            subassets = self.cache_transform_get('google', repo)
            if not subassets:
                google = sources['google']
                # Query: Plain
                subassets.update(google.transform(repo, self.registrant))
                # Query: LinkedIn
                subassets.update(google.transform(
                    repo, 'site:linkedin.com {}'.format(self.registrant)
                ))
                self.cache_transform_store('google', subassets)
            assets.update(subassets)
        # Shodan Transforms
        if sources.get('shodan'):
            subassets = self.cache_transform_get('shodan', repo)
            if not subassets:
                shodan = sources['shodan']
                # Query: Plain
                subassets.update(shodan.transform(repo, self.registrant))
                # Query: Organization
                subassets.update(shodan.transform(
                    repo, 'org:"{}"'.format(self.registrant)
                ))
                self.cache_transform_store('shodan', subassets)
            assets.update(subassets)
        return assets

    def is_owned(self, repo):
        """Return True only when this registrant was classified as owned."""
        return bool(self.owned)

    def parent_asset(self, repo):
        """Registrants don't have parents."""
        return None
121 |
# Module-level descriptors looked up by the repository and the transform
# cache: the class to instantiate, the attribute that identifies an
# instance, and the collection it is stored in.
ASSET_CLASS = Registrant
OBJECT_ID = 'registrant'
REPOSITORY = 'registrants'
--------------------------------------------------------------------------------
/inquisitor/assets/email.py:
--------------------------------------------------------------------------------
1 | import inquisitor.assets
2 | import inquisitor.assets.host
3 | import logging
4 | import validate_email
5 |
class EmailValidateException(Exception):
    """Raised when a value cannot be accepted as an e-mail address."""
8 |
def canonicalize(email):
    """Return ``email`` stripped and with its domain part canonicalised.

    Raises EmailValidateException for falsy or non-string input, for
    addresses rejected by ``validate_email`` and for addresses whose domain
    is not a valid host.
    """
    if not email:
        raise EmailValidateException('Emails cannot be None')
    if not isinstance(email, str) and not isinstance(email, unicode):
        raise EmailValidateException('Emails must be strings')
    email = email.strip()
    if not validate_email.validate_email(email):
        raise EmailValidateException(
            'Unable to validate email {}'.format(email)
        )
    # Split on the last '@' only: an address with more than one '@' would
    # otherwise fail to unpack and leak a ValueError that callers catching
    # EmailValidateException do not expect.
    recipient, domain = email.rsplit('@', 1)
    try:
        domain = inquisitor.assets.host.canonicalize(domain)
    except inquisitor.assets.host.HostValidateException:
        raise EmailValidateException(
            'Unable to validate domain for email {}'.format(email)
        )
    return '@'.join([recipient, domain])
27 |
def main_classify_args(parser):
    """Add the accept/unmark/reject e-mail options to ``parser``.

    Every option takes one or more values, canonicalises each of them and
    collects them in ``emails_accepted``, ``emails_unmarked`` or
    ``emails_rejected`` (an empty list when the option is not given).
    """
    options = (
        ('-ae', '--accept-email', 'accepted'),
        ('-ue', '--unmark-email', 'unmarked'),
        ('-re', '--reject-email', 'rejected'),
    )
    for short_flag, long_flag, verdict in options:
        parser.add_argument(
            short_flag, long_flag,
            metavar='EMAIL',
            type=canonicalize,
            nargs='+',
            help='Specifies a email to classify as {}.'.format(verdict),
            dest='emails_{}'.format(verdict),
            default=list(),
        )
56 |
def main_classify_canonicalize(args):
    """Return ``(accepted, unmarked, rejected)`` e-mail sets from ``args``.

    Raises ValueError when the same address is given more than one
    classification. Conflicts are checked pairwise: a three-way intersection
    would only catch a value present in all three lists and let, for
    example, an accepted-and-rejected address through.
    """
    accepted = set(args.emails_accepted)
    unmarked = set(args.emails_unmarked)
    rejected = set(args.emails_rejected)
    redundant = (
        (accepted & unmarked) | (accepted & rejected) | (unmarked & rejected)
    )
    if redundant:
        raise ValueError(
            ('Conflicting classifications for emails '
             ': {}').format(sorted(redundant))
        )
    accepted = set([canonicalize(a) for a in accepted])
    unmarked = set([canonicalize(a) for a in unmarked])
    rejected = set([canonicalize(a) for a in rejected])
    return (accepted, unmarked, rejected)
71 |
class Email(inquisitor.assets.Asset):
    """Asset representing an e-mail address, stored in canonical form."""

    def __init__(self, email, owned=None):
        # Name the class explicitly: super(self.__class__, ...) recurses
        # forever as soon as this class is subclassed.
        super(Email, self).__init__(owned=owned)
        self.email = canonicalize(email)
        # Split on the last '@' so an address with several '@' still
        # unpacks into exactly two parts.
        recipient, domain = self.email.rsplit('@', 1)
        self.recipient = recipient
        self.domain = domain

    def __eq__(self, other):
        if not isinstance(other, self.__class__):
            return False
        return self.email == other.email

    def __ne__(self, other):
        # Python 2 does not derive != from ==.
        return not self.__eq__(other)

    def __hash__(self):
        # Keep hashing consistent with __eq__ so sets de-duplicate equal
        # addresses.
        return hash(self.email)

    def related(self, repo):
        """Return the assets implied by this address: the host of its domain."""
        results = set()
        try:
            results.add(repo.get_asset_string(
                inquisitor.assets.host.Host,
                self.domain,
                create=True,
            )[1])
        except inquisitor.assets.host.HostValidateException as e:
            logging.error(e.message)
        return results

    def transform(self, repo, sources):
        """Return the assets discovered for this address.

        When ``sources`` has a 'google' entry, cached results are reused if
        there are any; otherwise the quoted address is searched for and the
        result cached on this object.
        """
        assets = set()
        if sources.get('google'):
            subassets = self.cache_transform_get('google', repo)
            if not subassets:
                google = sources['google']
                # Query: Email
                subassets.update(google.transform(
                    repo, '"{}"'.format(self.email)
                ))
                self.cache_transform_store('google', subassets)
            assets.update(subassets)
        return assets

    def is_owned(self, repo):
        """Return the manual classification, else whether the domain is owned."""
        if self.owned is not None:
            return self.owned
        try:
            host = repo.get_asset_string(inquisitor.assets.host.Host, self.domain)
            if host and host[1].is_owned(repo):
                return True
        except inquisitor.assets.host.HostValidateException as e:
            logging.error(e.message)
        return False

    def parent_asset(self, repo):
        """Return the stored, owned host of this address's domain, or None."""
        if not self.domain:
            return None
        try:
            host = repo.get_asset_string(inquisitor.assets.host.Host, self.domain)
            if host and host[1].is_owned(repo):
                return host[1]
        except inquisitor.assets.host.HostValidateException as e:
            logging.error(e.message)
        # No parental candidate was found.
        return None
148 |
# Module-level descriptors looked up by the repository and the transform
# cache: the class to instantiate, the attribute that identifies an
# instance, and the collection it is stored in.
ASSET_CLASS = Email
OBJECT_ID = 'email'
REPOSITORY = 'emails'
--------------------------------------------------------------------------------
/inquisitor/assets/linkedin.py:
--------------------------------------------------------------------------------
1 | import inquisitor.assets
2 | import inquisitor.assets.registrant
3 | import logging
4 | import urlparse
5 |
class LinkedInValidateException(Exception):
    """Raised when a value cannot be accepted as a LinkedIn account URL."""
8 |
def canonicalize(linkedin):
    """Return the LinkedIn account URL stripped and lower-cased.

    The URL must have a network location ending in ``.linkedin.com`` and a
    path starting with ``/in/``. Raises LinkedInValidateException for falsy
    or non-string input and for URLs failing either check.
    """
    if not linkedin:
        raise LinkedInValidateException('LinkedIn accounts cannot be None')
    if not (isinstance(linkedin, str) or isinstance(linkedin, unicode)):
        raise LinkedInValidateException('LinkedIn accounts must be strings')
    linkedin = linkedin.strip().lower()
    pieces = urlparse.urlparse(linkedin)
    on_linkedin = pieces.netloc.endswith('.linkedin.com')
    is_profile = pieces.path.startswith('/in/')
    # Both failures are reported with the same message.
    if not (on_linkedin and is_profile):
        raise LinkedInValidateException(
            'Failed to validate LinkedIn account: {}'.format(linkedin)
        )
    return linkedin
29 |
def main_classify_args(parser):
    """Add the accept/unmark/reject LinkedIn options to ``parser``.

    Every option takes one or more values, canonicalises each of them and
    collects them in ``linkedin_accepted``, ``linkedin_unmarked`` or
    ``linkedin_rejected`` (an empty list when the option is not given).
    """
    options = (
        ('-al', '--accept-linkedin', 'accepted'),
        ('-ul', '--unmark-linkedin', 'unmarked'),
        ('-rl', '--reject-linkedin', 'rejected'),
    )
    for short_flag, long_flag, verdict in options:
        parser.add_argument(
            short_flag, long_flag,
            metavar='LINKEDIN',
            type=canonicalize,
            nargs='+',
            help='Specifies a LinkedIn Account to classify as {}.'.format(
                verdict
            ),
            dest='linkedin_{}'.format(verdict),
            default=list(),
        )
58 |
def main_classify_canonicalize(args):
    """Return ``(accepted, unmarked, rejected)`` LinkedIn sets from ``args``.

    Raises ValueError when the same account is given more than one
    classification. Conflicts are checked pairwise: a three-way intersection
    would only catch a value present in all three lists and let, for
    example, an accepted-and-rejected account through.
    """
    accepted = set(args.linkedin_accepted)
    unmarked = set(args.linkedin_unmarked)
    rejected = set(args.linkedin_rejected)
    redundant = (
        (accepted & unmarked) | (accepted & rejected) | (unmarked & rejected)
    )
    if redundant:
        raise ValueError(
            ('Conflicting classifications for LinkedIn Accounts '
             ': {}').format(sorted(redundant))
        )
    accepted = set([canonicalize(a) for a in accepted])
    unmarked = set([canonicalize(a) for a in unmarked])
    rejected = set([canonicalize(a) for a in rejected])
    return (accepted, unmarked, rejected)
73 |
class LinkedIn(inquisitor.assets.Asset):
    """Asset representing a LinkedIn account, identified by its profile URL."""

    def __init__(self, linkedin, owned=None):
        # ``owned`` defaults to None (unclassified) like the other assets.
        # A False default would read as "manually rejected" in is_owned()
        # and make automatic ownership detection unreachable.
        # Name the class explicitly: super(self.__class__, ...) recurses
        # forever as soon as this class is subclassed.
        super(LinkedIn, self).__init__(owned=owned)
        self.linkedin = canonicalize(linkedin)
        # The canonical path starts with '/in/', so element 2 of the split
        # path is the account name.
        self.username = urlparse.urlparse(self.linkedin).path.split('/')[2]
        # TODO: This should be retrieved using linkedin api but we don't have
        # TODO: time for that, so fill it up using Google Search results
        # TODO: instead
        self.corporation = None

    def __eq__(self, other):
        if not isinstance(other, self.__class__):
            return False
        return self.linkedin == other.linkedin

    def __ne__(self, other):
        # Python 2 does not derive != from ==.
        return not self.__eq__(other)

    def __hash__(self):
        # Keep hashing consistent with __eq__ so sets de-duplicate equal
        # accounts.
        return hash(self.linkedin)

    def related(self, repo):
        """Return the assets implied by this account: its corporation, if known."""
        results = set()
        if self.corporation:
            try:
                results.add(repo.get_asset_string(
                    inquisitor.assets.registrant.Registrant,
                    self.corporation,
                    create=True,
                )[1])
            except inquisitor.assets.registrant.RegistrantValidateException as e:
                logging.error(e.message)
        return results

    def transform(self, repo, sources):
        """Return the assets discovered for this account; none are implemented."""
        return set()

    def is_owned(self, repo):
        """Return the manual classification, else whether the corporation is owned."""
        if self.owned is not None:
            return self.owned
        if self.corporation:
            try:
                registrant = repo.get_asset_string(
                    inquisitor.assets.registrant.Registrant,
                    self.corporation
                )
                if registrant and registrant[1].is_owned(repo):
                    return True
            except inquisitor.assets.registrant.RegistrantValidateException as e:
                logging.error(e.message)
        return False

    def parent_asset(self, repo):
        """Return the stored, owned registrant of the corporation, or None."""
        if not self.corporation:
            return None
        try:
            registrant = repo.get_asset_string(
                inquisitor.assets.registrant.Registrant,
                self.corporation,
            )
            if registrant and registrant[1].is_owned(repo):
                return registrant[1]
        except inquisitor.assets.registrant.RegistrantValidateException as e:
            logging.error(e.message)
        # No parental candidate was found.
        return None
147 |
# Module-level descriptors looked up by the repository and the transform
# cache: the class to instantiate, the attribute that identifies an
# instance, and the collection it is stored in.
ASSET_CLASS = LinkedIn
OBJECT_ID = 'linkedin'
REPOSITORY = 'linkedins'
--------------------------------------------------------------------------------
/tests/test_inquisitor.py:
--------------------------------------------------------------------------------
1 | from nose.tools import *
2 | import inq
3 |
def setup():
    """Module-level fixture hook; nothing to prepare."""
    return None
6 |
def teardown():
    """Module-level fixture hook; nothing to clean up."""
    return None
9 |
def test_inquisitor():
    """Drive the ``inq`` command line end to end against ``coke.db``.

    Classifies one host and a list of registrants (one ``classify`` call per
    value, in a fixed order), then runs ``dump`` and both ``status`` variants.
    """
    database = 'coke.db'
    accepted_registrants = (
        'COCA-COLA ENTERPRISES',
        'COCA-COLA BOTTLING COMPANY OF MINDEN, INC.',
        'COCA-COLA BOTTLING COMPANY OF MINDEN',
        'COCA COLA NETWORK REDIRECT',
        'THE COCA-COLA COMPANY',
        'COCA-COLA HBC SERVICES MEPE',
    )
    rejected_registrants = (
        'BH MEDIA GROUP INC.',
        'AMAZON TECHNOLOGIES',
        'LINODE',
        'HUBSPOT INC.',
        'AMAZON.COM',
        'LINKEDIN CORPORATION',
        'LEAF GROUP, LTD.',
        'LEAF GROUP LTD.',
        'DOMAIN PROTECTION SERVICES, INC.',
        'CHRIS GASTON',
        'NSONE INC',
        'INTERNATIONAL MOTORSPORTS ASSOCIATION, LLC',
        'SHYAMA ECONSULTANCY LIMITED',
        'ALEXA INTERNET',
        'POOL PRODUCTIONS',
        'TWITTER, INC.',
        'TWITTER INC.',
        'INSTAGRAM, LLC',
        'FACEBOOK, INC.',
        'REGISTRARSEC, LLC',
        'GET SATISFACTION',
        'GOOGLE INC.',
        'AUTOMATTIC, INC.',
        'AUTOMATTIC, INC',
        'DOMAINS BY PROXY, LLC',
        'WHOIS PRIVACY SERVICE',
        'SINGLEHOP, INC.',
        'CYBERNET QUEST',
        'QWEST COMMUNICATIONS COMPANY, LLC',
        'GKG.NET DOMAIN PROXY SERVICE',
        'INTRAWORLD COMMUNICATIONS CORPORATION',
        'HYDROSOFT INTERNET',
        'KSREGISTRY GMBH',
        'OVH HOSTING, INC.',
        'WOODYNET',
        'KEY-SYSTEMS GMBH',
        'ACTIVE MINDS GMBH',
        'LIQUID WEB, L.L.C',
        'WHOIS INC',
        'PUBLICDOMAINREGISTRY.COM',
        'CLOUDFLARE, INC.',
        'PDR LTD.',
        'ADVAMEG, INC.',
        'INKTOMI CORPORATION',
        'YAHOO! INC.',
        'YAHOO! BROADCAST SERVICES, INC.',
        'SOFTLAYER TECHNOLOGIES, INC.',
        'SOFTLAYER CORPORATE C',
        'DNSTINATION INC.',
        'INTERNETNAMESFORBUSINESS.COM',
        'INTERNATIONAL BUSINESS MACHINES CORPORATION',
        'IBM',
        'ENOM, INCORPORATED',
        'OVH (NWK)',
        'SOFTLAYER TECHNOLOGIES INC.',
        'INFORMER TECHNOLOGIES, INC.',
        'GODADDY.COM, LLC',
        'GO DADDY OPERATING COMPANY, LLC',
        'TUCOWS.COM CO.',
        'TUCOWS.COM CO',
        'NEUSTAR, INC.',
        'DYN',
        'DYN INC',
        'AMAZON TECHNOLOGIES, INC.',
        'AKAMAI TECHNOLOGIES, INC.',
        'MARKMONITOR',
        'DYNAMIC NETWORK SERVICES, INC.',
        'MARKMONITOR INC.',
        'WIKIMEDIA FOUNDATION INC.',
        'WIKIMEDIA FOUNDATION, INC.',
        'INSALA, LLC',
    )
    # Accept Host
    inq.main(['classify', database, '-ah', 'coca-cola.com'])
    # Accept Registrants
    for name in accepted_registrants:
        inq.main(['classify', database, '-ar', name])
    # Reject Registrants
    for name in rejected_registrants:
        inq.main(['classify', database, '-rr', name])
    # Test Dump
    inq.main(['dump', database, '-j', 'coke.json'])
    # Test Status
    inq.main(['status', database])
    # Test Status Strong
    inq.main(['status', database, '--strong'])
--------------------------------------------------------------------------------
/inquisitor/assets/block.py:
--------------------------------------------------------------------------------
1 | import inquisitor.assets
2 | import inquisitor.assets.registrant
3 | import ipwhois
4 | import logging
5 | import netaddr
6 |
class BlockValidateException(Exception):
    """Raised when a value cannot be accepted as a network block."""
9 |
def canonicalize(block):
    """Validate a network block and return its canonical string form.

    The value is parsed with ``netaddr.IPNetwork`` and converted back to a
    string.

    Raises:
        BlockValidateException: if ``block`` is falsy, is not a string, or
            cannot be parsed as a network.
    """
    if not block:
        raise BlockValidateException('Blocks cannot be None')
    if not (isinstance(block, str) or isinstance(block, unicode)):
        raise BlockValidateException('Blocks must be strings')
    try:
        parsed = netaddr.IPNetwork(block)
    except netaddr.core.AddrFormatError:
        raise BlockValidateException('Unable to parse block {}'.format(block))
    return str(parsed)
21 |
def main_classify_args(parser):
    """Register the block classification options on ``parser``.

    Adds the accept, unmark and reject options. Each takes one or more
    blocks, canonicalizes every value, and defaults to an empty list.
    """
    options = (
        ('-ab', '--accept-block', 'blocks_accepted',
         'Specifies a block to classify as accepted.'),
        ('-ub', '--unmark-block', 'blocks_unmarked',
         'Specifies a block to classify as unmarked.'),
        ('-rb', '--reject-block', 'blocks_rejected',
         'Specifies a block to classify as rejected.'),
    )
    for short_flag, long_flag, destination, description in options:
        parser.add_argument(
            short_flag, long_flag,
            metavar='BLOCK',
            type=canonicalize,
            nargs='+',
            help=description,
            dest=destination,
            # A fresh list per option, as in the explicit form.
            default=[],
        )
50 |
def main_classify_canonicalize(args):
    """Collect the block classifications requested on the command line.

    Args:
        args: parsed arguments exposing ``blocks_accepted``,
            ``blocks_unmarked`` and ``blocks_rejected`` iterables.

    Returns:
        A ``(accepted, unmarked, rejected)`` tuple of sets of canonical
        block strings.

    Raises:
        ValueError: if the same block is requested under more than one
            classification.
    """
    accepted = set(args.blocks_accepted)
    unmarked = set(args.blocks_unmarked)
    rejected = set(args.blocks_rejected)
    # A block conflicts as soon as it appears under any two classifications;
    # intersecting all three sets at once would miss e.g. accept + reject.
    redundant = (
        (accepted & unmarked) |
        (accepted & rejected) |
        (unmarked & rejected)
    )
    if redundant:
        raise ValueError(
            ('Conflicting classifications for blocks '
             ': {}').format(sorted(redundant))
        )
    accepted = set(canonicalize(a) for a in accepted)
    unmarked = set(canonicalize(a) for a in unmarked)
    rejected = set(canonicalize(a) for a in rejected)
    return (accepted, unmarked, rejected)
65 |
class Block(inquisitor.assets.Asset):
    """Asset describing a network block.

    On construction the block is canonicalized and an RDAP lookup is made
    for its address. When the lookup describes exactly this block, the name
    of its organisational registrant is stored in ``registrant``.
    """

    def __init__(self, block, owned=None):
        """Create the asset and look up its registrant.

        Args:
            block: the network block, in any form ``canonicalize`` accepts.
            owned: manual ownership classification, forwarded to the base
                class (``None`` when unclassified).

        Raises:
            BlockValidateException: if ``block`` is not a valid block.
        """
        # Name the class explicitly: super(self.__class__, self) recurses
        # forever as soon as this class is subclassed.
        super(Block, self).__init__(owned=owned)
        self.block = canonicalize(block)
        # Acquire IP whois for block
        ip = str(netaddr.IPNetwork(self.block).ip)
        info = ipwhois.ipwhois.IPWhois(ip).lookup_rdap()
        self.registrant = None
        network = info.get('network') or {}
        # Only trust the registrant when the lookup is about this very block
        if network.get('cidr') and network.get('cidr') == self.block:
            for obj in (info.get('objects') or {}).values():
                roles = obj.get('roles')
                contact = obj.get('contact')
                if not roles or 'registrant' not in roles:
                    continue
                if contact and contact.get('kind') == 'org':
                    self.registrant = (
                        inquisitor.assets.registrant.canonicalize(
                            contact['name']
                        )
                    )
                    break

    def __eq__(self, other):
        """Two blocks are equal when their canonical strings match."""
        if not isinstance(other, self.__class__):
            return False
        return self.block == other.block

    def __ne__(self, other):
        """Inverse of ``__eq__``; Python 2 does not derive ``!=`` itself."""
        return not self.__eq__(other)

    def related(self, repo):
        """Return the set of assets directly related to this block.

        Currently this is the registrant asset, which is created in ``repo``
        when it does not exist yet.
        """
        # Prepare the results
        results = set()
        # Related: Registrant
        if self.registrant:
            try:
                results.add(repo.get_asset_string(
                    inquisitor.assets.registrant.Registrant,
                    self.registrant,
                    create=True,
                )[1])
            except inquisitor.assets.registrant.RegistrantValidateException as e:
                logging.error(e.message)
        # Return the results
        return results

    def transform(self, repo, sources):
        """Query the available sources for assets derived from this block.

        Args:
            repo: the repository handed on to the source transforms.
            sources: mapping of source name to API object; only ``'shodan'``
                is used here.

        Returns:
            A set with the assets found (cached results are reused).
        """
        # Prepare the results
        assets = set()
        # Shodan Transforms
        if sources.get('shodan'):
            subassets = self.cache_transform_get('shodan', repo)
            if not subassets:
                # Acquire API
                shodan = sources['shodan']
                # Query: Network
                subassets.update(shodan.transform(
                    repo, 'net:"{}"'.format(self.block))
                )
                # Cache The Transform
                self.cache_transform_store('shodan', subassets)
            assets.update(subassets)
        # Return the results
        return assets

    def is_owned(self, repo):
        """Tell whether this block belongs to the target.

        A manual classification wins; otherwise the block is owned when its
        registrant is known to ``repo`` and is itself owned.
        """
        # If manually classified, return the classification
        if self.owned is not None:
            return self.owned
        # Automatically determine ownership
        if self.registrant:
            try:
                registrant = repo.get_asset_string(
                    inquisitor.assets.registrant.Registrant,
                    self.registrant,
                )
                if registrant and registrant[1].is_owned(repo):
                    return True
            except inquisitor.assets.registrant.RegistrantValidateException as e:
                logging.error(e.message)
        return False

    def parent_asset(self, repo):
        """Return the asset this block should be nested under, or ``None``.

        Preference order: the smallest owned block strictly containing this
        one, then the owned registrant.
        """
        # Acquire start and end IPs of this netblock
        network = netaddr.IPNetwork(self.block)
        network_start = network.ip & network.netmask
        network_end = network_start + (network.size - 1)
        # Acquire other owned netblocks
        blocks = repo.get_assets(
            include=lambda o, d: (
                self != o and
                isinstance(o, self.__class__) and
                o.is_owned(repo)
            )
        )
        # Check if this netblock is a child of another netblock
        parents = list()
        for block in blocks:
            # Acquire start and end IPs of the other netblock
            other = netaddr.IPNetwork(block.block)
            other_start = other.ip & other.netmask
            other_end = other_start + (other.size - 1)
            # Check if self is contained by the other netblock
            contained = (
                other_start <= network_start and
                network_end <= other_end and
                network.size < other.size
            )
            # If contained, add as potential parent
            candidate = [block, other]
            if contained and candidate not in parents:
                parents.append(candidate)
        # Return the smallest parent. Compare by size explicitly: IPNetwork
        # objects order by address first, which would pick the widest
        # enclosing block instead of the tightest one.
        if parents:
            return min(parents, key=lambda e: e[1].size)[0]
        # Check if registrant is a valid parent
        if self.registrant:
            try:
                registrant = repo.get_asset_string(
                    inquisitor.assets.registrant.Registrant,
                    self.registrant,
                )
                if registrant and registrant[1].is_owned(repo):
                    return registrant[1]
            except inquisitor.assets.registrant.RegistrantValidateException as e:
                logging.error(e.message)
        # If no parental candidate is found, return None
        return None
197 |
# Module-level descriptors that let generic code (such as the ``inq`` script)
# handle this asset type without knowing it by name.
# Key under which this module's assets are grouped in a dump.
REPOSITORY = 'blocks'
# Asset class implemented by this module.
ASSET_CLASS = Block
# Name of the attribute that holds an asset's identifier.
OBJECT_ID = 'block'
--------------------------------------------------------------------------------
/inquisitor/assets/host.py:
--------------------------------------------------------------------------------
1 | import inquisitor.assets
2 | import inquisitor.assets.block
3 | import inquisitor.assets.email
4 | import inquisitor.assets.registrant
5 | import ipwhois
6 | import logging
7 | import netaddr
8 | import socket
9 | import tld
10 | import whois
11 |
class HostValidateException(Exception):
    """Raised when a value cannot be accepted as a host name."""
14 |
def canonicalize(host):
    """Validate a host name and return it stripped and lower-cased.

    The normalised name is checked by asking ``tld`` for the top-level
    domain of ``http://<host>``.

    Raises:
        HostValidateException: if ``host`` is falsy, is not a string, or
            ``tld`` finds no known top-level domain for it.
    """
    if not host:
        raise HostValidateException('Hosts cannot be None')
    if not (isinstance(host, str) or isinstance(host, unicode)):
        raise HostValidateException('Hosts must be strings')
    normalised = host.strip().lower()
    url = 'http://{}'.format(normalised)
    try:
        tld.get_tld(url)
    except tld.exceptions.TldDomainNotFound:
        raise HostValidateException(
            'Invalid tld for host {}'.format(normalised)
        )
    return normalised
26 |
def main_classify_args(parser):
    """Register the host classification options on ``parser``.

    Adds the accept, unmark and reject options. Each takes one or more
    hosts, canonicalizes every value, and defaults to an empty list.
    """
    options = (
        ('-ah', '--accept-host', 'hosts_accepted',
         'Specifies a host to classify as accepted.'),
        ('-uh', '--unmark-host', 'hosts_unmarked',
         'Specifies a host to classify as unmarked.'),
        ('-rh', '--reject-host', 'hosts_rejected',
         'Specifies a host to classify as rejected.'),
    )
    for short_flag, long_flag, destination, description in options:
        parser.add_argument(
            short_flag, long_flag,
            metavar='HOST',
            type=canonicalize,
            nargs='+',
            help=description,
            dest=destination,
            # A fresh list per option, as in the explicit form.
            default=[],
        )
55 |
def main_classify_canonicalize(args):
    """Collect the host classifications requested on the command line.

    Args:
        args: parsed arguments exposing ``hosts_accepted``,
            ``hosts_unmarked`` and ``hosts_rejected`` iterables.

    Returns:
        A ``(accepted, unmarked, rejected)`` tuple of sets of canonical
        host names.

    Raises:
        ValueError: if the same host is requested under more than one
            classification.
    """
    accepted = set(args.hosts_accepted)
    unmarked = set(args.hosts_unmarked)
    rejected = set(args.hosts_rejected)
    # A host conflicts as soon as it appears under any two classifications;
    # intersecting all three sets at once would miss e.g. accept + reject.
    redundant = (
        (accepted & unmarked) |
        (accepted & rejected) |
        (unmarked & rejected)
    )
    if redundant:
        raise ValueError(
            ('Conflicting classifications for hosts '
             ': {}').format(sorted(redundant))
        )
    accepted = set(canonicalize(a) for a in accepted)
    unmarked = set(canonicalize(a) for a in unmarked)
    rejected = set(canonicalize(a) for a in rejected)
    return (accepted, unmarked, rejected)
70 |
class Host(inquisitor.assets.Asset):
    """Asset describing a host name.

    On construction the host is canonicalized and enriched with its parent
    domain, resolved IP address, whois data (registrant, emails, name
    servers) and the network blocks reported by RDAP for its address.
    """

    @staticmethod
    def _as_list(value):
        """Normalise a whois field that may be a string or a list."""
        if not value:
            return []
        if isinstance(value, list):
            return value
        if isinstance(value, (str, unicode)):
            return [value]
        # Any other shape is ignored
        return []

    def __init__(self, host, owned=None):
        """Create the asset and gather its metadata.

        Args:
            host: the host name, in any form ``canonicalize`` accepts.
            owned: manual ownership classification, forwarded to the base
                class (``None`` when unclassified).

        Raises:
            HostValidateException: if ``host`` (or its parent domain) is
                not a valid host.
        """
        # Name the class explicitly: super(self.__class__, self) recurses
        # forever as soon as this class is subclassed.
        super(Host, self).__init__(owned=owned)
        self.host = canonicalize(host)
        # Acquire parent domain
        self.parent = None
        zones = self.host.split('.')
        if len(zones) > 1:
            self.parent = canonicalize('.'.join(zones[1:]))
        # Acquire IP address; an unresolvable host simply has no IP
        self.ip = None
        try:
            self.ip = socket.gethostbyname(self.host)
        except (socket.error, UnicodeError):
            pass
        # Acquire whois information
        self.registrant = None
        emails = set()
        nameservers = set()
        if self.ip:
            info = whois.whois(self.host)
            if info.get('org'):
                self.registrant = inquisitor.assets.registrant.canonicalize(
                    info['org']
                )
            for email in self._as_list(info.get('emails')):
                emails.add(inquisitor.assets.email.canonicalize(email))
            for nameserver in self._as_list(info.get('name_servers')):
                nameservers.add(canonicalize(nameserver))
        self.emails = list(emails)
        self.nameservers = list(nameservers)
        # Acquire IP whois information
        blocks = set()
        if self.ip:
            # Bounded retry: every attempt counts, and a success ends the
            # loop instead of repeating the lookup.
            for _ in range(3):
                try:
                    info = ipwhois.ipwhois.IPWhois(self.ip).lookup_rdap()
                except ipwhois.exceptions.IPDefinedError:
                    # Retrying cannot change the outcome for this address
                    break
                except (ipwhois.exceptions.HTTPLookupError,
                        ipwhois.exceptions.HTTPRateLimitError):
                    continue
                cidr = (info.get('network') or {}).get('cidr') or ''
                for block in cidr.split(','):
                    block = block.strip()
                    if block:
                        blocks.add(
                            inquisitor.assets.block.canonicalize(block)
                        )
                break
        self.blocks = list(blocks)

    def __eq__(self, other):
        """Two hosts are equal when their canonical names match."""
        if not isinstance(other, self.__class__):
            return False
        return self.host == other.host

    def __ne__(self, other):
        """Inverse of ``__eq__``; Python 2 does not derive ``!=`` itself."""
        return not self.__eq__(other)

    def related(self, repo):
        """Return the set of assets directly related to this host.

        Covers the parent domain, the registrant, whois emails, name servers
        and network blocks. Missing assets are created in ``repo``;
        identifiers that fail validation are logged and skipped.
        """
        # Prepare results
        results = set()
        # Related: Parent
        if self.parent and len(self.parent.split('.')) > 1:
            try:
                results.add(repo.get_asset_string(
                    Host,
                    self.parent,
                    create=True,
                )[1])
            except HostValidateException as e:
                logging.error(e.message)
        # Related: Registrant
        if self.registrant:
            try:
                results.add(repo.get_asset_string(
                    inquisitor.assets.registrant.Registrant,
                    self.registrant,
                    create=True,
                )[1])
            except inquisitor.assets.registrant.RegistrantValidateException as e:
                logging.error(e.message)
        # Related: Emails
        for email in self.emails:
            try:
                results.add(repo.get_asset_string(
                    inquisitor.assets.email.Email,
                    email,
                    create=True,
                )[1])
            except inquisitor.assets.email.EmailValidateException as e:
                logging.error(e.message)
        # Related: Nameservers
        for nameserver in self.nameservers:
            try:
                results.add(repo.get_asset_string(
                    Host,
                    nameserver,
                    create=True,
                )[1])
            except HostValidateException as e:
                logging.error(e.message)
        # Related: Blocks
        for block in self.blocks:
            try:
                results.add(repo.get_asset_string(
                    inquisitor.assets.block.Block,
                    block,
                    create=True,
                )[1])
            except inquisitor.assets.block.BlockValidateException as e:
                logging.error(e.message)
        # Return the results
        return results

    def transform(self, repo, sources):
        """Query the available sources for assets derived from this host.

        Args:
            repo: the repository handed on to the source transforms.
            sources: mapping of source name to API object; ``'google'`` and
                ``'shodan'`` are used when present.

        Returns:
            A set with the assets found (cached results are reused).
        """
        # Prepare the results
        assets = set()
        # Google Transforms
        if sources.get('google'):
            subassets = self.cache_transform_get('google', repo)
            if not subassets:
                # Acquire API
                google = sources['google']
                # Query: Site
                subassets.update(google.transform(
                    repo, 'site:{}'.format(self.host))
                )
                # Query: Email
                subassets.update(google.transform(
                    repo, '"@{}"'.format(self.host))
                )
                # Cache The Transform
                self.cache_transform_store('google', subassets)
            assets.update(subassets)
        # Shodan Transforms
        if sources.get('shodan'):
            subassets = self.cache_transform_get('shodan', repo)
            if not subassets:
                # Acquire API
                shodan = sources['shodan']
                # Query: Plain
                subassets.update(shodan.transform(repo, self.host))
                # Query: Hostname
                subassets.update(shodan.transform(
                    repo, 'hostname:"{}"'.format(self.host))
                )
                # Cache The Transform
                self.cache_transform_store('shodan', subassets)
            assets.update(subassets)
        # Return the results
        return assets

    def is_owned(self, repo):
        """Tell whether this host belongs to the target.

        A manual classification wins; otherwise the host is owned when its
        parent domain or its registrant is known to ``repo`` and owned.
        """
        # If manually classified, return the classification
        if self.owned is not None:
            return self.owned
        # Automatically determine ownership
        if self.parent:
            try:
                parent = repo.get_asset_string(Host, self.parent)
                if parent and parent[1].is_owned(repo):
                    return True
            except HostValidateException as e:
                logging.error(e.message)
        if self.registrant:
            try:
                registrant = repo.get_asset_string(
                    inquisitor.assets.registrant.Registrant,
                    self.registrant
                )
                if registrant and registrant[1].is_owned(repo):
                    return True
            except inquisitor.assets.registrant.RegistrantValidateException as e:
                logging.error(e.message)
        return False

    def parent_asset(self, repo):
        """Return the asset this host should be nested under, or ``None``.

        Preference order: the owned parent domain, then the smallest owned
        block containing the host's IP, then the owned registrant.
        """
        # Check if this host is the child of another domain
        if self.parent:
            try:
                domain = repo.get_asset_string(Host, self.parent)
                if domain and domain[1].is_owned(repo):
                    return domain[1]
            except HostValidateException as e:
                logging.error(e.message)
        # Check if this host is the child of a network
        if self.ip:
            # Acquire owned netblocks where self is contained
            address = netaddr.IPAddress(self.ip)
            blocks = repo.get_assets(
                include=lambda o, d: (
                    isinstance(o, inquisitor.assets.block.Block) and
                    o.is_owned(repo) and
                    address in netaddr.IPNetwork(o.block)
                )
            )
            # Collate blocks and their corresponding network object
            parents = [
                [block, netaddr.IPNetwork(block.block)]
                for block in blocks
            ]
            # Return the smallest parent. Compare by size explicitly:
            # IPNetwork objects order by address first, which would pick
            # the widest enclosing block instead of the tightest one.
            if parents:
                return min(parents, key=lambda e: e[1].size)[0]
        # Check if registrant is a valid parent
        if self.registrant:
            try:
                registrant = repo.get_asset_string(
                    inquisitor.assets.registrant.Registrant,
                    self.registrant,
                )
                if registrant and registrant[1].is_owned(repo):
                    return registrant[1]
            except inquisitor.assets.registrant.RegistrantValidateException as e:
                logging.error(e.message)
        # If no parental candidate is found, return None
        return None
302 |
# Module-level descriptors that let generic code (such as the ``inq`` script)
# handle this asset type without knowing it by name.
# Key under which this module's assets are grouped in a dump.
REPOSITORY = 'hosts'
# Asset class implemented by this module.
ASSET_CLASS = Host
# Name of the attribute that holds an asset's identifier.
OBJECT_ID = 'host'
--------------------------------------------------------------------------------
/inq:
--------------------------------------------------------------------------------
1 | import argparse
2 | import inquisitor
3 | import inquisitor.sources.google_search
4 | import inquisitor.sources.shodan_search
5 | import json
6 | import logging
7 | import os
8 | import SimpleHTTPServer
9 | import SocketServer
10 | import sys
11 | import tabulate
12 | import webbrowser
13 |
# Initialize logging: this module's logger writes records of level INFO and
# above to standard output, prefixed with timestamp, logger name and level.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
handler = logging.StreamHandler(sys.stdout)
handler.setLevel(logging.INFO)
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
handler.setFormatter(formatter)
logger.addHandler(handler)
22 |
def database(path):
    """Open the intelligence repository stored at ``path``.

    Used as the argparse ``type`` of the DATABASE argument, so commands
    receive a repository object rather than a path string.
    """
    repository = inquisitor.IntelligenceRepository(path)
    return repository
25 |
def scan(
        repository,
        google_dev_key=None,
        google_cse_id=None,
        google_limit=None,
        shodan_api_key=None,
        shodan_limit=None,
):
    """Run every available transform on the owned assets of ``repository``.

    Args:
        repository: the intelligence repository to read from and store to.
        google_dev_key: Google developer key; Google transforms are skipped
            unless both this and ``google_cse_id`` are given.
        google_cse_id: Google custom search engine identifier.
        google_limit: page limit passed to the Google source.
        shodan_api_key: Shodan API key; Shodan transforms are skipped when
            missing.
        shodan_limit: page limit passed to the Shodan source.

    Raises:
        SystemExit: with status 1 when no source is usable or when the
            repository holds no owned asset.
    """
    sources = dict()
    # Initialize Google as a transform source
    if not google_dev_key or not google_cse_id:
        if not google_dev_key:
            logger.warning(
                'Skipping Google Transforms. No GOOGLE_DEV_KEY provided. '
                'Please provide the GOOGLE_DEV_KEY using the --google-dev-key '
                'parameter.'
            )
        if not google_cse_id:
            logger.warning(
                'Skipping Google Transforms. No GOOGLE_CSE_ID provided. '
                'Please provide the GOOGLE_CSE_ID using the --google-cse-id '
                'parameter.'
            )
    else:
        sources['google'] = inquisitor.sources.google_search.GoogleAPI(
            google_dev_key, google_cse_id, limit=google_limit
        )
        if not google_limit:
            logger.warning(
                'Google Search limit not set. This may potentially exhaust '
                'the daily quota of your Google API Key.'
            )
    # Initialize Shodan as a transform source
    if not shodan_api_key:
        logger.warning(
            'Skipping Shodan Transforms. No SHODAN_API_KEY provided. '
            'Please provide the SHODAN_API_KEY using the --shodan-api-key '
            'parameter.'
        )
    else:
        sources['shodan'] = inquisitor.sources.shodan_search.ShodanAPI(
            shodan_api_key, limit=shodan_limit
        )
        if not shodan_limit:
            logger.warning(
                'Shodan Search limit not set. This may potentially exhaust '
                'the daily quota of your Shodan API Key.'
            )
    # Check if any sources detected
    if not sources:
        logger.error('No valid transform sources available. Quitting.')
        # sys.exit rather than the site-provided exit() helper, which is
        # meant for interactive use and is not always installed.
        sys.exit(1)
    # Perform transforms on owned assets only
    found = 0
    logger.info('Initializing Inquisitor scan mode')
    owned = repository.get_assets(include=lambda o, d: o.is_owned(repository))
    if not owned:
        logger.error(
            'No assets available to transform. Please seed your database '
            'using the "classify" command.'
        )
        sys.exit(1)
    for asset in owned:
        asset_module_name = asset.__class__.__module__
        asset_module = sys.modules[asset_module_name]
        asset_identifier = getattr(asset, asset_module.OBJECT_ID)
        logger.info('Transforming: {}: {}'.format(
            asset_module_name,
            asset_identifier,
        ))
        for result in asset.transform(repository, sources):
            # Only results with a truthy return from the store are counted
            stored_id = repository.put_asset_object(result)
            if stored_id:
                result_module_name = result.__class__.__module__
                result_module = sys.modules[result_module_name]
                result_identifier = getattr(result, result_module.OBJECT_ID)
                logger.info('Found: {}: {}'.format(
                    result_module_name,
                    result_identifier,
                ))
                found += 1
        # Store the transformed asset back, overwriting the previous copy
        repository.put_asset_object(asset, overwrite=True)
    logger.info('New assets found: {}'.format(found))
    logger.info('Inquisitor has completed')
112 |
def status(repository, strong):
    """Print a table counting the assets of each type by classification.

    Args:
        repository: the intelligence repository to summarise.
        strong: when true, count by the stored ``owned`` value; otherwise
            count by what ``is_owned`` reports, and drop the "Unknown"
            column from the table.
    """
    table = [
        ['Asset', 'Accepted', 'Unknown', 'Rejected', 'Total'],
        [],
    ]
    for asset_module in inquisitor.ASSET_MODULES:
        asset_type = asset_module.ASSET_CLASS
        counts = []
        for owned in (True, None, False):
            def wanted(asset, data):
                # Only assets of the current type are counted
                if not isinstance(asset, asset_type):
                    return False
                if strong:
                    return data['owned'] is owned
                return asset.is_owned(repository) is owned
            results = repository.get_assets(include=wanted)
            counts.append(len(results))
        table.append([asset_type.__name__] + counts + [sum(counts)])
    if not strong:
        table[0][3] = 'Not Accepted'
        for row in table:
            # The empty separator row has no column to drop
            if row:
                del row[2]
    print(tabulate.tabulate(table))
142 |
def classify(repository, args):
    """Store the classifications requested in ``args`` into ``repository``.

    Every asset module extracts its own accepted, unmarked and rejected
    identifiers from ``args``. Each identifier is then stored with
    ``owned`` set to ``True``, ``None`` or ``False`` respectively,
    overwriting any existing entry.
    """
    ownership_labels = (True, None, False)
    for asset_module in inquisitor.ASSET_MODULES:
        # Extract assets from arguments
        accepted, unmarked, rejected = (
            asset_module.main_classify_canonicalize(args)
        )
        groups = (accepted, unmarked, rejected)
        # Execute asset classification
        for identifiers, owned in zip(groups, ownership_labels):
            for identifier in identifiers:
                repository.put_asset_string(
                    asset_module.ASSET_CLASS,
                    identifier,
                    owned=owned,
                    overwrite=True
                )
162 |
def dump(repository, path, all_flag):
    """Serialise the repository contents as JSON.

    Args:
        repository: the intelligence repository to dump.
        path: file to write to; when ``None`` the JSON is printed instead.
        all_flag: when true, assets manually rejected (``owned`` is
            ``False``) are included as well.
    """
    repo_dict = {}
    for asset_module in inquisitor.ASSET_MODULES:
        asset_type = asset_module.ASSET_CLASS
        results = repository.get_assets(
            include=lambda o, d: isinstance(o, asset_type)
        )
        entries = []
        for asset in results:
            if not all_flag and asset.owned is False:
                continue
            entry = dict(asset.__dict__)
            entry['owned'] = asset.is_owned(repository)
            entry['strong_owned'] = asset.owned
            entries.append(entry)
        # Ascending stable sort followed by a full reversal
        entries.sort(key=lambda a: a['owned'])
        entries.reverse()
        repo_dict[asset_module.REPOSITORY] = entries
    if path is not None:
        with open(path, 'w') as handle:
            json.dump(repo_dict, handle, indent=4, sort_keys=True)
        return
    print(json.dumps(repo_dict, indent=4, sort_keys=True))
185 |
def visualize(repository, use_last=False, port=8080):
    """Serve the report page for ``repository`` and open it in a browser.

    Unless ``use_last`` is set, the ownership tree of the repository is
    first written to ``report.json`` inside the ``report`` directory next to
    this script. The process changes its working directory to that
    directory and then serves it over HTTP until interrupted.

    Args:
        repository: the intelligence repository to visualise.
        use_last: reuse the previously generated ``report.json``.
        port: TCP port the local HTTP server listens on.
    """
    # Initialize web server directory
    web_dir = os.path.join(os.path.dirname(__file__), 'report')
    os.chdir(web_dir)
    if not use_last:
        def traverse(node, asset):
            # Determine name of node
            if asset:
                asset_type = asset.__class__
                asset_module = sys.modules[asset_type.__module__]
                node['name'] = '{} : {}'.format(
                    asset_type.__name__,
                    getattr(asset, asset_module.OBJECT_ID)
                )
            else:
                node['name'] = 'root'
            # Determine node children
            children = repository.get_assets(
                include=lambda o, d:
                    o.is_owned(repository) and
                    o.parent_asset(repository) == asset
            )
            if children:
                node['children'] = list()
                for child in children:
                    subnode = dict()
                    traverse(subnode, child)
                    node['children'].append(subnode)
            else:
                # Leaves get a size of 1
                node['size'] = 1
        # Start traversal
        root = {}
        traverse(root, None)
        # Dump visualization to JSON file
        with open('report.json', 'w') as handle:
            json.dump(root, handle, indent=4, sort_keys=True)
    # Start HTTP Server. Bind before opening the browser so the first
    # request cannot arrive before the socket is listening.
    http_handler = SimpleHTTPServer.SimpleHTTPRequestHandler
    httpd = SocketServer.TCPServer(("", port), http_handler)
    try:
        webbrowser.open('http://localhost:{}/index.html'.format(port), new=2)
        httpd.serve_forever()
    except KeyboardInterrupt:
        # Ctrl-C is the expected way to stop the server
        pass
    finally:
        # Release the listening socket
        httpd.server_close()
228 |
def main(cmd_args):
    """Parse ``cmd_args`` and run the selected Inquisitor command.

    Args:
        cmd_args: the command-line arguments, without the program name.

    Raises:
        SystemExit: with status 0 after a completed ``scan``; also raised by
            argparse on invalid arguments or ``--help``.
    """

    # Create main argument parser
    parent_parser = argparse.ArgumentParser(add_help=False)
    parent_parser.add_argument(
        'database',
        metavar='DATABASE',
        type=database,
        help=(
            'The path to the intelligence database to use. If specified file '
            'does not exist, a new one will be created.'
        ),
    )

    # Create subcommand parsers
    main_parser = argparse.ArgumentParser()
    commands_subparsers = main_parser.add_subparsers(
        title='command',
        help='The action to perform.',
        dest='command',
    )

    # Parse arguments for scan command
    scan_parser = commands_subparsers.add_parser(
        'scan',
        help=(
            'Search OSINT sources for intelligence based on known assets '
            'belonging to the target.'
        ),
        parents=[parent_parser],
    )
    scan_parser.add_argument(
        '--google-dev-key',
        metavar='GOOGLE_DEV_KEY',
        type=str,
        help=(
            'Specifies the developer key to use to query Google Custom '
            'Search. Visit the Google APIs Console '
            '(http://code.google.com/apis/console) to get an API key. If not '
            'specified, the script will simply skip asset transforms that '
            'involve Google Search.'
        ),
        dest='google_dev_key',
    )
    scan_parser.add_argument(
        '--google-cse-id',
        metavar='GOOGLE_CSE_ID',
        type=str,
        help=(
            'Specifies the custom search engine to query. Visit the Google '
            'Custom Search Console (https://cse.google.com/cse/all) to create '
            'your own Google Custom Search Engine. If not specified, the '
            'script will simply skip asset transforms that involve Google '
            'Search.'
        ),
        dest='google_cse_id',
    )
    scan_parser.add_argument(
        '--google-limit',
        metavar='GOOGLE_LIMIT',
        type=int,
        help=(
            'The number of pages to limit Google Search to. This is to avoid '
            'exhausting your daily quota.'
        ),
        default=None,
    )
    scan_parser.add_argument(
        '--shodan-api-key',
        metavar='SHODAN_API_KEY',
        type=str,
        help=(
            'Specifies the API key to use to query Shodan. Log into your '
            'Shodan account (https://www.shodan.io/) and look at the top '
            'right corner of the page in order to view your API key. If not '
            'specified, the script will simply skip asset transforms that '
            'involve Shodan.'
        ),
        dest='shodan_api_key',
    )
    scan_parser.add_argument(
        '--shodan-limit',
        metavar='SHODAN_LIMIT',
        type=int,
        help=(
            'The number of pages to limit Shodan Search to. This is to avoid '
            'exhausting your daily quota.'
        ),
        default=None,
    )

    # Parse arguments for status command
    status_parser = commands_subparsers.add_parser(
        'status',
        help=(
            'Prints out the current status of the specified intelligence '
            'database.'
        ),
        parents=[parent_parser],
    )
    status_parser.add_argument(
        '-s', '--strong',
        help=(
            'Indicates if the status will be based on the strong ownership '
            'classification.'
        ),
        action='store_true',
        default=False,
    )

    # Parse arguments for classify command
    classify_parser = commands_subparsers.add_parser(
        'classify',
        help=(
            'Classifies an existing asset as either belonging or not '
            'belonging to the target. Adds a new asset with the specified '
            'classification if none is present.'
        ),
        parents=[parent_parser],
    )
    # Every asset module contributes its own accept/unmark/reject options
    for asset_module in inquisitor.ASSET_MODULES:
        asset_module.main_classify_args(classify_parser)

    # Parse arguments for dump command
    dump_parser = commands_subparsers.add_parser(
        'dump',
        help='Dumps the contents of the database into a JSON file',
        parents=[parent_parser],
    )
    dump_parser.add_argument(
        '-j', '--json',
        metavar='FILE',
        type=str,
        help='The path to dump the JSON file to. Overwrites existing files.',
    )
    dump_parser.add_argument(
        '-a', '--all',
        help='Include rejected assets in dump.',
        action='store_true',
        default=False,
    )

    # Parse arguments for visualize command
    visualize_parser = commands_subparsers.add_parser(
        'visualize',
        help=(
            'Create a D3.js visualization based on the contents of the '
            'specified intelligence database.'
        ),
        parents=[parent_parser],
    )
    visualize_parser.add_argument(
        '-l', '--last',
        help=(
            'Simply open the last visualization generated instead of creating '
            'a new one.'
        ),
        action='store_true',
        default=False,
    )

    # Perform actual parsing of arguments
    args = main_parser.parse_args(cmd_args)

    # Determine chosen command and pass to appropriate subroutine
    if args.command == 'scan':
        scan(
            args.database,
            google_dev_key=args.google_dev_key,
            google_cse_id=args.google_cse_id,
            google_limit=args.google_limit,
            shodan_api_key=args.shodan_api_key,
            shodan_limit=args.shodan_limit,
        )
        # sys.exit rather than the site-provided exit() helper, which is
        # meant for interactive use and is not always installed.
        sys.exit(0)
    if args.command == 'status':
        status(args.database, args.strong)
        return
    if args.command == 'classify':
        classify(args.database, args)
        return
    if args.command == 'dump':
        dump(args.database, args.json, args.all)
        return
    if args.command == 'visualize':
        visualize(args.database, args.last)
        return
416 |
# Entry Point: runs only when this file is executed as a script
if __name__ == '__main__':
    # Call the main function with the arguments that follow the program name
    main(sys.argv[1:])
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Inquisitor
2 |
3 | > #### Notice
4 | > This project is only partially complete and I have yet to implement many of the features described in the following blog post I made: https://penafieljlm.com/2017/07/14/inquisitor/.
5 |
6 | Inquisitor is a simple tool for gathering information on companies and organizations through the use of Open Source Intelligence (OSINT) sources. It is heavily inspired by how Maltego and recon-ng operate, and the tool pretty much re-implements some of the features of those tools but adds an additional layer of opinion-based semantics on top of asset types in order to create an easy-to-use workflow.
7 |
8 | The key features of Inquisitor include:
9 |
10 | 1. The ability to cascade the ownership label of an asset (e.g. if a Registrant Name is known to belong to the target organization, then the hosts and networks registered with that name shall be marked as belonging to the target organization)
11 | 2. The ability to transform assets into other potentially related assets by querying open sources such as Google and Shodan
12 | 3. The ability to visualize the relationships of those assets through a zoomable pack layout
13 |
14 | ## Concept
15 |
16 | The whole concept of Inquisitor revolves around the idea of extracting information from open sources based on what is already known about a target organization. In the context of Inquisitor, these extractions are called "transforms". Related information may also be immediately retrieved from a known asset based on metadata that is also retrievable from open sources such as whois and internet registries.
17 |
18 | The concepts are discussed in further detail in this blog article: https://penafieljlm.com/2017/07/14/inquisitor/
19 |
20 | ## Installation
21 |
22 | To install Inquisitor, simply clone the repository, enter it, and execute the installation script.
23 | ```
24 | pip install Cython click
25 | git clone git@github.com:penafieljlm/inquisitor.git
26 | cd inquisitor
27 | python setup.py install
28 | ```
29 |
30 | ## Usage
31 |
32 | Inquisitor has five basic commands which include `scan`, `status`, `classify`, `dump`, and `visualize`.
33 | ```
34 | usage: inq [-h] {scan,status,classify,dump,visualize} ...
35 |
36 | optional arguments:
37 | -h, --help show this help message and exit
38 |
39 | command:
40 | {scan,status,classify,dump,visualize}
41 | The action to perform.
42 | scan Search OSINT sources for intelligence based on known
43 | assets belonging to the target.
44 | status Prints out the current status of the specified
45 | intelligence database.
46 | classify Classifies an existing asset as either belonging or
47 | not belonging to the target. Adds a new asset with the
48 | specified classification if none is present.
49 | dump Dumps the contents of the database into a JSON file
50 | visualize Create a D3.js visualization based on the contents of
51 | the specified intelligence database.
52 | ```
53 |
54 | ### Scan
55 |
56 | In scan mode, the tool runs all available transforms for all the assets you have in your Intelligence Database. Make sure to create API Keys for the various OSINT sources indicated below and provide them to the script lest the transforms using those sources be skipped. Also, make sure you seed your Intelligence Database with some known owned target assets using the `classify` command first because if the database does not contain any owned assets, there will be nothing to transform.
57 | ```
58 | usage: inq scan [-h] [--google-dev-key GOOGLE_DEV_KEY]
59 | [--google-cse-id GOOGLE_CSE_ID]
60 | [--google-limit GOOGLE_LIMIT]
61 | [--shodan-api-key SHODAN_API_KEY]
62 | [--shodan-limit SHODAN_LIMIT]
63 | DATABASE
64 |
65 | positional arguments:
66 | DATABASE The path to the intelligence database to use. If
67 | specified file does not exist, a new one will be
68 | created.
69 |
70 | optional arguments:
71 | -h, --help show this help message and exit
72 | --google-dev-key GOOGLE_DEV_KEY
73 | Specifies the developer key to use to query Google
74 | Custom Search. Visit the Google APIs Console
75 | (http://code.google.com/apis/console) to get an API
76 | key. If notspecified, the script will simply skip
77 | asset transforms that involve Google Search.
78 | --google-cse-id GOOGLE_CSE_ID
79 | Specifies the custom search engine to query. Visit the
80 | Google Custom Search Console
81 | (https://cse.google.com/cse/all) to create your own
82 | Google Custom Search Engine. If not specified, the
83 | script will simply skip asset transforms that involve
84 | Google Search.
85 | --google-limit GOOGLE_LIMIT
86 | The number of pages to limit Google Search to. This is
87 | to avoid exhausting your daily quota.
88 | --shodan-api-key SHODAN_API_KEY
89 | Specifies the API key to use to query Shodan. Log into
90 | your Shodan account (https://www.shodan.io/) and look
91 | at the top right corner of the page in order to view
92 | your API key. If not specified, the script will simply
93 | skip asset transforms that involve Shodan.
94 | --shodan-limit SHODAN_LIMIT
95 | The number of pages to limit Shodan Search to. This is
96 | to avoid exhausting your daily quota.
97 | ```
98 |
99 | ### Status
100 |
101 | In status mode, the tool simply prints out a quick summary of the status of your scan database.
102 | ```
103 | usage: inq status [-h] [-s] DATABASE
104 |
105 | positional arguments:
106 | DATABASE The path to the intelligence database to use. If specified
107 | file does not exist, a new one will be created.
108 |
109 | optional arguments:
110 | -h, --help show this help message and exit
111 | -s, --strong Indicates if the status will be based on the strong ownership
112 | classification.
113 | ```
114 |
115 | ### Classify
116 |
117 | In classify mode, you will be able to manually add assets and re-classify already existing assets in the Intelligence Database. You should use this command to seed your Intelligence Database with known owned target assets.
118 | ```
119 | usage: inq classify [-h] [-ar REGISTRANT [REGISTRANT ...]]
120 | [-ur REGISTRANT [REGISTRANT ...]]
121 | [-rr REGISTRANT [REGISTRANT ...]]
122 | [-ab BLOCK [BLOCK ...]] [-ub BLOCK [BLOCK ...]]
123 | [-rb BLOCK [BLOCK ...]] [-ah HOST [HOST ...]]
124 | [-uh HOST [HOST ...]] [-rh HOST [HOST ...]]
125 | [-ae EMAIL [EMAIL ...]] [-ue EMAIL [EMAIL ...]]
126 | [-re EMAIL [EMAIL ...]]
127 | [-al LINKEDIN [LINKEDIN ...]]
128 | [-ul LINKEDIN [LINKEDIN ...]]
129 | [-rl LINKEDIN [LINKEDIN ...]]
130 | DATABASE
131 |
132 | positional arguments:
133 | DATABASE The path to the intelligence database to use. If
134 | specified file does not exist, a new one will be
135 | created.
136 |
137 | optional arguments:
138 | -h, --help show this help message and exit
139 | -ar REGISTRANT [REGISTRANT ...], --accept-registrant REGISTRANT [REGISTRANT ...]
140 | Specifies a registrant to classify as accepted.
141 | -ur REGISTRANT [REGISTRANT ...], --unmark-registrant REGISTRANT [REGISTRANT ...]
142 | Specifies a registrant to classify as unmarked.
143 | -rr REGISTRANT [REGISTRANT ...], --reject-registrant REGISTRANT [REGISTRANT ...]
144 | Specifies a registrant to classify as rejected.
145 | -ab BLOCK [BLOCK ...], --accept-block BLOCK [BLOCK ...]
146 | Specifies a block to classify as accepted.
147 | -ub BLOCK [BLOCK ...], --unmark-block BLOCK [BLOCK ...]
148 | Specifies a block to classify as unmarked.
149 | -rb BLOCK [BLOCK ...], --reject-block BLOCK [BLOCK ...]
150 | Specifies a block to classify as rejected.
151 | -ah HOST [HOST ...], --accept-host HOST [HOST ...]
152 | Specifies a host to classify as accepted.
153 | -uh HOST [HOST ...], --unmark-host HOST [HOST ...]
154 | Specifies a host to classify as unmarked.
155 | -rh HOST [HOST ...], --reject-host HOST [HOST ...]
156 | Specifies a host to classify as rejected.
157 | -ae EMAIL [EMAIL ...], --accept-email EMAIL [EMAIL ...]
158 | Specifies a email to classify as accepted.
159 | -ue EMAIL [EMAIL ...], --unmark-email EMAIL [EMAIL ...]
160 | Specifies a email to classify as unmarked.
161 | -re EMAIL [EMAIL ...], --reject-email EMAIL [EMAIL ...]
162 | Specifies a email to classify as rejected.
163 | -al LINKEDIN [LINKEDIN ...], --accept-linkedin LINKEDIN [LINKEDIN ...]
164 | Specifies a LinkedIn Account to classify as accepted.
165 | -ul LINKEDIN [LINKEDIN ...], --unmark-linkedin LINKEDIN [LINKEDIN ...]
166 | Specifies a LinkedIn Account to classify as unmarked.
167 | -rl LINKEDIN [LINKEDIN ...], --reject-linkedin LINKEDIN [LINKEDIN ...]
168 | Specifies a LinkedIn Account to classify as rejected.
169 | ```
170 |
171 | ### Dump
172 |
173 | In dump mode, you will be able to dump the contents of the Intelligence Database into a human-readable JSON file.
174 | ```
175 | usage: inq dump [-h] [-j FILE] [-a] DATABASE
176 |
177 | positional arguments:
178 | DATABASE The path to the intelligence database to use. If
179 | specified file does not exist, a new one will be
180 | created.
181 |
182 | optional arguments:
183 | -h, --help show this help message and exit
184 | -j FILE, --json FILE The path to dump the JSON file to. Overwrites existing
185 | files.
186 | -a, --all Include rejected assets in dump.
187 | ```
188 |
189 | ### Visualize
190 |
191 | In visualize mode, you will be able to acquire a hierarchical visualization of the Intelligence Repository.
192 | ```
193 | usage: inq visualize [-h] [-l] DATABASE
194 |
195 | positional arguments:
196 | DATABASE The path to the intelligence database to use. If specified file
197 | does not exist, a new one will be created.
198 |
199 | optional arguments:
200 | -h, --help show this help message and exit
201 | -l, --last Simply open the last visualization generated instead of creating
202 | a new one.
203 | ```
204 |
205 | ## Workflow
206 |
207 | Now that you know the basic features of Inquisitor, it's time you learn how to *actually* use it. Inquisitor has been written with the following steps in mind:
208 |
209 | ### Seeding
210 |
211 | In this step, your Intelligence Database doesn't have anything in it yet. We're going to have to start somewhere so go ahead and seed the database with assets that you know belong to your target organization. You can do this using the `classify` command.
212 |
213 | ### Scanning
214 |
215 | Now that the database has assets that are known to belong to your target organization, you can proceed with scanning. You can do this using the `scan` command.
216 |
217 | When you invoke the `scan` command on your Intelligence Database, Inquisitor proceeds to run the `transform` methods of assets that are classified as `accepted`. Once scanning is finished, you're going to end up with more assets that might potentially belong to your target organization.
218 |
219 | If you don't end up with any new assets, you can either seed your Intelligence Database with new information, or simply proceed to wrap up the process by proceeding to the Reporting step.
220 |
221 | ### Classifying
222 |
223 | While Inquisitor performs automatic asset classification for you, it might end up missing some assets that do, in fact, belong to your target organization.
224 |
225 | When this happens, you're going to have to check the database contents and manually classify the assets. Usually, you'd want to pay attention to **Registrant** assets as there is no way to automatically determine ownership for that asset type. Also, most other asset types rely on the ownership classification of Registrant assets in order to determine whether they belong to your target or not, so it's definitely best to pay attention to your Registrant assets. Additionally, you don't end up with a lot of Registrant assets in the first place so it's not going to be that hard sifting through them.
226 |
227 | ### Reporting
228 |
229 | You can generate a visualization of the assets that belong to your target organization using the `visualize` command, or export them to a JSON file using the `dump` command.
230 |
231 | ## Demo
232 |
233 | I have video demonstrations of the tool running at the following link: https://drive.google.com/open?id=0B_O70BVu38TRclo5dWRBWkdTTWc
234 |
235 | I wasn't able to fully record the run of the scan command though since my free screen recorder only records up to 10 minutes.
236 |
237 | ## Development
238 |
239 | The Inquisitor project is laid out in the following format:
240 | ```
241 | .
242 | |-- README.md
243 | |-- inquisitor
244 | | |-- __init__.py
245 | | |-- assets
246 | | | |-- __init__.py
247 | | | |-- block.py
248 | | | |-- email.py
249 | | | |-- host.py
250 | | | |-- linkedin.py
251 | | | `-- registrant.py
252 | | |-- extractors
253 | | | |-- __init__.py
254 | | | `-- emails.py
255 | | `-- sources
256 | | |-- __init__.py
257 | | |-- google_search.py
258 | | `-- shodan_search.py
259 | |-- inq
260 | |-- report
261 | | `-- index.html
262 | |-- setup.py
263 | `-- tests
264 | |-- __init__.py
265 | `-- test_inquisitor.py
266 | ```
267 |
268 | It has three main modules named `assets`, `extractors`, and `sources`. The main script is called `inq`.
269 |
270 | As a developer you would mostly be interested in adding new types of assets into the system so the developer guide would mostly focus on that.
271 |
272 | ### Repository
273 |
274 | Before we move on to actually implementing asset classes, we would first need to understand how to interact with the Intelligence Database as we will be interacting with it when we derive related assets from our asset classes.
275 |
276 | The source code for the Intelligence Database is stored in the `inquisitor/__init__.py` file. The actual name for the logical wrapper of the Intelligence Database is called `IntelligenceRepository`.
277 |
278 | You only need to call the `IntelligenceRepository.get_asset_string` function from asset classes as appending new assets onto the Intelligence Database is the responsibility of the `scan` module in the `inq` script. You would mostly use this function to create instances of assets or retrieve them from the database if they exist. This function is important when returning assets from the `related` and `transform` functions of your asset classes as creating new asset objects is expensive since some of them use network resources during initialization.
279 |
280 | ```
281 | Function
282 |
283 | IntelligenceRepository.get_asset_string(asset_type, identifier, create=False, store=False)
284 |
285 | Description
286 |
287 | Retrieves the primary key and asset object for the asset with the provided
288 | type and identifier.
289 |
290 | Parameters
291 |
292 | asset_type: class, required
293 |
294 | The type of the asset to retrieve from the Intelligence Database. You
295 | will actually have to pass the class object of the asset type you want
296 | to retrieve.
297 |
298 | identifier: any, required
299 |
300 | The identifier of the asset to retrieve. Consider the identifier as the
301 | unique attribute of an asset object. As for which attribute is to be
302 | used to identify an asset, it depends on the contents of the OBJECT_ID
303 | variable in the asset module.
304 |
305 | create: bool, optional, default=False
306 |
307 | When no matching asset object is found, a new one will be created and
308 | returned if this parameter is set to True. The new asset will not
309 | necessarily be stored in the Intelligence Database unless specified
310 | using the "store" parameter. However, I suggest you do not do this as
311 | adding assets to the Intelligence Database is the responsibility of
312 | another module.
313 |
314 | store: bool, optional, default=False
315 |
316 | When a new asset is created when none is found, the new one will be
317 | stored in the Intelligence Database. As said previously, I suggest that
318 | you do not do this as adding assets to the Intelligence Database is the
319 | responsibility of another module.
320 |
321 | Returns
322 |
323 | A two-element tuple where the first element is the database primary key of
324 | the element returned, and the second element is the deserialized asset
325 | object retrieved from the database.
326 |
327 | None if the asset was not found.
328 |
329 | If the asset was not found and the create flag was set to True, the primary
330 | key member of the tuple will be set to None.
331 |
332 | ```
333 |
334 | ### Assets
335 |
336 | To create a new asset type, create a new file inside the `inquisitor/assets` directory and paste the following skeleton code inside:
337 |
338 | ```python
339 | import inquisitor.assets
340 |
341 | class ASSET_NAMEValidateException(Exception):
342 | pass
343 |
344 | def canonicalize(ASSET_IDENTIFIER):
345 | return ASSET_IDENTIFIER
346 |
347 | def main_classify_args(parser):
348 | parser.add_argument(
349 | '-aASSET_NAME_LETTER', '--accept-ASSET_NAME',
350 | metavar='ASSET_NAME',
351 | type=canonicalize,
352 | nargs='+',
353 | help='Specifies a ASSET_NAME to classify as accepted.',
354 | dest='ASSET_NAMEs_accepted',
355 | default=list(),
356 | )
357 | parser.add_argument(
358 | '-uASSET_NAME_LETTER', '--unmark-ASSET_NAME',
359 | metavar='ASSET_NAME',
360 | type=canonicalize,
361 | nargs='+',
362 | help='Specifies a ASSET_NAME to classify as unmarked.',
363 | dest='ASSET_NAMEs_unmarked',
364 | default=list(),
365 | )
366 | parser.add_argument(
367 | '-rASSET_NAME_LETTER', '--reject-ASSET_NAME',
368 | metavar='ASSET_NAME',
369 | type=canonicalize,
370 | nargs='+',
371 | help='Specifies a ASSET_NAME to classify as rejected.',
372 | dest='ASSET_NAME_rejected',
373 | default=list(),
374 | )
375 |
376 | def main_classify_canonicalize(args):
377 | accepted = set(args.ASSET_NAMEs_accepted)
378 | unmarked = set(args.ASSET_NAMEs_unmarked)
379 | rejected = set(args.ASSET_NAME_rejected)
380 | redundant = set.intersection(accepted, unmarked, rejected)
381 | if redundant:
382 | raise ValueError(
383 | ('Conflicting classifications for ASSET_NAMEs '
384 | ': {}').format(list(redundant))
385 | )
386 | accepted = set([canonicalize(a) for a in accepted])
387 | unmarked = set([canonicalize(a) for a in unmarked])
388 | rejected = set([canonicalize(a) for a in rejected])
389 | return (accepted, unmarked, rejected)
390 |
391 | class ASSET_NAME(inquisitor.assets.Asset):
392 |
393 | def __init__(self, ASSET_IDENTIFIER, owned=None):
394 | super(self.__class__, self).__init__(owned=owned)
395 | self.ASSET_IDENTIFIER = canonicalize(ASSET_IDENTIFIER)
396 | # TODO: Perform other initialization actions here
397 |
398 | def __eq__(self, other):
399 | if not isinstance(other, self.__class__):
400 | return False
401 | return self.ASSET_IDENTIFIER == other.ASSET_IDENTIFIER
402 |
403 | def related(self, repo):
404 | # Prepare the results
405 | results = set()
406 | # TODO: Create related assets here based on the attributes of this asset
407 | # Return the results
408 | return results
409 |
410 | def transform(self, repo, sources):
411 | # Prepare the results
412 | assets = set()
413 | # Google Transforms
414 | if sources.get('google'):
415 | subassets = self.cache_transform_get('google', repo)
416 | if not subassets:
417 | # Acquire API
418 | google = sources['google']
419 | # TODO: Perform Google queries here and add the results to 'subassets'
420 | # Cache The Transform
421 | self.cache_transform_store('google', subassets)
422 | assets.update(subassets)
423 | # Shodan Transforms
424 | if sources.get('shodan'):
425 | subassets = self.cache_transform_get('shodan', repo)
426 | if not subassets:
427 | # Acquire API
428 | shodan = sources['shodan']
429 | # TODO: Perform Shodan queries here and add the results to 'subassets'
430 | # Cache The Transform
431 | self.cache_transform_store('shodan', subassets)
432 | assets.update(subassets)
433 | # Return the results
434 | return assets
435 |
436 | def is_owned(self, repo):
437 | if self.owned:
438 | return True
439 | # TODO: Automatically determine ownership based on repo contents
440 | return False
441 |
442 | def parent_asset(self, repo):
443 | # TODO: Return parent asset based on repo contents
444 | return None
445 |
446 | REPOSITORY = 'ASSET_REPOSITORY'
447 | ASSET_CLASS = ASSET_NAME
448 | OBJECT_ID = 'ASSET_IDENTIFIER'
449 | ```
450 |
451 | Now replace the following strings with the appropriate values:
452 | * `ASSET_NAME` : Proper name of your asset (e.g. Registrant, Host, etc.)
453 | * `ASSET_IDENTIFIER` : The name of the identifier attribute of your asset
454 | * `ASSET_NAME_LETTER` : The first letter of your asset in lowercase
455 | * `ASSET_REPOSITORY` : Lower case of the plural form of your asset name
456 |
457 | Finally, in `inquisitor/__init__.py`, register your asset in the `ASSET_MODULES` list. Make sure you import your new asset from the file in question.
458 |
459 | Congratulations! By this point, you now have a new working asset type!
460 |
461 | However, you are going to need to implement the following methods to make sure your assets get correlated with other asset types:
462 |
463 | ```
464 | Function
465 |
466 | related
467 |
468 | Description
469 |
470 | Returns the set of assets directly related to the asset in question (i.e.
471 | those that can be derived without querying a search engine).
472 |
473 | When creating asset objects, make sure you use the
474 | IntelligenceRepository.get_asset_string method instead of instantiating a
475 | new one yourself so the asset can be returned from the repository if it
476 | exists.
477 |
478 | Set the create flag to True when calling the method in question in order
479 | to return a new object when one isn't found.
480 |
481 | Set the store flag to False as appending assets is the job of another
482 | module.
483 |
484 | Parameters
485 |
486 | repo: IntelligenceRepository
487 |
488 | The Intelligence Repository that is being used in the current context.
489 |
490 | Returns
491 |
492 | Set of assets directly related to the asset in question.
493 |
494 | ```
495 |
496 | ```
497 | Function
498 |
499 | transform
500 |
501 | Description
502 |
503 | Returns the set of assets potentially related to the asset in question
504 | (i.e. those that can be derived by querying a search engine).
505 |
506 | You may access search engine objects through the provided sources
507 | parameter.
508 |
509 | Each search engine object has a transform method which automatically
510 | creates asset objects for you. You just need to provide it the repository
511 | and your query string, and then append the objects it returns to the set
512 | of assets to be returned by your asset's transform method.
513 |
514 | Parameters
515 |
516 | repo: IntelligenceRepository
517 |
518 | The Intelligence Repository that is being used in the current context.
519 |
520 | sources: dict
521 |
522 | The search engine objects available for use, keyed by source name.
523 |
524 | Returns
525 |
526 | Set of assets potentially related to the asset in question.
527 |
528 | ```
529 |
530 | ```
531 | Function
532 |
533 | is_owned
534 |
535 | Description
536 |
537 | Determines if there is high confidence that this asset does indeed belong
538 | to the target. Usually checks for any "strong" classification tag first by
539 | looking at the contents of the "owned" variable, before performing
540 | automatic evaluation.
541 |
542 | Automatic evaluation depends on what type of asset you're writing. For
543 | example, for a Host asset, the secondary sources of determining ownership
544 | would include looking if its registrant is owned by the target, if its
545 | parent domain is owned by the target, etc.
546 |
547 | Parameters
548 |
549 | repo: IntelligenceRepository
550 |
551 | The Intelligence Repository that is being used in the current context.
552 |
553 | Returns
554 |
555 | True if it is determined with high confidence that this asset does indeed
556 | belong to the target.
557 |
558 | ```
559 |
560 | ```
561 | Function
562 |
563 | parent_asset
564 |
565 | Description
566 |
567 | Returns the asset object that is considered the parent of this asset
568 | object.
569 |
570 | Parameters
571 |
572 | repo: IntelligenceRepository
573 |
574 | Returns
575 |
576 | The asset object that this asset falls under (e.g. a Block is under a
577 | Registrant, a Host is under a Block, a Host is under another Host, an Email
578 | is under a Host, etc.). This is primarily used for visualization.
579 |
580 | ```
581 |
582 | After implementing the above methods, make sure you set the `REPOSITORY`, `ASSET_CLASS`, and `OBJECT_ID` variables at the bottom of your asset's source code.
583 |
584 | ## Contact and Notes
585 |
586 | The scan mode isn't fully tested because of quotas concerning the search engines involved. Also, this project was made in a rush as part of a week-long hackathon challenge so there might be a lot of problems lying around. Please create an issue ticket or contact me at penafieljlm@gmail.com if you find a bug or have some questions.
587 |
588 | ## Disclaimer
589 |
590 | This work is derived from the approaches implemented by the Maltego and recon-ng Open Source Intelligence tools. I supplemented these approaches with ideas that are either already common knowledge (e.g. whois tells you who the owner of a domain is, subdomains are owned by the same organization owning their parent - as implied by domain name bruteforcing attacks, organizations are authoritative of the domain names that they own, etc.), or are original and were conceived by me in my own personal time as part of my hobby (e.g. acceptability ratings, various transforms, classification inheritance, etc.).
591 |
592 | No component of this work was derived from any work that I have done for any employer in the past. The whole project, including the proof-of-concept, was written from scratch and was augmented with ideas from the information security community.
593 |
--------------------------------------------------------------------------------