├── tests ├── __init__.py └── test_inquisitor.py ├── inquisitor ├── extractors │ ├── __init__.py │ └── emails.py ├── sources │ ├── __init__.py │ ├── google_search.py │ └── shodan_search.py ├── assets │ ├── __init__.py │ ├── registrant.py │ ├── email.py │ ├── linkedin.py │ ├── block.py │ └── host.py └── __init__.py ├── .gitignore ├── setup.py ├── report └── index.html ├── inq └── README.md /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /inquisitor/extractors/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /inquisitor/sources/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.bat 3 | *.json 4 | *.db -------------------------------------------------------------------------------- /inquisitor/extractors/emails.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | REGEX = re.compile(r'[^\s@<>]+@[^\s@<>]+\.[^\s@<>]+\b') 4 | 5 | def extract(string): 6 | return set(REGEX.findall(string)) -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | try: 2 | from setuptools import setup, find_packages 3 | except ImportError: 4 | from distutils.core import setup 5 | 6 | config = { 7 | 'description': 'Opinionated organisation-centric OSINT inspired from recon-ng and Maltego', 8 | 'author': 'John Lawrence M. Penafiel', 9 | 'url': 'https://github.com/penafieljlm/inquisitor', 10 | 'download_url': 'https://github.com/penafieljlm/inquisitor', 11 | 'author_email': 'penafieljlm@gmail.com', 12 | 'version': '0.1', 13 | 'install_requires': [ 14 | 'google-api-python-client', 15 | 'ipwhois', 16 | 'netaddr', 17 | 'nose', 18 | 'python-whois', 19 | 'shodan', 20 | 'tabulate', 21 | 'tld', 22 | 'unidecode', 23 | 'unqlite', 24 | 'validate_email', 25 | ], 26 | 'packages': find_packages(), 27 | 'scripts': ['inq'], 28 | 'name': 'inquisitor' 29 | } 30 | 31 | setup(**config) -------------------------------------------------------------------------------- /inquisitor/assets/__init__.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | class Asset(object): 4 | 5 | def __init__(self, owned=None): 6 | self.owned = owned 7 | self.transforms = dict() 8 | 9 | def cache_transform_store(self, source, assets): 10 | cached = list() 11 | for asset in assets: 12 | module_name = asset.__class__.__module__ 13 | module = sys.modules[module_name] 14 | entry = [module_name, getattr(asset, module.OBJECT_ID)] 15 | if entry not in cached: 16 | cached.append(entry) 17 | self.transforms[source] = cached 18 | 19 | def cache_transform_get(self, source, repo): 20 | results = set() 21 | if source not in self.transforms: 22 | return results 23 | cached = self.transforms[source] 24 | for module_name, object_id in cached: 25 | module = sys.modules[module_name] 26 | asset_type = module.ASSET_CLASS 27 | results.add(repo.get_asset_string( 28 | asset_type, 29 | object_id, 30 | create=True, 31 | )[1]) 32 | return results 33 | -------------------------------------------------------------------------------- /report/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /inquisitor/sources/google_search.py: -------------------------------------------------------------------------------- 1 | import googleapiclient.discovery 2 | import inquisitor.assets.email 3 | import inquisitor.assets.host 4 | import inquisitor.assets.linkedin 5 | import inquisitor.assets.registrant 6 | import inquisitor.extractors.emails 7 | import logging 8 | import urlparse 9 | 10 | class GoogleAPI: 11 | 12 | def __init__(self, dev_key, cse_id, limit=None): 13 | self.dev_key = dev_key 14 | self.cse_id = cse_id 15 | self.limit = limit 16 | self.service = googleapiclient.discovery.build( 17 | "customsearch", "v1", 18 | developerKey=self.dev_key, 19 | ) 20 | 21 | def search(self, query): 22 | items = list() 23 | page = 1 24 | start = 1 25 | while True: 26 | if self.limit and page > self.limit: 27 | break 28 | try: 29 | results = self.service.cse().list( 30 | q=query, 31 | cx=self.cse_id, 32 | start=start, 33 | ).execute() 34 | if results.get('items'): 35 | items.extend(results.get('items')) 36 | start += 10 37 | page += 1 38 | except googleapiclient.errors.HttpError: 39 | break 40 | return items 41 | 42 | def transform(self, repository, query): 43 | assets = set() 44 | items = self.search(query) 45 | for item in items: 46 | parsed_link = urlparse.urlparse(item['link']) 47 | # Extract Host 48 | try: 49 | assets.add(repository.get_asset_string( 50 | inquisitor.assets.host.Host, 51 | parsed_link.netloc, 52 | create=True, 53 | )[1]) 54 | except inquisitor.assets.host.HostValidateException as e: 55 | logging.error(e.message) 56 | # Extract Emails 57 | for email in inquisitor.extractors.emails.extract(item['snippet']): 58 | try: 59 | assets.add(repository.get_asset_string( 60 | inquisitor.assets.email.Email, 61 | email, 62 | create=True, 63 | )[1]) 64 | except inquisitor.assets.email.EmailValidateException as e: 65 | logging.error(e.message) 66 | # Extract LinkedIn Accounts 67 | if parsed_link.netloc.endswith('.linkedin.com'): 68 | try: 69 | # Create the asset 70 | asset = repository.get_asset_string( 71 | inquisitor.assets.linkedin.LinkedIn, 72 | item['link'], 73 | create=True, 74 | )[1] 75 | # Apply work around for acquiring the corporation 76 | if (item.get('pagemap') and 77 | item.get('pagemap').get('person') and 78 | item.get('pagemap').get('person')[0].get('org')): 79 | asset.corporation = inquisitor.assets.registrant.canonicalize( 80 | item.get('pagemap').get('person')[0].get('org') 81 | ) 82 | # Add the asset 83 | assets.add(asset) 84 | except inquisitor.assets.linkedin.LinkedInValidateException as e: 85 | logging.error(e.message) 86 | # TODO: extract accounts for other social media networks 87 | return assets -------------------------------------------------------------------------------- /inquisitor/sources/shodan_search.py: -------------------------------------------------------------------------------- 1 | import inquisitor.assets.host 2 | import inquisitor.assets.registrant 3 | import logging 4 | import shodan 5 | 6 | class ShodanAPI: 7 | 8 | def __init__(self, api_key, limit=None): 9 | self.api_key = api_key 10 | self.service = shodan.Shodan(self.api_key) 11 | self.limit = limit 12 | 13 | def search(self, query): 14 | page = 1 15 | items = list() 16 | while True: 17 | if self.limit and page > self.limit: 18 | break 19 | results = self.service.search(query, page=page) 20 | if results.get('matches'): 21 | items.extend(results.get('matches')) 22 | if len(items) >= results['total']: 23 | break 24 | page += 1 25 | return items 26 | 27 | def transform(self, repository, query): 28 | assets = set() 29 | items = self.search(query) 30 | for item in items: 31 | # Extract ISP Registrant 32 | try: 33 | assets.add(repository.get_asset_string( 34 | inquisitor.assets.registrant.Registrant, 35 | item['isp'], 36 | create=True, 37 | )[1]) 38 | except inquisitor.assets.registrant.RegistrantValidateException as e: 39 | logging.error(e.message) 40 | # Extract Organization Registrant 41 | try: 42 | assets.add(repository.get_asset_string( 43 | inquisitor.assets.registrant.Registrant, 44 | item['org'], 45 | create=True, 46 | )[1]) 47 | except inquisitor.assets.registrant.RegistrantValidateException as e: 48 | logging.error(e.message) 49 | # Extract Host From Options 50 | if (item.get('_shodan') and item.get('_shodan').get('options') and 51 | item.get('_shodan').get('options').get('hostname')): 52 | try: 53 | assets.add(repository.get_asset_string( 54 | inquisitor.assets.host.Host, 55 | item['_shodan']['options']['hostname'], 56 | create=True, 57 | )[1]) 58 | except inquisitor.assets.host.HostValidateException as e: 59 | logging.error(e.message) 60 | # Extract Host From HTTP 61 | if item.get('http') and item.get('http').get('host'): 62 | try: 63 | assets.add(repository.get_asset_string( 64 | inquisitor.assets.host.Host, 65 | item['http']['host'], 66 | create=True, 67 | )[1]) 68 | except inquisitor.assets.host.HostValidateException as e: 69 | logging.error(e.message) 70 | # Extract Hosts From Hostnames 71 | for host in item['hostnames']: 72 | try: 73 | assets.add(repository.get_asset_string( 74 | inquisitor.assets.host.Host, 75 | host, 76 | create=True, 77 | )[1]) 78 | except inquisitor.assets.host.HostValidateException as e: 79 | logging.error(e.message) 80 | # Extract Hosts From Domains 81 | for host in item['domains']: 82 | try: 83 | assets.add(repository.get_asset_string( 84 | inquisitor.assets.host.Host, 85 | host, 86 | create=True, 87 | )[1]) 88 | except inquisitor.assets.host.HostValidateException as e: 89 | logging.error(e.message) 90 | return assets -------------------------------------------------------------------------------- /inquisitor/__init__.py: -------------------------------------------------------------------------------- 1 | import inquisitor.assets.block 2 | import inquisitor.assets.email 3 | import inquisitor.assets.host 4 | import inquisitor.assets.linkedin 5 | import inquisitor.assets.registrant 6 | import sys 7 | import unqlite 8 | 9 | ASSET_MODULES = [ 10 | inquisitor.assets.registrant, 11 | inquisitor.assets.block, 12 | inquisitor.assets.host, 13 | inquisitor.assets.email, 14 | inquisitor.assets.linkedin, 15 | ] 16 | 17 | class IntelligenceRepository: 18 | 19 | def __init__(self, path): 20 | self.database = unqlite.UnQLite(path) 21 | self.repositories = dict() 22 | for asset_module in ASSET_MODULES: 23 | identifier = asset_module.REPOSITORY 24 | repository = self.database.collection(identifier) 25 | repository.create() 26 | self.repositories[identifier] = repository 27 | 28 | def get_asset_data(self, asset): 29 | module = sys.modules[asset.__class__.__module__] 30 | repository = self.repositories[module.REPOSITORY] 31 | identifier = module.OBJECT_ID 32 | query = getattr(asset, identifier) 33 | results = repository.filter(lambda a: a['data'][identifier] == query) 34 | return results[0] if results else None 35 | 36 | def get_asset_object(self, asset, create=False, store=False): 37 | result = self.get_asset_data(asset) 38 | asset_type = asset.__class__ 39 | if result: 40 | __id = result['__id'] 41 | data = result['data'] 42 | obj = asset_type.__new__(asset_type) 43 | for name, value in data.iteritems(): 44 | setattr(obj, name, value) 45 | obj.transforms = dict(obj.transforms) 46 | return (__id, obj) 47 | elif create: 48 | asset_module = sys.modules[asset_type.__module__] 49 | asset = asset_type(getattr(asset, asset_module.OBJECT_ID)) 50 | result = (None, asset) 51 | if store: 52 | result[0] = self.put_asset_object(asset) 53 | return result 54 | return None 55 | 56 | def get_asset_string( 57 | self, 58 | asset_type, 59 | identifier, 60 | create=False, 61 | store=False 62 | ): 63 | query = asset_type.__new__(asset_type) 64 | module = sys.modules[asset_type.__module__] 65 | setattr(query, module.OBJECT_ID, identifier) 66 | return self.get_asset_object(query, create=create, store=store) 67 | 68 | def get_assets(self, include, limit=None): 69 | results = set() 70 | for asset_module in ASSET_MODULES: 71 | asset_class = asset_module.ASSET_CLASS 72 | repository = self.repositories[asset_module.REPOSITORY] 73 | index = 0 74 | for data in repository.all(): 75 | data = data['data'] 76 | obj = asset_class.__new__(asset_class) 77 | for name, value in data.iteritems(): 78 | setattr(obj, name, value) 79 | obj.transforms = dict(obj.transforms) 80 | if include(obj, data): 81 | results.add(obj) 82 | index += 1 83 | if limit and index >= limit: 84 | break 85 | return results 86 | 87 | def put_asset_object(self, asset, overwrite=False): 88 | result = None 89 | module = sys.modules[asset.__class__.__module__] 90 | repository = self.repositories[module.REPOSITORY] 91 | exists = self.get_asset_data(asset) 92 | if not exists: 93 | result = repository.store({'data': asset.__dict__}) 94 | elif overwrite: 95 | repository.update(exists['__id'], {'data': asset.__dict__}) 96 | result = exists['__id'] 97 | if not exists or overwrite: 98 | for related in asset.related(self): 99 | self.put_asset_object(related, overwrite=False) 100 | return result 101 | 102 | def put_asset_string( 103 | self, 104 | asset_type, 105 | identifier, 106 | owned=None, 107 | overwrite=False 108 | ): 109 | asset = asset_type(identifier, owned=owned) 110 | self.put_asset_object(asset, overwrite=overwrite) 111 | -------------------------------------------------------------------------------- /inquisitor/assets/registrant.py: -------------------------------------------------------------------------------- 1 | import inquisitor.assets 2 | import unidecode 3 | import urlparse 4 | 5 | class RegistrantValidateException(Exception): 6 | pass 7 | 8 | def canonicalize(registrant): 9 | if not registrant: 10 | raise RegistrantValidateException('Registrants cannot be None') 11 | if not isinstance(registrant, str) and not isinstance(registrant, unicode): 12 | raise RegistrantValidateException('Registrants must be strings') 13 | registrant = unidecode.unidecode(unicode(registrant.strip())).upper() 14 | return registrant 15 | 16 | def main_classify_args(parser): 17 | parser.add_argument( 18 | '-ar', '--accept-registrant', 19 | metavar='REGISTRANT', 20 | type=canonicalize, 21 | nargs='+', 22 | help='Specifies a registrant to classify as accepted.', 23 | dest='registrants_accepted', 24 | default=list(), 25 | ) 26 | parser.add_argument( 27 | '-ur', '--unmark-registrant', 28 | metavar='REGISTRANT', 29 | type=canonicalize, 30 | nargs='+', 31 | help='Specifies a registrant to classify as unmarked.', 32 | dest='registrants_unmarked', 33 | default=list(), 34 | ) 35 | parser.add_argument( 36 | '-rr', '--reject-registrant', 37 | metavar='REGISTRANT', 38 | type=canonicalize, 39 | nargs='+', 40 | help='Specifies a registrant to classify as rejected.', 41 | dest='registrants_rejected', 42 | default=list(), 43 | ) 44 | 45 | def main_classify_canonicalize(args): 46 | accepted = set(args.registrants_accepted) 47 | unmarked = set(args.registrants_unmarked) 48 | rejected = set(args.registrants_rejected) 49 | redundant = set.intersection(accepted, unmarked, rejected) 50 | if redundant: 51 | raise ValueError( 52 | ('Conflicting classifications for registrants ' 53 | ': {}').format(list(redundant)) 54 | ) 55 | accepted = set([canonicalize(a) for a in accepted]) 56 | unmarked = set([canonicalize(a) for a in unmarked]) 57 | rejected = set([canonicalize(a) for a in rejected]) 58 | return (accepted, unmarked, rejected) 59 | 60 | class Registrant(inquisitor.assets.Asset): 61 | 62 | def __init__(self, registrant, owned=None): 63 | super(self.__class__, self).__init__(owned=owned) 64 | self.registrant = canonicalize(registrant) 65 | 66 | def __eq__(self, other): 67 | if not isinstance(other, self.__class__): 68 | return False 69 | return self.registrant == other.registrant 70 | 71 | def related(self, repo): 72 | # Prepare the results 73 | results = set() 74 | # Return the results 75 | return results 76 | 77 | def transform(self, repo, sources): 78 | # Prepare the results 79 | assets = set() 80 | # Google Transforms 81 | if sources.get('google'): 82 | subassets = self.cache_transform_get('google', repo) 83 | if not subassets: 84 | # Acquire API 85 | google = sources['google'] 86 | # Query: Plain 87 | subassets.update(google.transform(repo, self.registrant)) 88 | # Query: LinkedIn 89 | subassets.update(google.transform( 90 | repo, 'site:linkedin.com {}'.format(self.registrant) 91 | )) 92 | # Cache The Transform 93 | self.cache_transform_store('google', subassets) 94 | assets.update(subassets) 95 | # Shodan Transforms 96 | if sources.get('shodan'): 97 | subassets = self.cache_transform_get('shodan', repo) 98 | if not subassets: 99 | # Acquire API 100 | shodan = sources['shodan'] 101 | # Query: Plain 102 | subassets.update(shodan.transform(repo, self.registrant)) 103 | # Query: Organization 104 | subassets.update(shodan.transform( 105 | repo, 'org:"{}"'.format(self.registrant)) 106 | ) 107 | # Cache The Transform 108 | self.cache_transform_store('shodan', subassets) 109 | assets.update(subassets) 110 | # Return the results 111 | return assets 112 | 113 | def is_owned(self, repo): 114 | if self.owned: 115 | return True 116 | return False 117 | 118 | def parent_asset(self, repo): 119 | # Registrants don't have parents 120 | return None 121 | 122 | REPOSITORY = 'registrants' 123 | ASSET_CLASS = Registrant 124 | OBJECT_ID = 'registrant' -------------------------------------------------------------------------------- /inquisitor/assets/email.py: -------------------------------------------------------------------------------- 1 | import inquisitor.assets 2 | import inquisitor.assets.host 3 | import logging 4 | import validate_email 5 | 6 | class EmailValidateException(Exception): 7 | pass 8 | 9 | def canonicalize(email): 10 | if not email: 11 | raise EmailValidateException('Emails cannot be None') 12 | if not isinstance(email, str) and not isinstance(email, unicode): 13 | raise EmailValidateException('Emails must be strings') 14 | email = email.strip() 15 | if not validate_email.validate_email(email): 16 | raise EmailValidateException( 17 | 'Unable to validate email {}'.format(email) 18 | ) 19 | recipient, domain = email.split('@') 20 | try: 21 | domain = inquisitor.assets.host.canonicalize(domain) 22 | except inquisitor.assets.host.HostValidateException: 23 | raise EmailValidateException( 24 | 'Unable to validate domain for email {}'.format(email) 25 | ) 26 | return '@'.join([recipient, domain]) 27 | 28 | def main_classify_args(parser): 29 | parser.add_argument( 30 | '-ae', '--accept-email', 31 | metavar='EMAIL', 32 | type=canonicalize, 33 | nargs='+', 34 | help='Specifies a email to classify as accepted.', 35 | dest='emails_accepted', 36 | default=list(), 37 | ) 38 | parser.add_argument( 39 | '-ue', '--unmark-email', 40 | metavar='EMAIL', 41 | type=canonicalize, 42 | nargs='+', 43 | help='Specifies a email to classify as unmarked.', 44 | dest='emails_unmarked', 45 | default=list(), 46 | ) 47 | parser.add_argument( 48 | '-re', '--reject-email', 49 | metavar='EMAIL', 50 | type=canonicalize, 51 | nargs='+', 52 | help='Specifies a email to classify as rejected.', 53 | dest='emails_rejected', 54 | default=list(), 55 | ) 56 | 57 | def main_classify_canonicalize(args): 58 | accepted = set(args.emails_accepted) 59 | unmarked = set(args.emails_unmarked) 60 | rejected = set(args.emails_rejected) 61 | redundant = set.intersection(accepted, unmarked, rejected) 62 | if redundant: 63 | raise ValueError( 64 | ('Conflicting classifications for emails ' 65 | ': {}').format(list(redundant)) 66 | ) 67 | accepted = set([canonicalize(a) for a in accepted]) 68 | unmarked = set([canonicalize(a) for a in unmarked]) 69 | rejected = set([canonicalize(a) for a in rejected]) 70 | return (accepted, unmarked, rejected) 71 | 72 | class Email(inquisitor.assets.Asset): 73 | 74 | def __init__(self, email, owned=None): 75 | super(self.__class__, self).__init__(owned=owned) 76 | self.email = canonicalize(email) 77 | recipient, domain = self.email.split('@') 78 | self.recipient = recipient 79 | self.domain = domain 80 | 81 | def __eq__(self, other): 82 | if not isinstance(other, self.__class__): 83 | return False 84 | return self.email == other.email 85 | 86 | def related(self, repo): 87 | # Prepare results 88 | results = set() 89 | # Related: Domain 90 | try: 91 | results.add(repo.get_asset_string( 92 | inquisitor.assets.host.Host, 93 | self.domain, 94 | create=True, 95 | )[1]) 96 | except inquisitor.assets.host.HostValidateException as e: 97 | logging.error(e.message) 98 | # Return the results 99 | return results 100 | 101 | def transform(self, repo, sources): 102 | # Prepare the results 103 | assets = set() 104 | # Google Transforms 105 | if sources.get('google'): 106 | subassets = self.cache_transform_get('google', repo) 107 | if not subassets: 108 | # Acquire API 109 | google = sources['google'] 110 | # Query: Email 111 | subassets.update(google.transform( 112 | repo, '"{}"'.format(self.email)) 113 | ) 114 | # Cache The Transform 115 | self.cache_transform_store('google', subassets) 116 | assets.update(subassets) 117 | # Return the results 118 | return assets 119 | 120 | def is_owned(self, repo): 121 | # If manually classified, return the classification 122 | if self.owned is not None: 123 | return self.owned 124 | # Automatically determine ownership 125 | try: 126 | host = repo.get_asset_string(inquisitor.assets.host.Host, self.domain) 127 | if host and host[1].is_owned(repo): 128 | return True 129 | except inquisitor.assets.host.HostValidateException as e: 130 | logging.error(e.message) 131 | return False 132 | 133 | def parent_asset(self, repo): 134 | # Prepare result variable 135 | parent = None 136 | # Check if this email's domain is a valid parent 137 | if parent is None: 138 | if self.domain: 139 | try: 140 | host = repo.get_asset_string(inquisitor.assets.host.Host, self.domain) 141 | if host and host[1].is_owned(repo): 142 | parent = host[1] 143 | return parent 144 | except inquisitor.assets.host.HostValidateException as e: 145 | logging.error(e.message) 146 | # If no parental candidate is found, return None 147 | return None 148 | 149 | REPOSITORY = 'emails' 150 | ASSET_CLASS = Email 151 | OBJECT_ID = 'email' -------------------------------------------------------------------------------- /inquisitor/assets/linkedin.py: -------------------------------------------------------------------------------- 1 | import inquisitor.assets 2 | import inquisitor.assets.registrant 3 | import logging 4 | import urlparse 5 | 6 | class LinkedInValidateException(Exception): 7 | pass 8 | 9 | def canonicalize(linkedin): 10 | if not linkedin: 11 | raise LinkedInValidateException('LinkedIn accounts cannot be None') 12 | if not isinstance(linkedin, str) and not isinstance(linkedin, unicode): 13 | raise LinkedInValidateException('LinkedIn accounts must be strings') 14 | # Validate URL 15 | linkedin = linkedin.strip().lower() 16 | parsed = urlparse.urlparse(linkedin) 17 | # Validate Network Location 18 | if not parsed.netloc.endswith('.linkedin.com'): 19 | raise LinkedInValidateException( 20 | 'Failed to validate LinkedIn account: {}'.format(linkedin) 21 | ) 22 | # Validate Path 23 | if not parsed.path.startswith('/in/'): 24 | raise LinkedInValidateException( 25 | 'Failed to validate LinkedIn account: {}'.format(linkedin) 26 | ) 27 | # Return the URL 28 | return linkedin 29 | 30 | def main_classify_args(parser): 31 | parser.add_argument( 32 | '-al', '--accept-linkedin', 33 | metavar='LINKEDIN', 34 | type=canonicalize, 35 | nargs='+', 36 | help='Specifies a LinkedIn Account to classify as accepted.', 37 | dest='linkedin_accepted', 38 | default=list(), 39 | ) 40 | parser.add_argument( 41 | '-ul', '--unmark-linkedin', 42 | metavar='LINKEDIN', 43 | type=canonicalize, 44 | nargs='+', 45 | help='Specifies a LinkedIn Account to classify as unmarked.', 46 | dest='linkedin_unmarked', 47 | default=list(), 48 | ) 49 | parser.add_argument( 50 | '-rl', '--reject-linkedin', 51 | metavar='LINKEDIN', 52 | type=canonicalize, 53 | nargs='+', 54 | help='Specifies a LinkedIn Account to classify as rejected.', 55 | dest='linkedin_rejected', 56 | default=list(), 57 | ) 58 | 59 | def main_classify_canonicalize(args): 60 | accepted = set(args.linkedin_accepted) 61 | unmarked = set(args.linkedin_unmarked) 62 | rejected = set(args.linkedin_rejected) 63 | redundant = set.intersection(accepted, unmarked, rejected) 64 | if redundant: 65 | raise ValueError( 66 | ('Conflicting classifications for LinkedIn Accounts ' 67 | ': {}').format(list(redundant)) 68 | ) 69 | accepted = set([canonicalize(a) for a in accepted]) 70 | unmarked = set([canonicalize(a) for a in unmarked]) 71 | rejected = set([canonicalize(a) for a in rejected]) 72 | return (accepted, unmarked, rejected) 73 | 74 | class LinkedIn(inquisitor.assets.Asset): 75 | 76 | def __init__(self, linkedin, owned=False): 77 | super(self.__class__, self).__init__(owned=owned) 78 | self.linkedin = canonicalize(linkedin) 79 | self.username = urlparse.urlparse(self.linkedin).path.split('/')[2] 80 | # TODO: This should be retrieved using linkedin api but we don't have 81 | # TODO: time for that, so fill it up using Google Search results 82 | # TODO: instead 83 | self.corporation = None 84 | 85 | def __eq__(self, other): 86 | if not isinstance(other, self.__class__): 87 | return False 88 | return self.linkedin == other.linkedin 89 | 90 | def related(self, repo): 91 | # Prepare the results 92 | results = set() 93 | # Related: Corporation 94 | if self.corporation: 95 | try: 96 | results.add(repo.get_asset_string( 97 | inquisitor.assets.registrant.Registrant, 98 | self.corporation, 99 | create=True, 100 | )[1]) 101 | except inquisitor.assets.registrant.RegistrantValidateException as e: 102 | logging.error(e.message) 103 | # Return the results 104 | return results 105 | 106 | def transform(self, repo, sources): 107 | # Prepare the results 108 | assets = set() 109 | # Return the results 110 | return assets 111 | 112 | def is_owned(self, repo): 113 | # If manually classified, return the classification 114 | if self.owned is not None: 115 | return self.owned 116 | # Automatically determine ownership 117 | if self.corporation: 118 | try: 119 | registrant = repo.get_asset_string( 120 | inquisitor.assets.registrant.Registrant, 121 | self.corporation 122 | ) 123 | if registrant and registrant[1].is_owned(repo): 124 | return True 125 | except inquisitor.assets.registrant.RegistrantValidateException as e: 126 | logging.error(e.message) 127 | return False 128 | 129 | def parent_asset(self, repo): 130 | # Prepare result variable 131 | parent = None 132 | # Check if registrant is a valid parent 133 | if parent is None: 134 | if self.corporation: 135 | try: 136 | registrant = repo.get_asset_string( 137 | inquisitor.assets.registrant.Registrant, 138 | self.corporation, 139 | ) 140 | if registrant and registrant[1].is_owned(repo): 141 | parent = registrant[1] 142 | return parent 143 | except inquisitor.assets.registrant.RegistrantValidateException as e: 144 | logging.error(e.message) 145 | # If no parental candidate is found, return None 146 | return None 147 | 148 | REPOSITORY = 'linkedins' 149 | ASSET_CLASS = LinkedIn 150 | OBJECT_ID = 'linkedin' -------------------------------------------------------------------------------- /tests/test_inquisitor.py: -------------------------------------------------------------------------------- 1 | from nose.tools import * 2 | import inq 3 | 4 | def setup(): 5 | pass 6 | 7 | def teardown(): 8 | pass 9 | 10 | def test_inquisitor(): 11 | # Accept Host 12 | inq.main(['classify', 'coke.db', '-ah', 'coca-cola.com']) 13 | # Accept Registrants 14 | inq.main(['classify', 'coke.db', '-ar', 'COCA-COLA ENTERPRISES']) 15 | inq.main(['classify', 'coke.db', '-ar', 'COCA-COLA BOTTLING COMPANY OF MINDEN, INC.']) 16 | inq.main(['classify', 'coke.db', '-ar', 'COCA-COLA BOTTLING COMPANY OF MINDEN']) 17 | inq.main(['classify', 'coke.db', '-ar', 'COCA COLA NETWORK REDIRECT']) 18 | inq.main(['classify', 'coke.db', '-ar', 'THE COCA-COLA COMPANY']) 19 | inq.main(['classify', 'coke.db', '-ar', 'COCA-COLA HBC SERVICES MEPE']) 20 | # Reject Registrants 21 | inq.main(['classify', 'coke.db', '-rr', 'BH MEDIA GROUP INC.']) 22 | inq.main(['classify', 'coke.db', '-rr', 'AMAZON TECHNOLOGIES']) 23 | inq.main(['classify', 'coke.db', '-rr', 'LINODE']) 24 | inq.main(['classify', 'coke.db', '-rr', 'HUBSPOT INC.']) 25 | inq.main(['classify', 'coke.db', '-rr', 'AMAZON.COM']) 26 | inq.main(['classify', 'coke.db', '-rr', 'LINKEDIN CORPORATION']) 27 | inq.main(['classify', 'coke.db', '-rr', 'LEAF GROUP, LTD.']) 28 | inq.main(['classify', 'coke.db', '-rr', 'LEAF GROUP LTD.']) 29 | inq.main(['classify', 'coke.db', '-rr', 'DOMAIN PROTECTION SERVICES, INC.']) 30 | inq.main(['classify', 'coke.db', '-rr', 'CHRIS GASTON']) 31 | inq.main(['classify', 'coke.db', '-rr', 'NSONE INC']) 32 | inq.main(['classify', 'coke.db', '-rr', 'INTERNATIONAL MOTORSPORTS ASSOCIATION, LLC']) 33 | inq.main(['classify', 'coke.db', '-rr', 'SHYAMA ECONSULTANCY LIMITED']) 34 | inq.main(['classify', 'coke.db', '-rr', 'ALEXA INTERNET']) 35 | inq.main(['classify', 'coke.db', '-rr', 'POOL PRODUCTIONS']) 36 | inq.main(['classify', 'coke.db', '-rr', 'TWITTER, INC.']) 37 | inq.main(['classify', 'coke.db', '-rr', 'TWITTER INC.']) 38 | inq.main(['classify', 'coke.db', '-rr', 'INSTAGRAM, LLC']) 39 | inq.main(['classify', 'coke.db', '-rr', 'FACEBOOK, INC.']) 40 | inq.main(['classify', 'coke.db', '-rr', 'REGISTRARSEC, LLC']) 41 | inq.main(['classify', 'coke.db', '-rr', 'GET SATISFACTION']) 42 | inq.main(['classify', 'coke.db', '-rr', 'GOOGLE INC.']) 43 | inq.main(['classify', 'coke.db', '-rr', 'AUTOMATTIC, INC.']) 44 | inq.main(['classify', 'coke.db', '-rr', 'AUTOMATTIC, INC']) 45 | inq.main(['classify', 'coke.db', '-rr', 'DOMAINS BY PROXY, LLC']) 46 | inq.main(['classify', 'coke.db', '-rr', 'WHOIS PRIVACY SERVICE']) 47 | inq.main(['classify', 'coke.db', '-rr', 'SINGLEHOP, INC.']) 48 | inq.main(['classify', 'coke.db', '-rr', 'CYBERNET QUEST']) 49 | inq.main(['classify', 'coke.db', '-rr', 'QWEST COMMUNICATIONS COMPANY, LLC']) 50 | inq.main(['classify', 'coke.db', '-rr', 'GKG.NET DOMAIN PROXY SERVICE']) 51 | inq.main(['classify', 'coke.db', '-rr', 'INTRAWORLD COMMUNICATIONS CORPORATION']) 52 | inq.main(['classify', 'coke.db', '-rr', 'HYDROSOFT INTERNET']) 53 | inq.main(['classify', 'coke.db', '-rr', 'KSREGISTRY GMBH']) 54 | inq.main(['classify', 'coke.db', '-rr', 'OVH HOSTING, INC.']) 55 | inq.main(['classify', 'coke.db', '-rr', 'WOODYNET']) 56 | inq.main(['classify', 'coke.db', '-rr', 'KEY-SYSTEMS GMBH']) 57 | inq.main(['classify', 'coke.db', '-rr', 'ACTIVE MINDS GMBH']) 58 | inq.main(['classify', 'coke.db', '-rr', 'LIQUID WEB, L.L.C']) 59 | inq.main(['classify', 'coke.db', '-rr', 'WHOIS INC']) 60 | inq.main(['classify', 'coke.db', '-rr', 'PUBLICDOMAINREGISTRY.COM']) 61 | inq.main(['classify', 'coke.db', '-rr', 'CLOUDFLARE, INC.']) 62 | inq.main(['classify', 'coke.db', '-rr', 'PDR LTD.']) 63 | inq.main(['classify', 'coke.db', '-rr', 'ADVAMEG, INC.']) 64 | inq.main(['classify', 'coke.db', '-rr', 'INKTOMI CORPORATION']) 65 | inq.main(['classify', 'coke.db', '-rr', 'YAHOO! INC.']) 66 | inq.main(['classify', 'coke.db', '-rr', 'YAHOO! BROADCAST SERVICES, INC.']) 67 | inq.main(['classify', 'coke.db', '-rr', 'SOFTLAYER TECHNOLOGIES, INC.']) 68 | inq.main(['classify', 'coke.db', '-rr', 'SOFTLAYER CORPORATE C']) 69 | inq.main(['classify', 'coke.db', '-rr', 'DNSTINATION INC.']) 70 | inq.main(['classify', 'coke.db', '-rr', 'INTERNETNAMESFORBUSINESS.COM']) 71 | inq.main(['classify', 'coke.db', '-rr', 'INTERNATIONAL BUSINESS MACHINES CORPORATION']) 72 | inq.main(['classify', 'coke.db', '-rr', 'IBM']) 73 | inq.main(['classify', 'coke.db', '-rr', 'ENOM, INCORPORATED']) 74 | inq.main(['classify', 'coke.db', '-rr', 'OVH (NWK)']) 75 | inq.main(['classify', 'coke.db', '-rr', 'SOFTLAYER TECHNOLOGIES INC.']) 76 | inq.main(['classify', 'coke.db', '-rr', 'INFORMER TECHNOLOGIES, INC.']) 77 | inq.main(['classify', 'coke.db', '-rr', 'GODADDY.COM, LLC']) 78 | inq.main(['classify', 'coke.db', '-rr', 'GO DADDY OPERATING COMPANY, LLC']) 79 | inq.main(['classify', 'coke.db', '-rr', 'TUCOWS.COM CO.']) 80 | inq.main(['classify', 'coke.db', '-rr', 'TUCOWS.COM CO']) 81 | inq.main(['classify', 'coke.db', '-rr', 'NEUSTAR, INC.']) 82 | inq.main(['classify', 'coke.db', '-rr', 'DYN']) 83 | inq.main(['classify', 'coke.db', '-rr', 'DYN INC']) 84 | inq.main(['classify', 'coke.db', '-rr', 'AMAZON TECHNOLOGIES, INC.']) 85 | inq.main(['classify', 'coke.db', '-rr', 'AKAMAI TECHNOLOGIES, INC.']) 86 | inq.main(['classify', 'coke.db', '-rr', 'MARKMONITOR']) 87 | inq.main(['classify', 'coke.db', '-rr', 'DYNAMIC NETWORK SERVICES, INC.']) 88 | inq.main(['classify', 'coke.db', '-rr', 'MARKMONITOR INC.']) 89 | inq.main(['classify', 'coke.db', '-rr', 'WIKIMEDIA FOUNDATION INC.']) 90 | inq.main(['classify', 'coke.db', '-rr', 'WIKIMEDIA FOUNDATION, INC.']) 91 | inq.main(['classify', 'coke.db', '-rr', 'INSALA, LLC']) 92 | # Test Dump 93 | inq.main(['dump', 'coke.db', '-j', 'coke.json']) 94 | # Test Status 95 | inq.main(['status', 'coke.db']) 96 | # Test Status Strong 97 | inq.main(['status', 'coke.db', '--strong']) -------------------------------------------------------------------------------- /inquisitor/assets/block.py: -------------------------------------------------------------------------------- 1 | import inquisitor.assets 2 | import inquisitor.assets.registrant 3 | import ipwhois 4 | import logging 5 | import netaddr 6 | 7 | class BlockValidateException(Exception): 8 | pass 9 | 10 | def canonicalize(block): 11 | if not block: 12 | raise BlockValidateException('Blocks cannot be None') 13 | if not isinstance(block, str) and not isinstance(block, unicode): 14 | raise BlockValidateException('Blocks must be strings') 15 | network = None 16 | try: 17 | network = netaddr.IPNetwork(block) 18 | except netaddr.core.AddrFormatError: 19 | raise BlockValidateException('Unable to parse block {}'.format(block)) 20 | return str(network) 21 | 22 | def main_classify_args(parser): 23 | parser.add_argument( 24 | '-ab', '--accept-block', 25 | metavar='BLOCK', 26 | type=canonicalize, 27 | nargs='+', 28 | help='Specifies a block to classify as accepted.', 29 | dest='blocks_accepted', 30 | default=list(), 31 | ) 32 | parser.add_argument( 33 | '-ub', '--unmark-block', 34 | metavar='BLOCK', 35 | type=canonicalize, 36 | nargs='+', 37 | help='Specifies a block to classify as unmarked.', 38 | dest='blocks_unmarked', 39 | default=list(), 40 | ) 41 | parser.add_argument( 42 | '-rb', '--reject-block', 43 | metavar='BLOCK', 44 | type=canonicalize, 45 | nargs='+', 46 | help='Specifies a block to classify as rejected.', 47 | dest='blocks_rejected', 48 | default=list(), 49 | ) 50 | 51 | def main_classify_canonicalize(args): 52 | accepted = set(args.blocks_accepted) 53 | unmarked = set(args.blocks_unmarked) 54 | rejected = set(args.blocks_rejected) 55 | redundant = set.intersection(accepted, unmarked, rejected) 56 | if redundant: 57 | raise ValueError( 58 | ('Conflicting classifications for blocks ' 59 | ': {}').format(list(redundant)) 60 | ) 61 | accepted = set([canonicalize(a) for a in accepted]) 62 | unmarked = set([canonicalize(a) for a in unmarked]) 63 | rejected = set([canonicalize(a) for a in rejected]) 64 | return (accepted, unmarked, rejected) 65 | 66 | class Block(inquisitor.assets.Asset): 67 | 68 | def __init__(self, block, owned=None): 69 | super(self.__class__, self).__init__(owned=owned) 70 | self.block = canonicalize(block) 71 | # Acquire IP whois for block 72 | ip = str(netaddr.IPNetwork(self.block).ip) 73 | info = ipwhois.ipwhois.IPWhois(ip).lookup_rdap() 74 | self.registrant = None 75 | if (info.get('network') and info.get('network').get('cidr') 76 | and info.get('network').get('cidr') == self.block): 77 | for key, obj in info['objects'].iteritems(): 78 | if obj.get('roles') and 'registrant' in obj.get('roles'): 79 | if obj.get('contact') and obj.get('contact').get('kind') == 'org': 80 | name = obj['contact']['name'] 81 | registrant = inquisitor.assets.registrant.canonicalize( 82 | name 83 | ) 84 | self.registrant = registrant 85 | break 86 | 87 | def __eq__(self, other): 88 | if not isinstance(other, self.__class__): 89 | return False 90 | return self.block == other.block 91 | 92 | def related(self, repo): 93 | # Prepare the results 94 | results = set() 95 | # Related: Registrant 96 | if self.registrant: 97 | try: 98 | results.add(repo.get_asset_string( 99 | inquisitor.assets.registrant.Registrant, 100 | self.registrant, 101 | create=True, 102 | )[1]) 103 | except inquisitor.assets.registrant.RegistrantValidateException as e: 104 | logging.error(e.message) 105 | # Return the results 106 | return results 107 | 108 | def transform(self, repo, sources): 109 | # Prepare the results 110 | assets = set() 111 | # Shodan Transforms 112 | if sources.get('shodan'): 113 | subassets = self.cache_transform_get('shodan', repo) 114 | if not subassets: 115 | # Acquire API 116 | shodan = sources['shodan'] 117 | # Query: Network 118 | subassets.update(shodan.transform( 119 | repo, 'net:"{}"'.format(self.block)) 120 | ) 121 | # Cache The Transform 122 | self.cache_transform_store('shodan', subassets) 123 | assets.update(subassets) 124 | # Return the results 125 | return assets 126 | 127 | def is_owned(self, repo): 128 | # If manually classified, return the classification 129 | if self.owned is not None: 130 | return self.owned 131 | # Automatically determine ownership 132 | if self.registrant: 133 | try: 134 | registrant = repo.get_asset_string( 135 | inquisitor.assets.registrant.Registrant, 136 | self.registrant, 137 | ) 138 | if registrant and registrant[1].is_owned(repo): 139 | return True 140 | except inquisitor.assets.registrant.RegistrantValidateException as e: 141 | logging.error(e.message) 142 | return False 143 | 144 | def parent_asset(self, repo): 145 | # Prepare result variable 146 | parent = None 147 | # Check if this is a child of another netblock 148 | if parent is None: 149 | # Acquire start and end IPs of this netblock 150 | network = netaddr.IPNetwork(self.block) 151 | network_start = network.ip & network.netmask 152 | network_end = network_start + (network.size - 1) 153 | # Acquire other owned netblocks 154 | blocks = repo.get_assets( 155 | include=lambda o,d: ( 156 | self != o and 157 | isinstance(o, self.__class__) and 158 | o.is_owned(repo) 159 | ) 160 | ) 161 | # Check if this netblock is a child of another netblock 162 | parents = list() 163 | for block in blocks: 164 | # Acquire start and end IPs of the other netblock 165 | other = netaddr.IPNetwork(block.block) 166 | other_start = other.ip & other.netmask 167 | other_end = other_start + (other.size - 1) 168 | # Check if self is contained by the other netblock 169 | contained = ( 170 | other_start <= network_start and 171 | network_end <= other_end and 172 | network.size < other.size 173 | ) 174 | # If contained, add as potential parent 175 | candidate = [block, other] 176 | if contained and candidate not in parents: 177 | parents.append(candidate) 178 | # Return the smallest parent 179 | if parents: 180 | parent = min(parents, key=lambda e: e[1])[0] 181 | return parent 182 | # Check if registrant is a valid parent 183 | if parent is None: 184 | if self.registrant: 185 | try: 186 | registrant = repo.get_asset_string( 187 | inquisitor.assets.registrant.Registrant, 188 | self.registrant, 189 | ) 190 | if registrant and registrant[1].is_owned(repo): 191 | parent = registrant[1] 192 | return parent 193 | except inquisitor.assets.registrant.RegistrantValidateException as e: 194 | logging.error(e.message) 195 | # If no parental candidate is found, return None 196 | return None 197 | 198 | REPOSITORY = 'blocks' 199 | ASSET_CLASS = Block 200 | OBJECT_ID = 'block' -------------------------------------------------------------------------------- /inquisitor/assets/host.py: -------------------------------------------------------------------------------- 1 | import inquisitor.assets 2 | import inquisitor.assets.block 3 | import inquisitor.assets.email 4 | import inquisitor.assets.registrant 5 | import ipwhois 6 | import logging 7 | import netaddr 8 | import socket 9 | import tld 10 | import whois 11 | 12 | class HostValidateException(Exception): 13 | pass 14 | 15 | def canonicalize(host): 16 | if not host: 17 | raise HostValidateException('Hosts cannot be None') 18 | if not isinstance(host, str) and not isinstance(host, unicode): 19 | raise HostValidateException('Hosts must be strings') 20 | host = host.strip().lower() 21 | try: 22 | tld.get_tld('http://{}'.format(host)) 23 | except tld.exceptions.TldDomainNotFound: 24 | raise HostValidateException('Invalid tld for host {}'.format(host)) 25 | return host 26 | 27 | def main_classify_args(parser): 28 | parser.add_argument( 29 | '-ah', '--accept-host', 30 | metavar='HOST', 31 | type=canonicalize, 32 | nargs='+', 33 | help='Specifies a host to classify as accepted.', 34 | dest='hosts_accepted', 35 | default=list(), 36 | ) 37 | parser.add_argument( 38 | '-uh', '--unmark-host', 39 | metavar='HOST', 40 | type=canonicalize, 41 | nargs='+', 42 | help='Specifies a host to classify as unmarked.', 43 | dest='hosts_unmarked', 44 | default=list(), 45 | ) 46 | parser.add_argument( 47 | '-rh', '--reject-host', 48 | metavar='HOST', 49 | type=canonicalize, 50 | nargs='+', 51 | help='Specifies a host to classify as rejected.', 52 | dest='hosts_rejected', 53 | default=list(), 54 | ) 55 | 56 | def main_classify_canonicalize(args): 57 | accepted = set(args.hosts_accepted) 58 | unmarked = set(args.hosts_unmarked) 59 | rejected = set(args.hosts_rejected) 60 | redundant = set.intersection(accepted, unmarked, rejected) 61 | if redundant: 62 | raise ValueError( 63 | ('Conflicting classifications for hosts ' 64 | ': {}').format(list(redundant)) 65 | ) 66 | accepted = set([canonicalize(a) for a in accepted]) 67 | unmarked = set([canonicalize(a) for a in unmarked]) 68 | rejected = set([canonicalize(a) for a in rejected]) 69 | return (accepted, unmarked, rejected) 70 | 71 | class Host(inquisitor.assets.Asset): 72 | 73 | def __init__(self, host, owned=None): 74 | super(self.__class__, self).__init__(owned=owned) 75 | self.host = canonicalize(host) 76 | # Acquire parent domain 77 | self.parent = None 78 | zones = self.host.split('.') 79 | if len(zones) > 1: 80 | self.parent = canonicalize('.'.join(zones[1:])) 81 | # Acquire IP address 82 | self.ip = None 83 | try: self.ip = socket.gethostbyname(self.host) 84 | except: pass 85 | # Acquire whois information 86 | self.registrant = None 87 | self.emails = set() 88 | self.nameservers = set() 89 | if self.ip: 90 | info = whois.whois(self.host) 91 | if info.get('org'): 92 | self.registrant = inquisitor.assets.registrant.canonicalize( 93 | info['org'] 94 | ) 95 | if info.get('emails'): 96 | if type(info['emails']) is list: 97 | for email in info['emails']: 98 | self.emails.add(inquisitor.assets.email.canonicalize(email)) 99 | elif type(info['emails']) in [str, unicode]: 100 | email = info['emails'] 101 | self.emails.add(inquisitor.assets.email.canonicalize(email)) 102 | if info.get('name_servers'): 103 | if type(info['name_servers']) is list: 104 | for nameserver in info['name_servers']: 105 | self.nameservers.add(canonicalize(nameserver)) 106 | elif type(info['name_servers']) in [str, unicode]: 107 | nameserver = info['name_servers'] 108 | self.nameservers.add(canonicalize(nameserver)) 109 | self.emails = list(self.emails) 110 | self.nameservers = list(self.nameservers) 111 | # Acquire IP whois information 112 | self.blocks = set() 113 | if self.ip: 114 | tries = 0 115 | while tries < 3: 116 | try: 117 | info = ipwhois.ipwhois.IPWhois(self.ip).lookup_rdap() 118 | for block in info['network']['cidr'].split(','): 119 | block = inquisitor.assets.block.canonicalize(block.strip()) 120 | self.blocks.add(block) 121 | tries += 1 122 | except ipwhois.exceptions.HTTPLookupError: 123 | continue 124 | except ipwhois.exceptions.HTTPRateLimitError: 125 | continue 126 | except ipwhois.exceptions.IPDefinedError: 127 | continue 128 | self.blocks = list(self.blocks) 129 | 130 | def __eq__(self, other): 131 | if not isinstance(other, self.__class__): 132 | return False 133 | return self.host == other.host 134 | 135 | def related(self, repo): 136 | # Prepare results 137 | results = set() 138 | # Related: Parent 139 | if self.parent and len(self.parent.split('.')) > 1: 140 | try: 141 | results.add(repo.get_asset_string( 142 | Host, 143 | self.parent, 144 | create=True, 145 | )[1]) 146 | except HostValidateException as e: 147 | logging.error(e.message) 148 | # Related: Registrant 149 | if self.registrant: 150 | try: 151 | results.add(repo.get_asset_string( 152 | inquisitor.assets.registrant.Registrant, 153 | self.registrant, 154 | create=True, 155 | )[1]) 156 | except inquisitor.assets.registrant.RegistrantValidateException as e: 157 | logging.error(e.message) 158 | # Related: Emails 159 | for email in self.emails: 160 | try: 161 | results.add(repo.get_asset_string( 162 | inquisitor.assets.email.Email, 163 | email, 164 | create=True, 165 | )[1]) 166 | except inquisitor.assets.email.EmailValidateException as e: 167 | logging.error(e.message) 168 | # Related: Nameservers 169 | for nameserver in self.nameservers: 170 | try: 171 | results.add(repo.get_asset_string( 172 | Host, 173 | nameserver, 174 | create=True, 175 | )[1]) 176 | except HostValidateException as e: 177 | logging.error(e.message) 178 | # Related: Blocks 179 | for block in self.blocks: 180 | try: 181 | results.add(repo.get_asset_string( 182 | inquisitor.assets.block.Block, 183 | block, 184 | create=True, 185 | )[1]) 186 | except inquisitor.assets.block.BlockValidateException as e: 187 | logging.error(e.message) 188 | # Return the results 189 | return results 190 | 191 | def transform(self, repo, sources): 192 | # Prepare the results 193 | assets = set() 194 | # Google Transforms 195 | if sources.get('google'): 196 | subassets = self.cache_transform_get('google', repo) 197 | if not subassets: 198 | # Acquire API 199 | google = sources['google'] 200 | # Query: Site 201 | subassets.update(google.transform( 202 | repo, 'site:{}'.format(self.host)) 203 | ) 204 | # Query: Email 205 | subassets.update(google.transform( 206 | repo, '"@{}"'.format(self.host)) 207 | ) 208 | # Cache The Transform 209 | self.cache_transform_store('google', subassets) 210 | assets.update(subassets) 211 | # Shodan Transforms 212 | if sources.get('shodan'): 213 | subassets = self.cache_transform_get('shodan', repo) 214 | if not subassets: 215 | # Acquire API 216 | shodan = sources['shodan'] 217 | # Query: Plain 218 | subassets.update(shodan.transform(repo, self.host)) 219 | # Query: Hostname 220 | subassets.update(shodan.transform( 221 | repo, 'hostname:"{}"'.format(self.host)) 222 | ) 223 | # Cache The Transform 224 | self.cache_transform_store('shodan', subassets) 225 | assets.update(subassets) 226 | # Return the results 227 | return assets 228 | 229 | def is_owned(self, repo): 230 | # If manually classified, return the classification 231 | if self.owned is not None: 232 | return self.owned 233 | # Automatically determine ownership 234 | if self.parent: 235 | try: 236 | parent = repo.get_asset_string(Host, self.parent) 237 | if parent and parent[1].is_owned(repo): 238 | return True 239 | except HostValidateException as e: 240 | logging.error(e.message) 241 | if self.registrant: 242 | try: 243 | registrant = repo.get_asset_string( 244 | inquisitor.assets.registrant.Registrant, 245 | self.registrant 246 | ) 247 | if registrant and registrant[1].is_owned(repo): 248 | return True 249 | except inquisitor.assets.registrant.RegistrantValidateException as e: 250 | logging.error(e.message) 251 | return False 252 | 253 | def parent_asset(self, repo): 254 | # Prepare result variable 255 | parent = None 256 | # Check if this host is the child of another domain 257 | if parent is None: 258 | if self.parent: 259 | try: 260 | domain = repo.get_asset_string(Host, self.parent) 261 | if domain and domain[1].is_owned(repo): 262 | parent = domain[1] 263 | return parent 264 | except HostValidateException as e: 265 | logging.error(e.message) 266 | # Check if this host is the child of a network 267 | if parent is None: 268 | if self.ip: 269 | # Acquire owned netblocks where self is contained 270 | address = netaddr.IPAddress(self.ip) 271 | blocks = repo.get_assets( 272 | include=lambda o,d: ( 273 | isinstance(o, inquisitor.assets.block.Block) and 274 | o.is_owned(repo) and 275 | address in netaddr.IPNetwork(o.block) 276 | ) 277 | ) 278 | # Collate blocks and their corresponding network object 279 | parents = [ 280 | [block, netaddr.IPNetwork(block.block)] 281 | for block in blocks 282 | ] 283 | # Return the smallest parent 284 | if parents: 285 | parent = min(parents, key=lambda e: e[1])[0] 286 | return parent 287 | # Check if registrant is a valid parent 288 | if parent is None: 289 | if self.registrant: 290 | try: 291 | registrant = repo.get_asset_string( 292 | inquisitor.assets.registrant.Registrant, 293 | self.registrant, 294 | ) 295 | if registrant and registrant[1].is_owned(repo): 296 | parent = registrant[1] 297 | return parent 298 | except inquisitor.assets.registrant.RegistrantValidateException as e: 299 | logging.error(e.message) 300 | # If no parental candidate is found, return None 301 | return None 302 | 303 | REPOSITORY = 'hosts' 304 | ASSET_CLASS = Host 305 | OBJECT_ID = 'host' -------------------------------------------------------------------------------- /inq: -------------------------------------------------------------------------------- 1 | import argparse 2 | import inquisitor 3 | import inquisitor.sources.google_search 4 | import inquisitor.sources.shodan_search 5 | import json 6 | import logging 7 | import os 8 | import SimpleHTTPServer 9 | import SocketServer 10 | import sys 11 | import tabulate 12 | import webbrowser 13 | 14 | # Ininitialize Logging 15 | logger = logging.getLogger(__name__) 16 | logger.setLevel(logging.INFO) 17 | handler = logging.StreamHandler(sys.stdout) 18 | handler.setLevel(logging.INFO) 19 | formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') 20 | handler.setFormatter(formatter) 21 | logger.addHandler(handler) 22 | 23 | def database(path): 24 | return inquisitor.IntelligenceRepository(path) 25 | 26 | def scan( 27 | repository, 28 | google_dev_key=None, 29 | google_cse_id=None, 30 | google_limit=None, 31 | shodan_api_key=None, 32 | shodan_limit=None, 33 | ): 34 | sources = dict() 35 | # Initialize Google as a transform source 36 | if not google_dev_key or not google_cse_id: 37 | if not google_dev_key: 38 | logger.warning( 39 | 'Skipping Google Transforms. No GOOGLE_DEV_KEY provided. ' 40 | 'Please provide the GOOGLE_DEV_KEY using the --google-dev-key ' 41 | 'parameter.' 42 | ) 43 | if not google_cse_id: 44 | logger.warning( 45 | 'Skipping Google Transforms. No GOOGLE_CSE_ID provided. ' 46 | 'Please provide the GOOGLE_CSE_ID using the --google-cse-id ' 47 | 'parameter.' 48 | ) 49 | else: 50 | sources['google'] = inquisitor.sources.google_search.GoogleAPI( 51 | google_dev_key, google_cse_id, limit=google_limit 52 | ) 53 | if not google_limit: 54 | logger.warning( 55 | 'Google Search limit not set. This may potentially exhaust ' 56 | 'the daily quota of your Google API Key.' 57 | ) 58 | # Initialize Shodan as a transform source 59 | if not shodan_api_key: 60 | logger.warning( 61 | 'Skipping Shodan Transforms. No SHODAN_API_KEY provided. ' 62 | 'Please provide the SHODAN_API_KEY using the --shodan-api-key ' 63 | 'parameter.' 64 | ) 65 | else: 66 | sources['shodan'] = inquisitor.sources.shodan_search.ShodanAPI( 67 | shodan_api_key, limit=shodan_limit 68 | ) 69 | if not shodan_limit: 70 | logger.warning( 71 | 'Shodan Search limit not set. This may potentially exhaust ' 72 | 'the daily quota of your Shodan API Key.' 73 | ) 74 | # Check if any sources detected 75 | if not sources: 76 | logger.error('No valid transform sources available. Quitting.') 77 | exit(1) 78 | # Perform transforms on owned assets only 79 | found = 0 80 | logger.info('Initializing Inquisitor scan mode') 81 | owned = repository.get_assets(include=lambda o,d: o.is_owned(repository)) 82 | if not owned: 83 | logger.error( 84 | 'No assets available to transform. Please seed your database ' 85 | 'using the "classify" command.' 86 | ) 87 | exit(1) 88 | for asset in owned: 89 | asset_type = asset.__class__ 90 | asset_module_name = asset_type.__module__ 91 | asset_module = sys.modules[asset_module_name] 92 | asset_identifier = getattr(asset, asset_module.OBJECT_ID) 93 | logger.info('Transforming: {}: {}'.format( 94 | asset_module_name, 95 | asset_identifier, 96 | )) 97 | for result in asset.transform(repository, sources): 98 | __id = repository.put_asset_object(result) 99 | if __id: 100 | result_type = result.__class__ 101 | result_module_name = result_type.__module__ 102 | result_module = sys.modules[result_module_name] 103 | result_identifier = getattr(result, result_module.OBJECT_ID) 104 | logger.info('Found: {}: {}'.format( 105 | result_module_name, 106 | result_identifier, 107 | )) 108 | found += 1 109 | repository.put_asset_object(asset, overwrite=True) 110 | logger.info('New assets found: {}'.format(found)) 111 | logger.info('Inquisitor has completed') 112 | 113 | def status(repository, strong): 114 | table = [ 115 | ['Asset', 'Accepted', 'Unknown', 'Rejected', 'Total'], 116 | list(), 117 | ] 118 | for asset_module in inquisitor.ASSET_MODULES: 119 | asset_type = asset_module.ASSET_CLASS 120 | total = 0 121 | row = [asset_type.__name__] 122 | for owned in [True, None, False]: 123 | results = repository.get_assets( 124 | include=( 125 | lambda o,d: 126 | isinstance(o, asset_type) and ( 127 | (not strong and o.is_owned(repository) is owned) or 128 | (strong and d['owned'] is owned) 129 | ) 130 | ) 131 | ) 132 | row.append(len(results)) 133 | total += len(results) 134 | row.append(total) 135 | table.append(row) 136 | if not strong: 137 | table[0][3] = 'Not Accepted' 138 | for row in table: 139 | if row: 140 | del row[2] 141 | print tabulate.tabulate(table) 142 | 143 | def classify(repository, args): 144 | for asset_module in inquisitor.ASSET_MODULES: 145 | # Extract assets from arguments 146 | classified = asset_module.main_classify_canonicalize(args) 147 | accepted, unmarked, rejected = classified 148 | targets = [ 149 | (accepted, True), 150 | (unmarked, None), 151 | (rejected, False), 152 | ] 153 | # Execute asset classification 154 | for target, owned in targets: 155 | for identifier in target: 156 | repository.put_asset_string( 157 | asset_module.ASSET_CLASS, 158 | identifier, 159 | owned=owned, 160 | overwrite=True 161 | ) 162 | 163 | def dump(repository, path, all_flag): 164 | repo_dict = dict() 165 | for asset_module in inquisitor.ASSET_MODULES: 166 | asset_type = asset_module.ASSET_CLASS 167 | asset_list = list() 168 | results = repository.get_assets( 169 | include=lambda o,d: isinstance(o, asset_type) 170 | ) 171 | for asset in results: 172 | if all_flag or asset.owned is not False: 173 | asset_entry = dict(asset.__dict__) 174 | asset_entry['owned'] = asset.is_owned(repository) 175 | asset_entry['strong_owned'] = asset.owned 176 | asset_list.append(asset_entry) 177 | repo_dict[asset_module.REPOSITORY] = list(reversed(sorted( 178 | asset_list, key=lambda a: a['owned'] 179 | ))) 180 | if path is None: 181 | print json.dumps(repo_dict, indent=4, sort_keys=True) 182 | else: 183 | with open(path, 'w') as handle: 184 | json.dump(repo_dict, handle, indent=4, sort_keys=True) 185 | 186 | def visualize(repository, use_last=False): 187 | # Initialize web server directory 188 | web_dir = os.path.join(os.path.dirname(__file__), 'report') 189 | os.chdir(web_dir) 190 | if not use_last: 191 | def traverse(node, asset): 192 | # Determine name of node 193 | if asset: 194 | asset_type = asset.__class__ 195 | asset_module = sys.modules[asset_type.__module__] 196 | node['name'] = '{} : {}'.format( 197 | asset_type.__name__, 198 | getattr(asset, asset_module.OBJECT_ID) 199 | ) 200 | else: 201 | node['name'] = 'root' 202 | # Determine node children 203 | children = repository.get_assets( 204 | include=lambda o,d: 205 | o.is_owned(repository) and 206 | o.parent_asset(repository) == asset 207 | ) 208 | if children: 209 | node['children'] = list() 210 | for child in children: 211 | subnode = dict() 212 | traverse(subnode, child) 213 | node['children'].append(subnode) 214 | else: 215 | node['size'] = 1 216 | # Start traversal 217 | root = {} 218 | traverse(root, None) 219 | # Dump visualization to JSON file 220 | with open('report.json', 'w') as handle: 221 | json.dump(root, handle, indent=4, sort_keys=True) 222 | # Start HTTP Server 223 | port = 8080 224 | webbrowser.open('http://localhost:{}/index.html'.format(port), new=2) 225 | http_handler = SimpleHTTPServer.SimpleHTTPRequestHandler 226 | httpd = SocketServer.TCPServer(("", port), http_handler) 227 | httpd.serve_forever() 228 | 229 | def main(cmd_args): 230 | 231 | # Create main argument parser 232 | parent_parser = argparse.ArgumentParser(add_help=False) 233 | parent_parser.add_argument( 234 | 'database', 235 | metavar='DATABASE', 236 | type=database, 237 | help=( 238 | 'The path to the intelligence database to use. If specified file ' 239 | 'does not exist, a new one will be created.' 240 | ), 241 | ) 242 | 243 | # Create subcommand parsers 244 | main_parser = argparse.ArgumentParser() 245 | commands_subparsers = main_parser.add_subparsers( 246 | title='command', 247 | help='The action to perform.', 248 | dest='command', 249 | ) 250 | 251 | # Parse arguments for scan command 252 | scan_parser = commands_subparsers.add_parser( 253 | 'scan', 254 | help=( 255 | 'Search OSINT sources for intelligence based on known assets ' 256 | 'belonging to the target.' 257 | ), 258 | parents=[parent_parser], 259 | ) 260 | scan_parser.add_argument( 261 | '--google-dev-key', 262 | metavar='GOOGLE_DEV_KEY', 263 | type=str, 264 | help=( 265 | 'Specifies the developer key to use to query Google Custom ' 266 | 'Search. Visit the Google APIs Console ' 267 | '(http://code.google.com/apis/console) to get an API key. If not' 268 | 'specified, the script will simply skip asset transforms that ' 269 | 'involve Google Search.' 270 | ), 271 | dest='google_dev_key', 272 | ) 273 | scan_parser.add_argument( 274 | '--google-cse-id', 275 | metavar='GOOGLE_CSE_ID', 276 | type=str, 277 | help=( 278 | 'Specifies the custom search engine to query. Visit the Google ' 279 | 'Custom Search Console (https://cse.google.com/cse/all) to create ' 280 | 'your own Google Custom Search Engine. If not specified, the ' 281 | 'script will simply skip asset transforms that involve Google ' 282 | 'Search.' 283 | ), 284 | dest='google_cse_id', 285 | ) 286 | scan_parser.add_argument( 287 | '--google-limit', 288 | metavar='GOOGLE_LIMIT', 289 | type=int, 290 | help=( 291 | 'The number of pages to limit Google Search to. This is to avoid ' 292 | 'exhausting your daily quota.' 293 | ), 294 | default=None, 295 | ) 296 | scan_parser.add_argument( 297 | '--shodan-api-key', 298 | metavar='SHODAN_API_KEY', 299 | type=str, 300 | help=( 301 | 'Specifies the API key to use to query Shodan. Log into your ' 302 | 'Shodan account (https://www.shodan.io/) and look at the top ' 303 | 'right corner of the page in order to view your API key. If not ' 304 | 'specified, the script will simply skip asset transforms that ' 305 | 'involve Shodan.' 306 | ), 307 | dest='shodan_api_key', 308 | ) 309 | scan_parser.add_argument( 310 | '--shodan-limit', 311 | metavar='SHODAN_LIMIT', 312 | type=int, 313 | help=( 314 | 'The number of pages to limit Shodan Search to. This is to avoid ' 315 | 'exhausting your daily quota.' 316 | ), 317 | default=None, 318 | ) 319 | 320 | # Parse arguments for status command 321 | status_parser = commands_subparsers.add_parser( 322 | 'status', 323 | help=( 324 | 'Prints out the current status of the specified intelligence ' 325 | 'database.' 326 | ), 327 | parents=[parent_parser], 328 | ) 329 | status_parser.add_argument( 330 | '-s', '--strong', 331 | help=( 332 | 'Indicates if the status will be based on the strong ownership ' 333 | 'classification.' 334 | ), 335 | action='store_true', 336 | default=False, 337 | ) 338 | 339 | # Parse arguments for classify command 340 | classify_parser = commands_subparsers.add_parser( 341 | 'classify', 342 | help=( 343 | 'Classifies an existing asset as either belonging or not ' 344 | 'belonging to the target. Adds a new asset with the specified ' 345 | 'classification if none is present.' 346 | ), 347 | parents=[parent_parser], 348 | ) 349 | for asset_module in inquisitor.ASSET_MODULES: 350 | asset_module.main_classify_args(classify_parser) 351 | 352 | # Parse arguments for dump command 353 | dump_parser = commands_subparsers.add_parser( 354 | 'dump', 355 | help='Dumps the contents of the database into a JSON file', 356 | parents=[parent_parser], 357 | ) 358 | dump_parser.add_argument( 359 | '-j', '--json', 360 | metavar='FILE', 361 | type=str, 362 | help='The path to dump the JSON file to. Overwrites existing files.', 363 | ) 364 | dump_parser.add_argument( 365 | '-a', '--all', 366 | help='Include rejected assets in dump.', 367 | action='store_true', 368 | default=False, 369 | ) 370 | 371 | # Parse arguments for visualize command 372 | visualize_parser = commands_subparsers.add_parser( 373 | 'visualize', 374 | help=( 375 | 'Create a D3.js visualization based on the contents of the ' 376 | 'specified intelligence database.' 377 | ), 378 | parents=[parent_parser], 379 | ) 380 | visualize_parser.add_argument( 381 | '-l', '--last', 382 | help=( 383 | 'Simply open the last visualization generated instead of creating ' 384 | 'a new one.' 385 | ), 386 | action='store_true', 387 | default=False, 388 | ) 389 | 390 | # Perform actual parsing of arguments 391 | args = main_parser.parse_args(cmd_args) 392 | 393 | # Determine chosen command and pass to appropriate subroutine 394 | if args.command == 'scan': 395 | scan( 396 | args.database, 397 | google_dev_key=args.google_dev_key, 398 | google_cse_id=args.google_cse_id, 399 | google_limit=args.google_limit, 400 | shodan_api_key=args.shodan_api_key, 401 | shodan_limit=args.shodan_limit, 402 | ) 403 | exit(0) 404 | if args.command == 'status': 405 | status(args.database, args.strong) 406 | return 407 | if args.command == 'classify': 408 | classify(args.database, args) 409 | return 410 | if args.command == 'dump': 411 | dump(args.database, args.json, args.all) 412 | return 413 | if args.command == 'visualize': 414 | visualize(args.database, args.last) 415 | return 416 | 417 | # Entry Point 418 | if __name__ == '__main__': 419 | # Call the main function 420 | main(sys.argv[1:]) -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Inquisitor 2 | 3 | > #### Notice 4 | > This project is only partially complete and I have yet to implement many of the features described in the following blog post I made: https://penafieljlm.com/2017/07/14/inquisitor/. 5 | 6 | Inquisitor is a simple tool for gathering information on companies and organizations through the use of Open Source Intelligence (OSINT) sources. It is heavily inspired from how Maltego and recon-ng operates, and the tool pretty much re-implements some of the features of those tools but adds an additonal layer of opinion-based semantics on top of asset types in order to create an easy-to-use workflow. 7 | 8 | The key features of Inquisitor include: 9 | 10 | 1. The ability to cascade the ownership label of an asset (e.g. if a Registrant Name is known to belong to the target organization, then the hosts and networks registered with that name shall be marked as belonging to the target organization) 11 | 2. The ability transform assets into other potentially related assets through querying open sources such as Google and Shodan 12 | 3. The ability to visualize the relationships of those assets through a zoomable pack layout 13 | 14 | ## Concept 15 | 16 | The whole concept of Inquisitor revolves around the idea of extracting information from open sources based on what is already known about a target organization. In the context of Inquisitor these are called "transforms". Related information may also be immidiately retrieved from an known asset based on metadata also retrievable from open sources such as whois and internet registries. 17 | 18 | The concepts are discussed in further detail in this blog article: https://penafieljlm.com/2017/07/14/inquisitor/ 19 | 20 | ## Installation 21 | 22 | To install Inquisitor, simply clone the repository, enter it, and execute the installation script. 23 | ``` 24 | pip install Cython click 25 | git clone git@github.com:penafieljlm/inquisitor.git 26 | cd inquisitor 27 | python setup.py install 28 | ``` 29 | 30 | ## Usage 31 | 32 | Inquisitor has five basic commands which include `scan`, `status`, `classify`, `dump`, and `visualize`. 33 | ``` 34 | usage: inq [-h] {scan,status,classify,dump,visualize} ... 35 | 36 | optional arguments: 37 | -h, --help show this help message and exit 38 | 39 | command: 40 | {scan,status,classify,dump,visualize} 41 | The action to perform. 42 | scan Search OSINT sources for intelligence based on known 43 | assets belonging to the target. 44 | status Prints out the current status of the specified 45 | intelligence database. 46 | classify Classifies an existing asset as either belonging or 47 | not belonging to the target. Adds a new asset with the 48 | specified classification if none is present. 49 | dump Dumps the contents of the database into a JSON file 50 | visualize Create a D3.js visualization based on the contents of 51 | the specified intelligence database. 52 | ``` 53 | 54 | ### Scan 55 | 56 | In scan mode, the tool runs all available transforms for all the assets you have in your Intelligence Database. Make sure to create API Keys for the various OSINT sources indicated below and provide it to the script lest the transforms using those sources be skipped. Also, make sure you seed your Intelligence Database with some known owned target assets using the `classify` command first because if the database does not contain any owned assets, there will be nothing to transform. 57 | ``` 58 | usage: inq scan [-h] [--google-dev-key GOOGLE_DEV_KEY] 59 | [--google-cse-id GOOGLE_CSE_ID] 60 | [--google-limit GOOGLE_LIMIT] 61 | [--shodan-api-key SHODAN_API_KEY] 62 | [--shodan-limit SHODAN_LIMIT] 63 | DATABASE 64 | 65 | positional arguments: 66 | DATABASE The path to the intelligence database to use. If 67 | specified file does not exist, a new one will be 68 | created. 69 | 70 | optional arguments: 71 | -h, --help show this help message and exit 72 | --google-dev-key GOOGLE_DEV_KEY 73 | Specifies the developer key to use to query Google 74 | Custom Search. Visit the Google APIs Console 75 | (http://code.google.com/apis/console) to get an API 76 | key. If notspecified, the script will simply skip 77 | asset transforms that involve Google Search. 78 | --google-cse-id GOOGLE_CSE_ID 79 | Specifies the custom search engine to query. Visit the 80 | Google Custom Search Console 81 | (https://cse.google.com/cse/all) to create your own 82 | Google Custom Search Engine. If not specified, the 83 | script will simply skip asset transforms that involve 84 | Google Search. 85 | --google-limit GOOGLE_LIMIT 86 | The number of pages to limit Google Search to. This is 87 | to avoid exhausting your daily quota. 88 | --shodan-api-key SHODAN_API_KEY 89 | Specifies the API key to use to query Shodan. Log into 90 | your Shodan account (https://www.shodan.io/) and look 91 | at the top right corner of the page in order to view 92 | your API key. If not specified, the script will simply 93 | skip asset transforms that involve Shodan. 94 | --shodan-limit SHODAN_LIMIT 95 | The number of pages to limit Shodan Search to. This is 96 | to avoid exhausting your daily quota. 97 | ``` 98 | 99 | ### Status 100 | 101 | In status mode, the tool simply prints out a quick summary of the status of your scan database. 102 | ``` 103 | usage: inq status [-h] [-s] DATABASE 104 | 105 | positional arguments: 106 | DATABASE The path to the intelligence database to use. If specified 107 | file does not exist, a new one will be created. 108 | 109 | optional arguments: 110 | -h, --help show this help message and exit 111 | -s, --strong Indicates if the status will be based on the strong ownership 112 | classification. 113 | ``` 114 | 115 | ### Classify 116 | 117 | In classify mode, you will be able to manually add assets and re-classify already existing assets in the Intelligence Database. You should use this command to seed your Intelligence Database with known owned target assets. 118 | ``` 119 | usage: inq classify [-h] [-ar REGISTRANT [REGISTRANT ...]] 120 | [-ur REGISTRANT [REGISTRANT ...]] 121 | [-rr REGISTRANT [REGISTRANT ...]] 122 | [-ab BLOCK [BLOCK ...]] [-ub BLOCK [BLOCK ...]] 123 | [-rb BLOCK [BLOCK ...]] [-ah HOST [HOST ...]] 124 | [-uh HOST [HOST ...]] [-rh HOST [HOST ...]] 125 | [-ae EMAIL [EMAIL ...]] [-ue EMAIL [EMAIL ...]] 126 | [-re EMAIL [EMAIL ...]] 127 | [-al LINKEDIN [LINKEDIN ...]] 128 | [-ul LINKEDIN [LINKEDIN ...]] 129 | [-rl LINKEDIN [LINKEDIN ...]] 130 | DATABASE 131 | 132 | positional arguments: 133 | DATABASE The path to the intelligence database to use. If 134 | specified file does not exist, a new one will be 135 | created. 136 | 137 | optional arguments: 138 | -h, --help show this help message and exit 139 | -ar REGISTRANT [REGISTRANT ...], --accept-registrant REGISTRANT [REGISTRANT ...] 140 | Specifies a registrant to classify as accepted. 141 | -ur REGISTRANT [REGISTRANT ...], --unmark-registrant REGISTRANT [REGISTRANT ...] 142 | Specifies a registrant to classify as unmarked. 143 | -rr REGISTRANT [REGISTRANT ...], --reject-registrant REGISTRANT [REGISTRANT ...] 144 | Specifies a registrant to classify as rejected. 145 | -ab BLOCK [BLOCK ...], --accept-block BLOCK [BLOCK ...] 146 | Specifies a block to classify as accepted. 147 | -ub BLOCK [BLOCK ...], --unmark-block BLOCK [BLOCK ...] 148 | Specifies a block to classify as unmarked. 149 | -rb BLOCK [BLOCK ...], --reject-block BLOCK [BLOCK ...] 150 | Specifies a block to classify as rejected. 151 | -ah HOST [HOST ...], --accept-host HOST [HOST ...] 152 | Specifies a host to classify as accepted. 153 | -uh HOST [HOST ...], --unmark-host HOST [HOST ...] 154 | Specifies a host to classify as unmarked. 155 | -rh HOST [HOST ...], --reject-host HOST [HOST ...] 156 | Specifies a host to classify as rejected. 157 | -ae EMAIL [EMAIL ...], --accept-email EMAIL [EMAIL ...] 158 | Specifies a email to classify as accepted. 159 | -ue EMAIL [EMAIL ...], --unmark-email EMAIL [EMAIL ...] 160 | Specifies a email to classify as unmarked. 161 | -re EMAIL [EMAIL ...], --reject-email EMAIL [EMAIL ...] 162 | Specifies a email to classify as rejected. 163 | -al LINKEDIN [LINKEDIN ...], --accept-linkedin LINKEDIN [LINKEDIN ...] 164 | Specifies a LinkedIn Account to classify as accepted. 165 | -ul LINKEDIN [LINKEDIN ...], --unmark-linkedin LINKEDIN [LINKEDIN ...] 166 | Specifies a LinkedIn Account to classify as unmarked. 167 | -rl LINKEDIN [LINKEDIN ...], --reject-linkedin LINKEDIN [LINKEDIN ...] 168 | Specifies a LinkedIn Account to classify as rejected. 169 | ``` 170 | 171 | ### Dump 172 | 173 | In dump mode, you will be able to dump the contents of the Intelligence Database into a human-readable JSON file. 174 | ``` 175 | usage: inq dump [-h] [-j FILE] [-a] DATABASE 176 | 177 | positional arguments: 178 | DATABASE The path to the intelligence database to use. If 179 | specified file does not exist, a new one will be 180 | created. 181 | 182 | optional arguments: 183 | -h, --help show this help message and exit 184 | -j FILE, --json FILE The path to dump the JSON file to. Overwrites existing 185 | files. 186 | -a, --all Include rejected assets in dump. 187 | ``` 188 | 189 | ### Visualize 190 | 191 | In visualize mode, you will be able to acquire a hierarchical visualization of the Intelligence Repository. 192 | ``` 193 | usage: inq visualize [-h] [-l] DATABASE 194 | 195 | positional arguments: 196 | DATABASE The path to the intelligence database to use. If specified file 197 | does not exist, a new one will be created. 198 | 199 | optional arguments: 200 | -h, --help show this help message and exit 201 | -l, --last Simply open the last visualization generated instead of creating 202 | a new one. 203 | ``` 204 | 205 | ## Workflow 206 | 207 | Now that you know the basic features of Inquisitor, it's time you learn how to *actually* use it. Inquisitor has been written with the following steps in mind: 208 | 209 | ### Seeding 210 | 211 | In this step, your Intelligence Database doesn't have anything in it yet. We're going to have to start somewhere so go ahead and seed the database with assets that you know belong to your target organization. You can do this using the `classify` command. 212 | 213 | ### Scanning 214 | 215 | Now that the database has assets that are known to belong to your target organization. You can then proceed with scanning. You can do this using the `scan` command. 216 | 217 | When you invoke the `scan` command on your Intelligence Database, Inquisitor proceeds to run the `transform` methods of assets that are classified as `accepted`. Once scanning is finished, you're going to end up with more assets that might potentially belong to your target organization. 218 | 219 | If you don't end up with any new assets, you can either seed your Intelligence Database with new information, or simply proceed to wrap up the process by proceeding to the Reporting step. 220 | 221 | ### Classifying 222 | 223 | While Inquisitor performs automatic asset classification for you, it might end up missing some assets that do, in fact, belong to your target organization. 224 | 225 | When this happens, you're going to have to check the database contents and manually classify the assets. Usually, you'd want to pay attention to **Registrant** assets as there is no way to automatically determine ownership for that asset type. Also most other asset types rely on the ownership classification of Registrant assets in order to determine whether they belong to your target or not, so it's definitelty best to pay attention to your Registrant assets. Additionally, you don't end up with a lot of Registrant assets in the first place so it's not going to be that hard sifting through them. 226 | 227 | ### Reporting 228 | 229 | You can generate a visualization of the assets that belong to your target organization using the `visualize` command or the `dump` command. 230 | 231 | ## Demo 232 | 233 | I have video ddemonstrations of the tool running in the following link: https://drive.google.com/open?id=0B_O70BVu38TRclo5dWRBWkdTTWc 234 | 235 | I wasn't able to fully record the run of the scan command though since my free screen recorder only records up to 10 minutes. 236 | 237 | ## Development 238 | 239 | The the Inquisitor project is laid out in the following format: 240 | ``` 241 | . 242 | |-- README.md 243 | |-- inquisitor 244 | | |-- __init__.py 245 | | |-- assets 246 | | | |-- __init__.py 247 | | | |-- block.py 248 | | | |-- email.py 249 | | | |-- host.py 250 | | | |-- linkedin.py 251 | | | `-- registrant.py 252 | | |-- extractors 253 | | | |-- __init__.py 254 | | | `-- emails.py 255 | | `-- sources 256 | | |-- __init__.py 257 | | |-- google_search.py 258 | | `-- shodan_search.py 259 | |-- inq 260 | |-- report 261 | | `-- index.html 262 | |-- setup.py 263 | `-- tests 264 | |-- __init__.py 265 | `-- test_inq.py 266 | ``` 267 | 268 | It has three main modules named `assets`, `extractors`, and `sources`. The main script is called `inq`. 269 | 270 | As a developer you would mostly be interested in adding new types of assets into the system so the developer guide would mostly focus on that. 271 | 272 | ### Repository 273 | 274 | Before we move on to actually implementing asset classes, we would first need to understand how to interact with the Intelligence Database as we will be interacting with it when we derive related assets from our asset classes. 275 | 276 | The source code for the Intelligence Database is stored in the `inquisitor/__init__.py` file. The actual name for the logical wrapper of the Intelligence Database is called `IntelligenceRepository`. 277 | 278 | You only need to call the `IntelligenceRepository.get_asset_string` function from asset classes as appending new assets onto the Intelligence Database is the responsibility of the `scan` module in the `inq` script. You would mostly use this function to create instances of assets or retrieve them from the database if they exist. This function is important when returning assets from the `related` and `transform` functions of your asset classes as creating new asset objects is expensive since some of them use network resources during initialization. 279 | 280 | ``` 281 | Function 282 | 283 | IntelligenceRepository.get_asset_string(asset_type, identifier, create=False, store=False) 284 | 285 | Description 286 | 287 | Retrieves the primary key and asset object for the asset with the provided 288 | type and identifier. 289 | 290 | Parameters 291 | 292 | asset_type: class, required 293 | 294 | The type of the asset to retrieve from the Intelligence Database. You 295 | will actually have to pass the class object of the asset type you want 296 | to retrieve. 297 | 298 | identifier: any, required 299 | 300 | The identifier of the asset to retrieve. Consider the identifier as the 301 | unique attribute of an asset object. As for which attribute is to be 302 | used to identify an asset, it depends on the contents of the OBJECT_ID 303 | variable in the asset module. 304 | 305 | create: bool, optional, default=False 306 | 307 | When no matching asset object is found, a new one will be created and 308 | returned if this parameter is set to True. The new asset will not 309 | necessarily be stored in the Intelligence Database unless specified 310 | using the "store" parameter. However, I suggest you do not do this as 311 | adding assets to the Intelligence Database is the responsibility of 312 | another module. 313 | 314 | store: bool, optional, default=False 315 | 316 | When a new asset is created when none is found, the new one will be 317 | stored in the Intelligence Database. As said previously, I suggest that 318 | you do not do this as adding assets to the Intelligence Database is the 319 | responsibility of another module. 320 | 321 | Returns 322 | 323 | A two-element tuple where the first element is the database primary key of 324 | the element returned, and the second element is the deserialized asset 325 | object retrieved from the database. 326 | 327 | None if the asset was not found. 328 | 329 | If the asset was not found and the create flag was set to True, the primary 330 | key member of the tuple will be set to None. 331 | 332 | ``` 333 | 334 | ### Assets 335 | 336 | To create a new asset type, create a new file inside the `inquisitor/assets` directory and paste the following skeleton code inside: 337 | 338 | ```python 339 | import inquisitor.assets 340 | 341 | class ASSET_NAMEValidateException(Exception): 342 | pass 343 | 344 | def canonicalize(ASSET_IDENTIFIER): 345 | return ASSET_IDENTIFIER 346 | 347 | def main_classify_args(parser): 348 | parser.add_argument( 349 | '-aASSET_NAME_LETTER', '--accept-ASSET_NAME', 350 | metavar='ASSET_NAME', 351 | type=canonicalize, 352 | nargs='+', 353 | help='Specifies a ASSET_NAME to classify as accepted.', 354 | dest='ASSET_NAMEs_accepted', 355 | default=list(), 356 | ) 357 | parser.add_argument( 358 | '-uASSET_NAME_LETTER', '--unmark-ASSET_NAME', 359 | metavar='ASSET_NAME', 360 | type=canonicalize, 361 | nargs='+', 362 | help='Specifies a ASSET_NAME to classify as unmarked.', 363 | dest='ASSET_NAMEs_unmarked', 364 | default=list(), 365 | ) 366 | parser.add_argument( 367 | '-rASSET_NAME_LETTER', '--reject-ASSET_NAME', 368 | metavar='ASSET_NAME', 369 | type=canonicalize, 370 | nargs='+', 371 | help='Specifies a ASSET_NAME to classify as rejected.', 372 | dest='ASSET_NAME_rejected', 373 | default=list(), 374 | ) 375 | 376 | def main_classify_canonicalize(args): 377 | accepted = set(args.ASSET_NAMEs_accepted) 378 | unmarked = set(args.ASSET_NAMEs_unmarked) 379 | rejected = set(args.ASSET_NAME_rejected) 380 | redundant = set.intersection(accepted, unmarked, rejected) 381 | if redundant: 382 | raise ValueError( 383 | ('Conflicting classifications for ASSET_NAMEs ' 384 | ': {}').format(list(redundant)) 385 | ) 386 | accepted = set([canonicalize(a) for a in accepted]) 387 | unmarked = set([canonicalize(a) for a in unmarked]) 388 | rejected = set([canonicalize(a) for a in rejected]) 389 | return (accepted, unmarked, rejected) 390 | 391 | class ASSET_NAME(inquisitor.assets.Asset): 392 | 393 | def __init__(self, ASSET_IDENTIFIER, owned=None): 394 | super(self.__class__, self).__init__(owned=owned) 395 | self.ASSET_IDENTIFIER = canonicalize(ASSET_IDENTIFIER) 396 | # TODO: Perform other initialization actions here 397 | 398 | def __eq__(self, other): 399 | if not isinstance(other, self.__class__): 400 | return False 401 | return self.ASSET_IDENTIFIER == other.ASSET_IDENTIFIER 402 | 403 | def related(self, repo): 404 | # Prepare the results 405 | results = set() 406 | # TODO: Create related assets here based on the attributes of this asset 407 | # Return the results 408 | return results 409 | 410 | def transform(self, repo, sources): 411 | # Prepare the results 412 | assets = set() 413 | # Google Transforms 414 | if sources.get('google'): 415 | subassets = self.cache_transform_get('google', repo) 416 | if not subassets: 417 | # Acquire API 418 | google = sources['google'] 419 | # TODO: Perform Google queries here and the results to 'subassets' 420 | # Cache The Transform 421 | self.cache_transform_store('google', subassets) 422 | assets.update(subassets) 423 | # Shodan Transforms 424 | if sources.get('shodan'): 425 | subassets = self.cache_transform_get('shodan', repo) 426 | if not subassets: 427 | # Acquire API 428 | shodan = sources['shodan'] 429 | # TODO: Perform Google queries here and the results to 'subassets' 430 | # Cache The Transform 431 | self.cache_transform_store('shodan', subassets) 432 | assets.update(subassets) 433 | # Return the results 434 | return assets 435 | 436 | def is_owned(self, repo): 437 | if self.owned: 438 | return True 439 | # TODO: Automatically determine ownership based on repo contents 440 | return False 441 | 442 | def parent_asset(self, repo): 443 | # TODO: Return parent asset based on repo contents 444 | return None 445 | 446 | REPOSITORY = 'ASSET_REPOSITORY' 447 | ASSET_CLASS = ASSET_NAME 448 | OBJECT_ID = 'ASSET_IDENTIFIER' 449 | ``` 450 | 451 | Now replace the following strings with the appropriate values 452 | * `ASSET_NAME` : Proper name of your asset (e.g. Registrant, Host, etc.) 453 | * `ASSET_IDENTIFIER` : The name of the identifier attribute of your asset 454 | * `ASSET_NAME_LETTER` : The first letter of your asset in lowercase 455 | * `ASSET_REPOSITORY` : Lower case of the plural form of your asset name 456 | 457 | Finally, in `inquisitor/__init__.py`, register your asset in the `ASSET_MODULES` list. Make sure you import your new asset from the file in question. 458 | 459 | Congratulations! By this point, you now have a new working asset type! 460 | 461 | However, you are going to need to implement the following methods to make sure your assets get correlated with other asset types: 462 | 463 | ``` 464 | Function 465 | 466 | related 467 | 468 | Description 469 | 470 | Returns the set of assets directly related to the asset in question (i.e. 471 | those that can be derived without querying a search engine). 472 | 473 | When creating asset objects, make sure you use the 474 | IntelligenceRepository.get_asset_string method instead of instatiating a 475 | new one your self so the asset can be returned from the repository if it 476 | exists. 477 | 478 | Set the create flag to True when calling the method in question in order 479 | to return a new object when one isn't found. 480 | 481 | Set the store flag to False as appending assets is the job of another 482 | module. 483 | 484 | Parameters 485 | 486 | repo: IntelligenceRepository 487 | 488 | The Intelligence Repository that is being used in the current context. 489 | 490 | Returns 491 | 492 | Set of assets directly related to the asset in question. 493 | 494 | ``` 495 | 496 | ``` 497 | Function 498 | 499 | transform 500 | 501 | Description 502 | 503 | Returns the set of assets potentially related to the asset in question 504 | (i.e. those that can be derived by querying a search engine). 505 | 506 | You may access search engine objects through the provided sources 507 | parameter. 508 | 509 | Each search engine object has a transform method which automatically 510 | creates asset objects for you. You just need to provide it the repository 511 | and your query string, and then append the objects it returns to the set 512 | of assets to be returned by your asset's transform method. 513 | 514 | Parameters 515 | 516 | repo: IntelligenceRepository 517 | 518 | The Intelligence Repository that is being used in the current context. 519 | 520 | sources: dict 521 | 522 | The list of search engine objects that are available for use. 523 | 524 | Returns 525 | 526 | Set of assets potentially related to the asset in question. 527 | 528 | ``` 529 | 530 | ``` 531 | Function 532 | 533 | is_owned 534 | 535 | Description 536 | 537 | Determines if there is high confidence that this asset does indeed belong 538 | to the target. Usually checks for any "strong" classification tag first by 539 | looking at the contents of the "owned" variable, before performing 540 | automatic evaluation. 541 | 542 | Automatic evaluation depends on what type of asset you're writing. For 543 | example, for a Host asset, the secondary sources of determining ownership 544 | would include looking if its registrant is owned by the target, if it's 545 | parent domain is owned by the target. etc. 546 | 547 | Parameters 548 | 549 | repo: IntelligenceRepository 550 | 551 | The Intelligence Repository that is being used in the current context. 552 | 553 | Returns 554 | 555 | True it is determined with high confidence that this asset does indeed 556 | belong to the target. 557 | 558 | ``` 559 | 560 | ``` 561 | Function 562 | 563 | parent_asset 564 | 565 | Description 566 | 567 | Returns the asset object that is considered the parent of this asset 568 | object. 569 | 570 | Parameters 571 | 572 | repo: IntelligenceRepository 573 | 574 | Returns 575 | 576 | The asset object that this asset falls under (e.g. a Block is under a 577 | Registrant, a Host is under a Block, a Host is under another Host, an Email 578 | is under a Host, etc. This is primarily used for visualization. 579 | 580 | ``` 581 | 582 | After implementing the above methods, make sure you set the `REPOSITORY`, `ASSET_CLASS`, and `OBJECT_ID` variables on the bottom of your asset's source code. 583 | 584 | ## Contact and Notes 585 | 586 | The scan mode isn't fully tested because of quotas concerning the search engines involved. Also, this project was made in a rush as part of a week-long hackaton challenge so there might be a lot of problems lying around. Please create an issue ticket or contact me at penafieljlm@gmail.com if you find a bug or have some questions. 587 | 588 | ## Disclaimer 589 | 590 | This work is derived from the approaches implemented by the Maltego and recon-ng Open Source Intelligence tools. I supplemented these approaches with ideas that are either already common knowledge (e.g. whois tells you who the owner of a domain is, subdomains are owned by the same organization owning their parent - as implied by domain name bruteforcing attacks, organizations are authoritative of the domain names that they own, etc.), or are original and were conceived by me in my own personal time as part of my hobby (e.g. acceptability ratings, various transforms, classification inheritance, etc.). 591 | 592 | No component of this work was derived from any work that I have done for any employer in the past. The whole project, including the proof-of-concept, was written from scratch and was augmented with ideas from the information security community. 593 | --------------------------------------------------------------------------------