├── certstream_analytics ├── __init__.py ├── reporters │ ├── __init__.py │ └── base.py ├── transformers │ ├── __init__.py │ └── base.py ├── storages │ ├── __init__.py │ ├── base.py │ └── elasticsearch_storage.py ├── analysers │ ├── __init__.py │ ├── base.py │ ├── domain_matching.py │ └── common_domain_analyser.py └── stream.py ├── .coveragerc ├── tests ├── opendns-top-domains.txt ├── test_stream.py ├── test_elasticsearch.py ├── test_reporter.py ├── samples.json └── test_domain_matching_analyser.py ├── setup.cfg ├── .gitmodules ├── LICENSE ├── scripts ├── sundry │ ├── generate_features.py │ ├── isolation_forest.py │ ├── elliptic_envelope.py │ ├── lof.py │ └── certstream-domain-features.ipynb └── replay.py ├── .travis.yml ├── setup.py ├── .gitignore ├── bin └── domain_matching.py ├── README.md └── pylintrc /certstream_analytics/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | source=certstream-analytics 3 | -------------------------------------------------------------------------------- /certstream_analytics/reporters/__init__.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=missing-docstring 2 | from .base import Reporter, FileReporter 3 | -------------------------------------------------------------------------------- /tests/opendns-top-domains.txt: -------------------------------------------------------------------------------- 1 | google.com 2 | facebook.com 3 | bankofamerica.com 4 | apple.com 5 | www.net.cn 6 | discover.com 7 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [bdist_wheel] 2 | universal = 1 3 | 4 | [tool:pytest] 5 | 
# pylint: disable=no-init,too-few-public-methods
class Storage:
    """
    Define the template of all storage classes.

    A storage receives the records flowing through the pipeline and persists
    them somewhere via ``save``.
    """
    # NOTE(review): ``__metaclass__`` is the Python 2 spelling; Python 3 ignores
    # this attribute, so the abstract method below is not actually enforced.
    __metaclass__ = ABCMeta

    @abstractmethod
    def save(self, record):
        """
        Persist one record. Concrete storages are expected to override this.
        """
# pylint: disable=no-init,too-few-public-methods
class Reporter:
    """
    Define the template of all reporter classes.
    """
    # NOTE(review): ``__metaclass__`` is the Python 2 spelling; Python 3 ignores
    # this attribute, so the abstract method below is not actually enforced.
    __metaclass__ = ABCMeta

    @abstractmethod
    def publish(self, report):
        """
        Publish one report. Concrete reporters are expected to override this.
        """


class FileReporter(Reporter):
    """
    Append every report to a plain text file, one JSON document per line.
    """
    def __init__(self, path):
        """
        Open ``path`` in append mode.

        Note that an exception will be raised if the path is not valid or writable.
        """
        # Define the attribute before calling open() so that __del__ stays safe
        # on a half-constructed instance when open() raises.
        self.fhandler = None
        self.fhandler = open(path, 'a')

    def __del__(self):
        """
        Close the underlying file, if it was ever opened.
        """
        fhandler = getattr(self, 'fhandler', None)

        if fhandler is not None:
            fhandler.close()

    def publish(self, report):
        """
        This is a very basic reporter that will only print out the record it receives
        to a plain text file. Empty reports (``None``, ``{}``) are ignored.
        """
        if not report:
            return

        # Flush right away: reports arrive sporadically from a long-running
        # stream, and a buffered line would stay invisible to anyone reading
        # the file (and be lost on a crash) until the handle is closed.
        print(json.dumps(report), file=self.fhandler, flush=True)
def main(max_count=None):
    '''
    Generate the features of every record and print them as JSON lines.

    The record is assumed to be stored in a JSON file passed in as the first
    parameter of the script, one JSON document per line. Lines that are not
    valid JSON are skipped silently.

    :param max_count: the maximum number of records to emit; a falsy value
        (``None`` or ``0``) means no limit.
    '''
    segmenter = WordSegmentation()
    decoder = IDNADecoder()
    generator = FeaturesGenerator()

    with open(sys.argv[1]) as fhandle:
        count = 0

        for line in fhandle:
            try:
                record = json.loads(line.strip())
            except json.decoder.JSONDecodeError:
                continue

            # The order matters: each analyser appends its output to the record
            record = decoder.run(record)
            record = segmenter.run(record)
            record = generator.run(record)

            print(json.dumps(record))
            count += 1

            # Stop as soon as the limit is reached. The previous `>` check
            # let one extra record through.
            if max_count and count >= max_count:
                break
31 | ''' 32 | self.engine.start() 33 | 34 | # Wait a bit 35 | time.sleep(CertstreamTest.DEFAULT_DELAY) 36 | 37 | self.engine.stop() 38 | # We should see some data coming already 39 | self.assertTrue(self.debugger.count, 'Consuming data from certstream successfully') 40 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | os: 3 | - linux 4 | python: 5 | - '3.7' 6 | before_install: 7 | - sudo apt-get install -y libenchant-dev 8 | - sudo apt-get install -y apt-transport-https 9 | - wget -qO - https://artifacts.elastic.co/GPG-KEY-elasticsearch | sudo apt-key add - 10 | - echo "deb https://artifacts.elastic.co/packages/6.x/apt stable main" | sudo tee -a /etc/apt/sources.list.d/elastic-6.x.list 11 | - sudo apt-get update && sudo apt-get remove -y elasticsearch 12 | - sudo apt-get install -y elasticsearch 13 | - sudo chown -R elasticsearch:elasticsearch /etc/default/elasticsearch 14 | - sudo service elasticsearch start 15 | install: 16 | - pip install --upgrade pytest 17 | - pip install pytest-pep8 pytest-cov 18 | - pip install codecov 19 | - pip install elasticsearch_dsl certstream pyahocorasick tldextract wordsegment pyenchant idna confusable-homoglyphs 20 | - pip install git+https://github.com/casics/nostril.git 21 | - pip install -e .[tests] 22 | before_script: 23 | - sleep 10 24 | - sudo systemctl -l status elasticsearch 25 | - curl 'http://localhost:9200' 26 | script: 27 | - pytest --pep8 -m pep8 certstream_analytics/ 28 | - PYTHONPATH=$PWD:$PYTHONPATH pytest --cov=./ tests/ 29 | after_script: 30 | - curl 'http://localhost:9200/_cat/indices?v' 31 | after_success: 32 | - codecov 33 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Standard Python setup script. 
def main():
    '''
    Flag outlier domains using the isolation forest method.

    The input is the JSON-lines file passed in as the first parameter of the
    script. Each record carries the output of the FeaturesGenerator analyser
    (a list of feature vectors) and of the WordSegmentation analyser (a dict
    keyed by domain).

    The procedure contains two simple steps:

    - Scale the data to the standard distribution with mean 0 and unit variance.
      This might be too simplistic.
    - Apply the isolation forest. The contamination level is set manually.

    Every domain predicted as an outlier is printed to stdout.
    '''
    domains = []
    raw = []

    with open(sys.argv[1]) as fhandle:
        for line in fhandle:
            record = json.loads(line.strip())

            for analyser in record['analysers']:
                if analyser['analyser'] == 'FeaturesGenerator':
                    raw.extend(analyser['output'])

                if analyser['analyser'] == 'WordSegmentation':
                    domains.extend(analyser['output'].keys())

            if len(raw) != len(domains):
                # Features and domains are paired by position later on, so a
                # mismatch makes the whole input unusable. Report the
                # offending record on stderr and fail with a non-zero status
                # instead of pretending that everything went fine.
                print(record, file=sys.stderr)
                sys.exit(1)

    x_samples = scale(np.array(raw))

    # The `behaviour` keyword is deliberately not passed: scikit-learn
    # deprecated it in 0.22 and removed it in 0.24, where passing it raises
    # a TypeError.
    engine = IsolationForest(contamination=0.015)
    y_samples = engine.fit_predict(x_samples)

    for index, y_sample in enumerate(y_samples):
        # fit_predict labels outliers with -1
        if y_sample == -1:
            print(domains[index])
def main():
    '''
    Flag outlier domains using the local outlier factor method.

    The input is the JSON-lines file passed in as the first parameter of the
    script. Each record carries the output of the FeaturesGenerator analyser
    (a list of feature vectors) and of the WordSegmentation analyser (a dict
    keyed by domain).

    The procedure contains two simple steps:

    - Scale the data to the standard distribution with mean 0 and unit variance.
      This might be too simplistic.
    - Apply the local outlier factor. The contamination level is set manually.

    This method does not seem to work in our case cause I suspect it treats groups
    of several outliers as clusters.

    Every domain predicted as an outlier is printed to stdout.
    '''
    domains = []
    raw = []

    with open(sys.argv[1]) as fhandle:
        for line in fhandle:
            record = json.loads(line.strip())

            for analyser in record['analysers']:
                if analyser['analyser'] == 'FeaturesGenerator':
                    raw.extend(analyser['output'])

                if analyser['analyser'] == 'WordSegmentation':
                    domains.extend(analyser['output'].keys())

            if len(raw) != len(domains):
                # Features and domains are paired by position later on, so a
                # mismatch makes the whole input unusable. Report the
                # offending record on stderr and fail with a non-zero status
                # instead of pretending that everything went fine.
                print(record, file=sys.stderr)
                sys.exit(1)

    x_samples = scale(np.array(raw))

    # Need to check the appropriate value for n_neighbors
    engine = LocalOutlierFactor(contamination=0.015)
    y_samples = engine.fit_predict(x_samples)

    for index, y_sample in enumerate(y_samples):
        # fit_predict labels outliers with -1
        if y_sample == -1:
            print(domains[index])
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | .hypothesis/ 50 | .pytest_cache/ 51 | 52 | # Translations 53 | *.mo 54 | *.pot 55 | 56 | # Django stuff: 57 | *.log 58 | local_settings.py 59 | db.sqlite3 60 | 61 | # Flask stuff: 62 | instance/ 63 | .webassets-cache 64 | 65 | # Scrapy stuff: 66 | .scrapy 67 | 68 | # Sphinx documentation 69 | docs/_build/ 70 | 71 | # PyBuilder 72 | target/ 73 | 74 | # Jupyter Notebook 75 | .ipynb_checkpoints 76 | 77 | # pyenv 78 | .python-version 79 | 80 | # celery beat schedule file 81 | celerybeat-schedule 82 | 83 | # SageMath parsed files 84 | *.sage.py 85 | 86 | # Environments 87 | .env 88 | .venv 89 | env/ 90 | venv/ 91 | ENV/ 92 | env.bak/ 93 | venv.bak/ 94 | 95 | # Spyder project settings 96 | .spyderproject 97 | .spyproject 98 | 99 | # Rope project settings 100 | .ropeproject 101 | 102 | # mkdocs documentation 103 | /site 104 | 105 | # mypy 106 | .mypy_cache/ 107 | *.txt 108 | .idea 109 | -------------------------------------------------------------------------------- /tests/test_elasticsearch.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Save some dummy records into Elasticsearch. 3 | ''' 4 | import os 5 | import json 6 | import time 7 | import unittest 8 | 9 | from elasticsearch import Elasticsearch 10 | from elasticsearch_dsl import Search, Q 11 | 12 | from certstream_analytics.transformers import CertstreamTransformer 13 | from certstream_analytics.storages import ElasticsearchStorage 14 | 15 | 16 | class ElasticsearchTest(unittest.TestCase): 17 | ''' 18 | Test the way we save data into Elasticsearch. 
# pylint: disable=no-init,too-few-public-methods
class Analyser:
    """
    The common interface of every analyser.
    """
    __metaclass__ = ABCMeta

    @abstractmethod
    def run(self, record):
        """
        Process one record and hand it back.

        An analyser normally processes the record, stores its result inside
        the record, and returns the updated record so that the next analyser
        can decide what to do with it. The record itself is produced by the
        CertstreamTransformer class and looks like this:

        {
            # These fields are extracted from certstream
            cert_index: INTEGER,
            seen: TIMESTAMP,
            chain: [
                ORGANIZATION
            ],
            not_before: TIMESTAMP,
            not_after: TIMESTAMP,
            all_domains: [
                SAN
            ],

            # A placeholder used later by the analysers. Each analyser
            # appends its result here.
            analysers: [
                {
                    analyser: ANALYSER NAME,
                    output: ANYTHING GOES HERE,
                },
            ],
        }
        """


class Debugger(Analyser):
    """
    A no-op analyser that logs what it sees, handy when debugging.
    """
    def __init__(self):
        """
        Start counting the records seen so far from zero.
        """
        self.count = 0

    def run(self, record):
        """
        Log the record as JSON, bump the counter, and append the running
        count to the record's list of analyser outputs.
        """
        logging.info(json.dumps(record))

        # One more record has gone through
        self.count += 1

        # Create the list of analyser outputs on the fly when it is missing
        outputs = record.setdefault('analysers', [])
        outputs.append({
            'analyser': type(self).__name__,
            'output': self.count,
        })

        return record
# pylint: disable=no-init,too-few-public-methods
class Transformer:
    """
    The common interface of every transformer.
    """
    __metaclass__ = ABCMeta

    @abstractmethod
    def apply(self, raw):
        """
        Turn a raw message into whatever the pipeline should consume.
        """


class PassthroughTransformer(Transformer):
    """
    A transformer that leaves the message untouched.
    """
    def apply(self, raw):
        """
        Return the message as it is.
        """
        return raw


class CertstreamTransformer(Transformer):
    """
    Reshape a certstream message into the record consumed by the processing
    pipeline.
    """
    def apply(self, raw):
        """
        The format of the message from certstream can be found at their github
        documentation.

        So far, we are only interested in the domain names, the timestamps, and
        probably the content of the subject. So the returned structure is as
        follows:

        {
            # These fields are extracted from certstream
            cert_index: INTEGER,
            seen: TIMESTAMP,
            chain: [
                ORGANIZATION
            ],
            not_before: TIMESTAMP,
            not_after: TIMESTAMP,
            all_domains: [
                SAN
            ],

            # A placeholder used later by the analysers. Each analyser
            # appends its result here.
            analysers: [
                {
                    analyser: ANALYSER NAME,
                    output: ANYTHING GOES HERE,
                },
            ],
        }

        None is returned when the certificate lists no domain at all.
        """
        data = raw['data']

        record = {
            'cert_index': data['cert_index'],
            'seen': data['seen'],
            'chain': [],

            # Filled in later by the analysers
            'analysers': [],
        }

        leaf_cert = data['leaf_cert']

        # A certificate without any domain is of no use to the pipeline
        if not leaf_cert['all_domains']:
            return None

        for field in ('not_before', 'not_after', 'all_domains'):
            record[field] = leaf_cert[field]

        return record
def __init__(self, hosts, timeout=10):
    """
    Create the elasticsearch_dsl connection to the given Elasticsearch hosts.

    :param hosts: forwarded as is to ``connections.create_connection``. There
        is no default value, the caller must always provide it.
    :param timeout: forwarded as is to ``connections.create_connection``,
        defaults to 10.
    """
    connections.create_connection(hosts=hosts, timeout=timeout)
class FileReporterTest(unittest.TestCase):
    '''
    Test the file-based reporter.
    '''
    def setUp(self):
        '''
        Create a temporary file so that the test can write its reports into it.
        '''
        self.tmp = tempfile.NamedTemporaryFile()
        # Closing the temporary file also removes it from disk
        self.addCleanup(self.tmp.close)

        self.reporter = FileReporter(path=self.tmp.name)

    def test_report(self):
        '''
        Dump all the test reports to our temporary file, then read them back
        and check that every non-empty report is there, in order.
        '''
        cases = [
            {
                'report': {
                    'all_domains': ['store.google.com', 'google.com'],
                    'analysers': [
                        {
                            'analyser': 'AhoCorasickDomainMatching',
                            'domain': 'store.google.com',
                            'match': 'google',
                        },
                    ],
                },
                'description': 'Report an exact match domain',
            },

            {
                'report': {
                    'all_domains': ['www.facebook.com.msg40.site'],
                    'analysers': [
                        {
                            'analyser': 'AhoCorasickDomainMatching',
                            'domain': 'www.facebook.com.msg40.site',
                            'match': 'facebook',
                        },
                    ],
                },
                'description': 'Report a phishing domain with a sub-domain match',
            },

            {
                'report': {
                    'all_domains': ['login-appleid.apple.com.managesuppport.co'],
                    'analysers': [
                        {
                            'analyser': 'AhoCorasickDomainMatching',
                            'domain': 'login-appleid.apple.com.managesuppport.co',
                            'match': 'apple',
                        },
                    ],
                },
                'description': 'Report a phishing domain with a partial string match',
            },

            {
                'report': {},
                'description': 'Report nothing and thus will be ignored',
            },
        ]

        for case in cases:
            self.reporter.publish(case['report'])

        # The reporter keeps its file open, so make sure that nothing is still
        # sitting in its buffer before reading the file back. Without this the
        # file can look empty and the test would pass without checking anything.
        self.reporter.fhandler.flush()

        with open(self.tmp.name) as fhandler:
            lines = fhandler.readlines()

        # Empty reports are ignored by the reporter, so they leave no line behind
        expected = [case for case in cases if case['report']]

        self.assertEqual(len(lines), len(expected),
                         'Every non-empty report is written exactly once')

        for line, case in zip(lines, expected):
            got = json.loads(line)
            self.assertDictEqual(got, case['report'], case['description'])
# pylint: disable=unused-argument
def exit_gracefully(signum, stack):
    """
    Signal handler that asks the script to stop.

    It only sets the module-level DONE flag; both arguments, which the signal
    module passes to every handler, are ignored.
    """
    # pylint: disable=global-statement
    global DONE
    DONE = True
Note that their order is important cause they 53 | # will be executed in that order 54 | return [ 55 | IDNADecoder(), 56 | HomoglyphsDecoder(greedy=False), 57 | AhoCorasickDomainMatching(domains=domains), 58 | WordSegmentation(), 59 | BulkDomainMarker(), 60 | DomainMatching(include_tld=include_tld, option=matching_option), 61 | FeaturesGenerator(), 62 | ] 63 | 64 | 65 | def run(): 66 | """ 67 | A simple utility to query certstream and match its records to a list of 68 | known domains from OpenDNS. 69 | """ 70 | epilog = ''' 71 | examples: 72 | \033[1;33m/usr/bin/domain_matching.py --elasticsearch-host elasticsearch:9200\033[0m 73 | 74 | \033[1;33m/usr/bin/domain_matching.py --dump-location certstream.txt\033[0m 75 | 76 | \033[1;33m/usr/bin/domain_matching.py --domains opendns-top-domains.txt\033[0m 77 | 78 | Consume data from Certstream and does its magic. 79 | ''' 80 | parser = argparse.ArgumentParser(description=__doc__, epilog=epilog, 81 | formatter_class=argparse.RawDescriptionHelpFormatter) 82 | 83 | parser.add_argument('--domains', 84 | help='the list of domains to match with (e.g. 
opendns-top-domains.txt)') 85 | 86 | parser.add_argument('--elasticsearch-host', 87 | help='set the Elasticsearch host to store the records from Certstream') 88 | 89 | parser.add_argument('--dump-location', 90 | help='where to dump the records from Certstream') 91 | 92 | try: 93 | args = parser.parse_args() 94 | # pylint: disable=broad-except 95 | except Exception as error: 96 | logging.error(error) 97 | # some errors occur when parsing the arguments, show the usage 98 | parser.print_help() 99 | # then quit 100 | sys.exit(1) 101 | 102 | transformer = CertstreamTransformer() 103 | analysers = init_analysers(domains_file=args.domains, 104 | include_tld=True, 105 | matching_option=DomainMatchingOption.ORDER_MATCH) 106 | reporter = FileReporter(path=args.dump_location) if args.dump_location else None 107 | storage = ElasticsearchStorage(hosts=[args.elasticsearch_host]) if args.elasticsearch_host else None 108 | 109 | engine = CertstreamAnalytics(transformer=transformer, 110 | storages=storage, 111 | analysers=analysers, 112 | reporters=reporter) 113 | engine.start() 114 | 115 | while not DONE: 116 | time.sleep(1) 117 | 118 | engine.stop() 119 | 120 | 121 | if __name__ == '__main__': 122 | # Make sure that we can exit gracefully 123 | signal.signal(signal.SIGINT, exit_gracefully) 124 | signal.signal(signal.SIGTERM, exit_gracefully) 125 | 126 | run() 127 | -------------------------------------------------------------------------------- /certstream_analytics/stream.py: -------------------------------------------------------------------------------- 1 | """ 2 | All hail [certstream](https://github.com/CaliDog/certstream-python)!! 3 | 4 | This module consumes the feed of certificates from certstream and does 5 | the heavy lifting. 
class CertstreamAnalytics():
    """
    Consume the feed of certificates from certstream, transform the data, and
    save it into various storages.
    """

    def __init__(self, transformer=None, storages=None, analysers=None, reporters=None):
        """
        This is the entry point of the whole module. It consumes data from
        certstream, transforms it using a transformer, saves it into the
        configured storages, runs the user-defined analysers, and hands the
        result to the reporters.

        The transformer can be None or any object with an apply(message)
        method. When it is None the raw certstream message is used as it is.

        storages, analysers, and reporters can each be None, a single
        instance, or a list / tuple of instances of Storage, Analyser, and
        Reporter respectively. A TypeError is raised when an item is of the
        wrong type.

        The analysers are run in the given order and each receives the output
        of the previous one, so it's entirely up to the user to decide what to
        do with the transformed data from certstream.

        The reporters, as their name implies, publish the final analyser
        result somewhere.
        """
        self.transformer = transformer

        self.analysers = []
        self.reporters = []
        self.storages = []

        def _init_member(member, value, kind):
            """
            Store value (one item or a list / tuple of them) under the
            attribute named member and check that every item is a kind.
            """
            if value:
                if isinstance(value, (list, tuple)):
                    setattr(self, member, value)
                else:
                    getattr(self, member).append(value)

            for type_check in getattr(self, member):
                if not isinstance(type_check, kind):
                    raise TypeError('Invalid {} type: {}'.format(member, type(type_check).__name__))

        _init_member('analysers', analysers, Analyser)
        _init_member('reporters', reporters, Reporter)
        _init_member('storages', storages, Storage)

        self.stopped = True
        self.thread = None

    def start(self):
        """
        Start consuming data from certstream in a background thread. Calling
        it again while that thread is still alive does nothing.
        """
        if self.thread is not None and self.thread.is_alive():
            return

        # Mark the consumer as running before the thread exists. When the flag
        # was only cleared by the thread itself, a stop() issued right after
        # start() saw the old value, returned early, and never stopped anything
        self.stopped = False

        # Run the stream in a separate thread
        self.thread = threading.Thread(target=self._consume)
        # So that it will be killed when the main thread stop
        self.thread.daemon = True
        self.thread.start()

    def stop(self):
        """
        Stop consuming data from certstream and wait for the consumer thread
        to end. The thread only notices the request when _callback is invoked
        for the next message.
        """
        if self.stopped:
            return

        self.stopped = True

        if self.thread is not None:
            self.thread.join()

    def _consume(self):
        """
        Thread target: listen to certstream and feed every event to _callback.
        """
        # pylint: disable=unnecessary-lambda
        certstream.listen_for_events(lambda m, c: self._callback(m, c),
                                     url='wss://certstream.calidog.io')

    # pylint: disable=unused-argument
    def _callback(self, message, context):
        """
        Handle one message from certstream: transform it, save it, analyse it,
        and report the result. Only 'certificate_update' messages are
        processed, anything else (e.g. 'heartbeat') is dropped.
        """
        if self.stopped:
            # Raise SystemExit inside the consumer thread to leave the stream
            sys.exit()

        if message['message_type'] != 'certificate_update':
            return

        if self.transformer:
            # Apply the user-defined transformation. The structure of the raw
            # message is described at https://github.com/CaliDog/certstream-python/
            transformed_message = self.transformer.apply(message)
        else:
            transformed_message = message

        if self.storages and transformed_message:
            # Save the message into a more permanent storage. May be we should
            # support multiple storages in parallel here
            for storage in self.storages:
                storage.save(transformed_message)

        # Note that the order of analysers is extremely important cause the
        # output of an analyser becomes the input of the next analyser
        for analyser in self.analysers:
            if not transformed_message:
                break

            transformed_message = analyser.run(transformed_message)

        if self.reporters and transformed_message:
            # and report the final result
            for reporter in self.reporters:
                reporter.publish(transformed_message)
# Factories of the supported reporters, keyed by the value of --report
SUPPORTED_REPORTERS = {
    'file': lambda location: FileReporter(path=location)
}

# Factories of the supported storages, keyed by the value of --storage
SUPPORTED_STORAGES = {
    'elasticsearch': lambda host: ElasticsearchStorage(hosts=[host])
}


def init_analysers(domains_file, include_tld, matching_option):
    '''
    Initialize all the analysers for matching domains. The list includes:

    - IDNA
    - Homoglyphs
    - AhoCorasick
    - Word segmentation
    - Bulk domains
    - Meta domain matching
    - Features generator

    domains_file is the path of a text file with one domain per line,
    include_tld and matching_option are handed to DomainMatching unchanged.
    Returns the analysers as a list, in the order they must be executed.
    '''
    with open(domains_file) as fhandle:
        stripped = (line.strip() for line in fhandle)
        # A blank line carries no domain, so it must not reach the matcher
        domains = [domain for domain in stripped if domain]

    # Initialize all analysers. Note that their order is important cause they
    # will be executed in that order
    return [
        IDNADecoder(),
        HomoglyphsDecoder(greedy=False),
        AhoCorasickDomainMatching(domains=domains),
        WordSegmentation(),
        BulkDomainMarker(),
        DomainMatching(include_tld=include_tld, option=matching_option),
        FeaturesGenerator(),
    ]


def run():
    '''
    A simple utility to replay certstream and match the records to a list of
    known domains from OpenDNS. It also generates several features for each
    domain such as the domain length.

    The command line is read from sys.argv. An invalid command line (which
    includes a missing --replay or --domains) makes argparse print the usage
    and exit with status 2, an unsupported reporter or storage type exits
    with status 1.
    '''
    epilog = '''
examples:
\033[1;33m/usr/bin/replay.py --replay certstream.txt\033[0m

\033[1;33m/usr/bin/replay.py --storage-host elasticsearch:9200 --storage elasticsearch\033[0m

\033[1;33m/usr/bin/replay.py --report-location report.txt --report file\033[0m

\033[1;33m/usr/bin/replay.py --domains opendns-top-domains.txt\033[0m

Replay data from certstream.
'''
    parser = argparse.ArgumentParser(description=__doc__, epilog=epilog,
                                     formatter_class=argparse.RawDescriptionHelpFormatter)

    # Both files are opened unconditionally further down, so leaving either
    # option out used to end in a TypeError from open(None)
    parser.add_argument('--replay', required=True,
                        help='the list of records from certstream (one per line)')
    parser.add_argument('--domains', required=True,
                        help='the list of domains to match with (opendns-top-domains.txt)')

    parser.add_argument('--storage-host', default='localhost:9200',
                        help='set the storage host')
    parser.add_argument('-s', '--storage',
                        help='choose the storage type (elasticsearch)')

    parser.add_argument('--report-location',
                        help='where to save the report to?')
    parser.add_argument('-r', '--report', default='file',
                        help='choose the reporter type')

    # argparse reports a bad command line itself and leaves through
    # SystemExit, which an "except Exception" clause can never intercept
    args = parser.parse_args()

    if args.report and args.report not in SUPPORTED_REPORTERS:
        error = 'Report type \033[1;31m{}\033[0m is not supported. The list of supported reporters includes: {}' \
            .format(args.report, list(SUPPORTED_REPORTERS.keys()))

        logging.error(error)
        # Encounter an unsupported reporter type
        sys.exit(1)

    if args.storage and args.storage not in SUPPORTED_STORAGES:
        error = 'Storage type \033[1;31m{}\033[0m is not supported. The list of supported storages includes: {}' \
            .format(args.storage, list(SUPPORTED_STORAGES.keys()))

        logging.error(error)
        # Encounter an unsupported storage type
        sys.exit(1)

    analysers = init_analysers(domains_file=args.domains,
                               include_tld=True,
                               matching_option=DomainMatchingOption.ORDER_MATCH)

    # Always bind both names so that the replay loop can test them instead of
    # hitting a NameError when the matching option is empty
    reporter = SUPPORTED_REPORTERS[args.report](args.report_location) if args.report else None
    storage = SUPPORTED_STORAGES[args.storage](args.storage_host) if args.storage else None

    with open(args.replay) as fhandler:
        for raw in fhandler:
            try:
                record = json.loads(raw)
            except json.decoder.JSONDecodeError:
                # Not a JSON record (e.g. a blank line), skip it
                continue

            if storage is not None and record:
                storage.save(record)

            # Same contract as the live stream: an empty result stops the
            # chain of analysers and is not reported
            for analyser in analysers:
                if not record:
                    break

                record = analyser.run(record)

            if reporter is not None and record:
                reporter.publish(record)


if __name__ == '__main__':
    run()
For example, a file with only 23 | # two entries: 24 | # 25 | # gmail.com 26 | # facebook.com 27 | # 28 | # will match any domain that contains the gmail or facebook keywords. 29 | # 30 | # All the records consumed from certstream will be kept in certstream.txt 31 | ``` 32 | 33 | # API 34 | 35 | ```python 36 | import time 37 | 38 | from certstream_analytics.analysers import WordSegmentation 39 | from certstream_analytics.analysers import IDNADecoder 40 | from certstream_analytics.analysers import HomoglyphsDecoder 41 | 42 | from certstream_analytics.transformers import CertstreamTransformer 43 | from certstream_analytics.storages import ElasticsearchStorage 44 | from certstream_analytics.stream import CertstreamAnalytics 45 | 46 | done = False 47 | 48 | # These analysers will be run in the same order 49 | analyser = [ 50 | IDNADecoder(), 51 | HomoglyphsDecoder(), 52 | WordSegmentation(), 53 | ] 54 | 55 | # The following fields are filtered out and indexed: 56 | # - String: domain 57 | # - List: SAN 58 | # - List: Trust chain 59 | # - Timestamp: Not before 60 | # - Timestamp: Not after 61 | # - Timestamp: Seen 62 | transformer = CertstreamTransformer() 63 | 64 | # Index the data in Elasticsearch 65 | storage = ElasticsearchStorage(hosts=['localhost:9200']) 66 | 67 | consumer = CertstreamAnalytics(transformer=transformer, 68 | storages=storage, 69 | analysers=analyser) 70 | # The consumer is run in another thread so this function is non-blocking 71 | consumer.start() 72 | 73 | while not done: 74 | time.sleep(1) 75 | 76 | consumer.stop() 77 | ``` 78 | 79 | ## IDNA decoder 80 | This analyser decodes IDNA domain names into Unicode for further processing 81 | downstream. Normally, it will be the very first analyser to be run. If 82 | the analyser encounters a malformed IDNA domain string, it will keep the 83 | domain as it is.
84 | 85 | ```python 86 | from certstream_analytics.analysers import IDNADecoder 87 | 88 | decoder = IDNADecoder() 89 | 90 | # Just an example dummy record 91 | record = { 92 | 'all_domains': [ 93 | 'xn--f1ahbgpekke1h.xn--p1ai', 94 | ] 95 | } 96 | 97 | # The domain name will now become 'укрэмпужск.рф' 98 | print(decoder.run(record)) 99 | ``` 100 | 101 | ## Homoglyphs decoder 102 | There are lots of phishing websites that utilize [homoglyphs](https://en.wikipedia.org/wiki/Homoglyph) 103 | to lure the victims. Some common examples include 'l' and 'i' or the 104 | Unicode character RHO '𝞀' and 'p'. The homoglyphs decoder uses the excellent 105 | [confusable_homoglyphs](https://github.com/vhf/confusable_homoglyphs) to 106 | generate all potential alternative domain names in ASCII. 107 | 108 | ```python 109 | from certstream_analytics.analysers import HomoglyphsDecoder 110 | 111 | # If the greedy flag is set, all alternative domains will be returned 112 | decoder = HomoglyphsDecoder(greedy=False) 113 | 114 | # Just an example dummy record 115 | record = { 116 | 'all_domains': [ 117 | # MATHEMATICAL MONOSPACE SMALL P 118 | '*.𝗉aypal.com', 119 | 120 | # MATHEMATICAL SANS-SERIF BOLD SMALL RHO 121 | '*.𝗉ay𝞀al.com', 122 | ] 123 | } 124 | 125 | # The domain name will now be converted to '*.paypal.com' with the ASCII 126 | # character p 127 | print(decoder.run(record)) 128 | ``` 129 | 130 | ## Aho-Corasick 131 | A domain and its SAN from Certstream will be compared against a list of 132 | most popular [domains](https://github.com/opendns/public-domain-lists) 133 | (from OpenDNS) using the Aho-Corasick algorithm. This is a simple check to 134 | remove some of the most obvious phishing domains, for example, *www.facebook.com.msg40.site* 135 | will match with *facebook* because *facebook* is in the above list of most 136 | popular domains (I wonder how long it is going to last).
137 | 138 | ```python 139 | from certstream_analytics.analysers import AhoCorasickDomainMatching 140 | from certstream_analytics.reporters import FileReporter 141 | 142 | # Print the list of matching domains 143 | reporter = FileReporter('matching-results.txt') 144 | 145 | with open('opendns-top-domains.txt') as fhandle: 146 | domains = [line.rstrip() for line in fhandle] 147 | 148 | # The list of domains to match against 149 | domain_matching_analyser = AhoCorasickDomainMatching(domains) 150 | 151 | consumer = CertstreamAnalytics(transformer=transformer, 152 | analysers=domain_matching_analyser, 153 | reporters=reporter) 154 | 155 | # Need to think about what to do with the matching result 156 | consumer.start() 157 | 158 | while not done: 159 | time.sleep(1) 160 | 161 | consumer.stop() 162 | ``` 163 | 164 | ## Word segmentation 165 | In order to improve the accuracy of the matching algorithm, we segment 166 | the domains into English words using 167 | [wordsegment](https://github.com/grantjenks/python-wordsegment). 168 | 169 | ```python 170 | from certstream_analytics.analysers import WordSegmentation 171 | 172 | wordsegmentation = WordSegmentation() 173 | 174 | # Just an example dummy record 175 | record = { 176 | 'all_domains': [ 177 | 'login-appleid.apple.com.managesupport.co', 178 | ] 179 | } 180 | 181 | # The returned output is as follows: 182 | # 183 | # { 184 | # 'analyser': 'WordSegmentation', 185 | # 'output': { 186 | # 'login-appleid.apple.com.managesupport.co': [ 187 | # 'login', 188 | # 'apple', 189 | # 'id', 190 | # 'apple', 191 | # 'com', 192 | # 'manage', 193 | # 'support', 194 | # 'co' 195 | # ], 196 | # }, 197 | # } 198 | print(wordsegmentation.run(record)) 199 | ``` 200 | 201 | ## Features generator 202 | A list of features for each domain will also be generated so that they 203 | can be used for classification jobs further downstream. The list 204 | includes: 205 | 206 | - The number of dot-separated fields in the domain, for example, www.google.com has 3.
# pylint: disable=too-few-public-methods
class AhoCorasickDomainMatching(Analyser):
    """
    The domain and its SAN will be compared against the list of domains, for
    example, the most popular domains from OpenDNS.
    """
    # Get this number from the histogram of the length of all top domains
    MIN_MATCHING_LENGTH = 3

    # Some domains that don't work too well with tldextract and generate too
    # many FPs
    EXCLUDED_DOMAINS = {
        'www': 1,
        'web': 1,
    }

    # Some common domain parts that cause too many FP
    IGNORED_PARTS = r'^(autodiscover\.|cpanel\.)'

    def __init__(self, domains):
        """
        Use Aho-Corasick to find the matching domain so we construct its Trie
        here.

        domains is an iterable of domain names. Only the registrable label of
        each one, as returned by tldextract, is indexed and self.domains maps
        that label back to the full domain it came from.
        """
        self.automaton = ahocorasick.Automaton()
        self.domains = {}

        for index, domain in enumerate(domains):
            # Processing only the domain part. All sub-domains or TLDs will
            # be ignored, for example:
            # - www.google.com becomes google
            # - www.google.co.uk becomes google
            # - del.icio.us becomes icio
            ext = tldextract.extract(domain)

            # An entry without a registrable label (a blank line, a bare
            # public suffix) leaves nothing to match against
            if not ext.domain or ext.domain in AhoCorasickDomainMatching.EXCLUDED_DOMAINS:
                continue

            self.automaton.add_word(ext.domain, (index, ext.domain))
            self.domains[ext.domain] = domain

        self.automaton.make_automaton()

    def run(self, record):
        """
        Use Aho-Corasick to find the matching domain.

        record is a dict with an 'all_domains' list. When one of its domains
        matches, an entry {'analyser': ..., 'output': {domain: [match]}} is
        appended to record['analysers'], a list that is created when missing.
        The (mutated) record is returned.

        Tricky situation #1: When the string (domain) in the Trie is too short,
        it could match many domains, for example, g.co or t.co. So any match
        shorter than MIN_MATCHING_LENGTH is ignored. Also, we will prefer
        longer match than a shorter one for now.
        """
        if 'analysers' not in record:
            record['analysers'] = []

        results = {}
        # Check the domain and all its SAN
        for domain in record['all_domains']:
            # Remove wildcard
            domain = re.sub(r'^\*\.', '', domain)

            # Remove some FP-prone parts
            domain = re.sub(AhoCorasickDomainMatching.IGNORED_PARTS, '', domain)

            # Similar to all domains in the list, the TLD will be stripped off
            ext = tldextract.extract(domain)
            # The match will be a tuple in the following format: (5, (0, 'google'))
            matches = [m[1][1] for m in self.automaton.iter('.'.join(ext[:2]))
                       if len(m[1][1]) >= AhoCorasickDomainMatching.MIN_MATCHING_LENGTH]

            if matches:
                # The sort is stable, so among the longest matches the one
                # found last is kept
                match = sorted(matches, key=len)[-1]

                # We only keep the longest match of the first matching domain
                # for now. The value is always a list, as DomainMatching
                # iterates over it; the fallback used to be a bare string
                results[domain] = [self.domains.get(match, match)]
                break

        if results:
            record['analysers'].append({
                'analyser': type(self).__name__,
                'output': results,
            })

        return record


class DomainMatchingOption(Enum):
    """
    Control how strict we want to do our matching.
    """
    # For example applefake.it will match with apple.com case ['apple'] is
    # a subset of ['apple', 'fake']
    SUBSET_MATCH = 0

    # Similar but use in instead of issubset so that the order is preserved
    ORDER_MATCH = 1


class DomainMatching(Analyser):
    """
    This is the first example of the new group of meta analysers which are used
    to combine the result of other analysers.
    """
    def __init__(self, include_tld=True, option=DomainMatchingOption.ORDER_MATCH):
        """
        Load the wordsegment data and remember the matching options.

        include_tld tells whether the TLD of the known domain must appear in
        the candidate as well. option is a DomainMatchingOption, a KeyError
        is raised for anything else.
        """
        wordsegment.load()

        # Save the matching option here so we can refer to it later
        self.include_tld = include_tld

        # The container type used for both token lists in _match
        self.option = {
            DomainMatchingOption.SUBSET_MATCH: set,
            DomainMatchingOption.ORDER_MATCH: list,
        }[option]

    def run(self, record):
        """
        Note that a meta-analyser will need to run after other analysers have
        finished so that their outputs are available.

        The outputs of AhoCorasickDomainMatching and WordSegmentation are read
        from record['analysers']. When they yield a match, an entry with this
        analyser's name is appended to that list. The record is returned.
        """
        if 'analysers' not in record:
            return record

        analysers = {
            AhoCorasickDomainMatching.__name__: {},
            WordSegmentation.__name__: {},
            BulkDomainMarker.__name__: {},
        }

        for analyser in record['analysers']:
            name = analyser['analyser']

            if name not in analysers:
                continue

            if name == BulkDomainMarker.__name__ and analyser['output']:
                # Skip bulk record and deal with it later, with such large
                # number of SAN name, it's bound to be a match
                continue

            analysers[name] = analyser['output']

        # Check that all outputs are there before continuing
        if not analysers[AhoCorasickDomainMatching.__name__] or not analysers[WordSegmentation.__name__]:
            return record

        results = self._match(analysers[AhoCorasickDomainMatching.__name__],
                              analysers[WordSegmentation.__name__])

        if results:
            record['analysers'].append({
                'analyser': type(self).__name__,
                'output': results,
            })

            # DEBUG
            # NOTE(review): the nesting of this call is an assumption, it is
            # taken to log matching records only - confirm
            logging.info(json.dumps(record))

        return record

    def _match(self, ahocorasick_output, segmentation_output):
        """
        Use internally by the run function to combine AhoCorasick and WordSegmentation
        results.

        Returns a dict which maps each suspicious domain to the list of known
        domains it imitates. The dict is empty when nothing matches.
        """
        results = {}
        # Check all the matching domains reported by AhoCorasick analyser
        for match, domains in ahocorasick_output.items():
            # The result of AhoCorasick matcher is a list of matching domains, for example,
            #
            #     {
            #         'analyser': 'AhoCorasickDomainMatching',
            #         'output': {
            #             'login-appleid.apple.com.managesuppport.co': ['apple.com', 'support.com'],
            #         },
            #     },
            #
            if match not in segmentation_output:
                continue

            phish = self.option(segmentation_output[match])
            match_ext = tldextract.extract(match)

            for domain in domains:
                ext = tldextract.extract(domain)

                # This record is from a legitimate source, for example, agrosupport.zendesk.com
                # will match with zendesk.com. In our case, we don't really care about this so
                # it will be ignored and not reported as a match.
                if ext[1:] == match_ext[1:]:
                    continue

                tmp = []
                # Intuitively, it will be more accurate if we choose to include the TLD here.
                # For example, if both 'apple' and 'com' appear in the matching domain, it's
                # very likely that something phishing is going on here. On the other hand,
                # if only 'apple' occurs, we are not so sure and it's better left for more
                # advance analysers to have their says in that
                for part in ext[:] if self.include_tld else ext[:2]:
                    for token in part.split('.'):
                        tmp.extend(wordsegment.segment(token))

                if not tmp:
                    # Without any token the checks below would succeed for
                    # nearly every candidate, which is not a real match
                    continue

                legit = self.option(tmp)

                # NOTE(review): in the ordered check the needle starts with a
                # dot while the haystack does not, so a candidate whose first
                # tokens are the known domain is not reported - confirm that
                # this is intended
                if (isinstance(phish, set) and legit.issubset(phish)) or \
                   (isinstance(phish, list) and '.{}'.format('.'.join(legit)) in '.'.join(phish)):
                    # Found a possible phishing domain
                    if match not in results:
                        results[match] = []

                    results[match].append(domain)

        return results
Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED 38 | confidence= 39 | 40 | # Enable the message, report, category or checker with the given id(s). You can 41 | # either give multiple identifier separated by comma (,) or put this option 42 | # multiple time. See also the "--disable" option for examples. 43 | #enable= 44 | 45 | # Disable the message, report, category or checker with the given id(s). You 46 | # can either give multiple identifiers separated by comma (,) or put this 47 | # option multiple times (only on the command line, not in the configuration 48 | # file where it should appear only once).You can also use "--disable=all" to 49 | # disable everything first and then reenable specific checks. For example, if 50 | # you want to run only the similarities checker, you can use "--disable=all 51 | # --enable=similarities". If you want to run only the classes checker, but have 52 | # no Warning level messages displayed, use"--disable=all --enable=classes 53 | # --disable=W" 54 | 55 | disable=fixme,locally-disabled 56 | 57 | [REPORTS] 58 | 59 | # Set the output format. Available formats are text, parseable, colorized, msvs 60 | # (visual studio) and html. You can also give a reporter class, eg 61 | # mypackage.mymodule.MyReporterClass. 62 | output-format=parseable 63 | 64 | # Put messages in a separate file for each module / package specified on the 65 | # command line instead of printing them on stdout. Reports (if any) will be 66 | # written in a file name "pylint_global.[txt|html]". 67 | files-output=no 68 | 69 | # Tells whether to display a full report or only the messages 70 | reports=yes 71 | 72 | # Python expression which should return a note less than 10 (10 is the highest 73 | # note). You have access to the variables errors warning, statement which 74 | # respectively contain the number of errors / warnings messages and the total 75 | # number of statements analyzed. This is used by the global evaluation report 76 | # (RP0004). 
77 | evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10) 78 | 79 | # Template used to display messages. This is a python new-style format string 80 | # used to format the message information. See doc for all details 81 | #msg-template= 82 | 83 | 84 | [LOGGING] 85 | 86 | # Logging modules to check that the string format arguments are in logging 87 | # function parameter format 88 | logging-modules=logging 89 | 90 | 91 | [MISCELLANEOUS] 92 | 93 | # List of note tags to take in consideration, separated by a comma. 94 | notes=FIXME,XXX,TODO 95 | 96 | 97 | [SIMILARITIES] 98 | 99 | # Minimum lines number of a similarity. 100 | min-similarity-lines=4 101 | 102 | # Ignore comments when computing similarities. 103 | ignore-comments=yes 104 | 105 | # Ignore docstrings when computing similarities. 106 | ignore-docstrings=yes 107 | 108 | # Ignore imports when computing similarities. 109 | ignore-imports=no 110 | 111 | 112 | [VARIABLES] 113 | 114 | # Tells whether we should check for unused import in __init__ files. 115 | init-import=no 116 | 117 | # A regular expression matching the name of dummy variables (i.e. expectedly 118 | # not used). 119 | dummy-variables-rgx=_$|dummy 120 | 121 | # List of additional names supposed to be defined in builtins. Remember that 122 | # you should avoid to define new builtins when possible. 123 | additional-builtins= 124 | 125 | # List of strings which can identify a callback function by name. A callback 126 | # name must start or end with one of those strings. 127 | callbacks=cb_,_cb 128 | 129 | 130 | [FORMAT] 131 | 132 | # Maximum number of characters on a single line. 133 | max-line-length=120 134 | 135 | # Regexp for a line that is allowed to be longer than the limit. 136 | ignore-long-lines=^\s*(# )??$ 137 | 138 | # Allow the body of an if to be on the same line as the test if there is no 139 | # else. 
140 | single-line-if-stmt=no 141 | 142 | # List of optional constructs for which whitespace checking is disabled 143 | no-space-check=trailing-comma,dict-separator 144 | 145 | # Maximum number of lines in a module 146 | max-module-lines=1000 147 | 148 | # String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 149 | # tab). 150 | indent-string=' ' 151 | 152 | # Number of spaces of indent required inside a hanging or continued line. 153 | indent-after-paren=4 154 | 155 | # Expected format of line ending, e.g. empty (any line ending), LF or CRLF. 156 | expected-line-ending-format= 157 | 158 | 159 | [BASIC] 160 | 161 | # List of builtins function names that should not be used, separated by a comma 162 | bad-functions=map,filter,input 163 | 164 | # Good variable names which should always be accepted, separated by a comma 165 | good-names=i,j,k,ex,Run,_ 166 | 167 | # Bad variable names which should always be refused, separated by a comma 168 | bad-names=foo,bar,baz,toto,tutu,tata 169 | 170 | # Colon-delimited sets of names that determine each other's naming style when 171 | # the name regexes allow several styles. 
172 | name-group= 173 | 174 | # Include a hint for the correct naming format with invalid-name 175 | include-naming-hint=no 176 | 177 | # Regular expression matching correct function names 178 | function-rgx=[a-z_][a-z0-9_]{2,30}$ 179 | 180 | # Naming hint for function names 181 | function-name-hint=[a-z_][a-z0-9_]{2,30}$ 182 | 183 | # Regular expression matching correct variable names 184 | variable-rgx=[a-z_][a-z0-9_]{2,30}$ 185 | 186 | # Naming hint for variable names 187 | variable-name-hint=[a-z_][a-z0-9_]{2,30}$ 188 | 189 | # Regular expression matching correct constant names 190 | const-rgx=(([A-Z_][A-Z0-9_]*)|(__.*__))$ 191 | 192 | # Naming hint for constant names 193 | const-name-hint=(([A-Z_][A-Z0-9_]*)|(__.*__))$ 194 | 195 | # Regular expression matching correct attribute names 196 | attr-rgx=[a-z_][a-z0-9_]{2,30}$ 197 | 198 | # Naming hint for attribute names 199 | attr-name-hint=[a-z_][a-z0-9_]{2,30}$ 200 | 201 | # Regular expression matching correct argument names 202 | argument-rgx=[a-z_][a-z0-9_]{2,30}$ 203 | 204 | # Naming hint for argument names 205 | argument-name-hint=[a-z_][a-z0-9_]{2,30}$ 206 | 207 | # Regular expression matching correct class attribute names 208 | class-attribute-rgx=([A-Za-z_][A-Za-z0-9_]{2,30}|(__.*__))$ 209 | 210 | # Naming hint for class attribute names 211 | class-attribute-name-hint=([A-Za-z_][A-Za-z0-9_]{2,30}|(__.*__))$ 212 | 213 | # Regular expression matching correct inline iteration names 214 | inlinevar-rgx=[A-Za-z_][A-Za-z0-9_]*$ 215 | 216 | # Naming hint for inline iteration names 217 | inlinevar-name-hint=[A-Za-z_][A-Za-z0-9_]*$ 218 | 219 | # Regular expression matching correct class names 220 | class-rgx=[A-Z_][a-zA-Z0-9]+$ 221 | 222 | # Naming hint for class names 223 | class-name-hint=[A-Z_][a-zA-Z0-9]+$ 224 | 225 | # Regular expression matching correct module names 226 | module-rgx=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$ 227 | 228 | # Naming hint for module names 229 | 
module-name-hint=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$ 230 | 231 | # Regular expression matching correct method names 232 | method-rgx=[a-z_][a-z0-9_]{2,30}$ 233 | 234 | # Naming hint for method names 235 | method-name-hint=[a-z_][a-z0-9_]{2,30}$ 236 | 237 | # Regular expression which should only match function or class names that do 238 | # not require a docstring. 239 | no-docstring-rgx=__.*__ 240 | 241 | # Minimum line length for functions/classes that require docstrings, shorter 242 | # ones are exempt. 243 | docstring-min-length=-1 244 | 245 | # List of decorators that define properties, such as abc.abstractproperty. 246 | property-classes=abc.abstractproperty 247 | 248 | 249 | [TYPECHECK] 250 | 251 | # Tells whether missing members accessed in mixin class should be ignored. A 252 | # mixin class is detected if its name ends with "mixin" (case insensitive). 253 | ignore-mixin-members=yes 254 | 255 | # List of module names for which member attributes should not be checked 256 | # (useful for modules/projects where namespaces are manipulated during runtime 257 | # and thus existing member attributes cannot be deduced by static analysis 258 | ignored-modules= 259 | 260 | # List of classes names for which member attributes should not be checked 261 | # (useful for classes with attributes dynamically set). 262 | ignored-classes=SQLObject, optparse.Values, thread._local, _thread._local 263 | 264 | # List of members which are set dynamically and missed by pylint inference 265 | # system, and so shouldn't trigger E1101 when accessed. Python regular 266 | # expressions are accepted. 267 | generated-members=REQUEST,acl_users,aq_parent 268 | 269 | # List of decorators that create context managers from functions, such as 270 | # contextlib.contextmanager. 271 | contextmanager-decorators=contextlib.contextmanager 272 | 273 | 274 | [SPELLING] 275 | 276 | # Spelling dictionary name. Available dictionaries: none. To make it working 277 | # install python-enchant package. 
278 | spelling-dict= 279 | 280 | # List of comma separated words that should not be checked. 281 | spelling-ignore-words= 282 | 283 | # A path to a file that contains private dictionary; one word per line. 284 | spelling-private-dict-file= 285 | 286 | # Tells whether to store unknown words to indicated private dictionary in 287 | # --spelling-private-dict-file option instead of raising a message. 288 | spelling-store-unknown-words=no 289 | 290 | 291 | [DESIGN] 292 | 293 | # Maximum number of arguments for function / method 294 | max-args=5 295 | 296 | # Argument names that match this expression will be ignored. Default to name 297 | # with leading underscore 298 | ignored-argument-names=_.* 299 | 300 | # Maximum number of locals for function / method body 301 | max-locals=15 302 | 303 | # Maximum number of return / yield for function / method body 304 | max-returns=6 305 | 306 | # Maximum number of branch for function / method body 307 | max-branches=12 308 | 309 | # Maximum number of statements in function / method body 310 | max-statements=50 311 | 312 | # Maximum number of parents for a class (see R0901). 313 | max-parents=7 314 | 315 | # Maximum number of attributes for a class (see R0902). 316 | max-attributes=7 317 | 318 | # Minimum number of public methods for a class (see R0903). 319 | min-public-methods=2 320 | 321 | # Maximum number of public methods for a class (see R0904). 322 | max-public-methods=20 323 | 324 | 325 | [CLASSES] 326 | 327 | # List of method names used to declare (i.e. assign) instance attributes. 328 | defining-attr-methods=__init__,__new__,setUp 329 | 330 | # List of valid names for the first argument in a class method. 331 | valid-classmethod-first-arg=cls 332 | 333 | # List of valid names for the first argument in a metaclass class method. 334 | valid-metaclass-classmethod-first-arg=mcs 335 | 336 | # List of member names, which should be excluded from the protected access 337 | # warning. 
"""
The list of basic analysers includes:
    - WordSegmentation
    - IDNADecoder
    - HomoglyphsDecoder
    - FeaturesGenerator (generate various features for further downstream processing)
    - BulkDomainMarker
"""
import itertools
import re

import tldextract
import wordsegment
from nostril import nonsense
import idna
from confusable_homoglyphs import confusables

from .base import Analyser


# Matches the leading "*." of a wildcard certificate name.
_WILDCARD_RE = re.compile(r'^\*\.')


def _split_wildcard(domain):
    """
    Split a possibly wildcarded name into ``(bare_domain, had_wildcard)``.

    Only a leading ``*.`` is removed; any other asterisk is left untouched.
    """
    bare, count = _WILDCARD_RE.subn('', domain)
    return bare, bool(count)


# pylint: disable=too-few-public-methods
class WordSegmentation(Analyser):
    """
    Perform word segmentation of all the SAN domains as an attempt to make sense
    of their names. For example, both arch.mappleonline.com and apple-verifyupdate.serveftp.com
    domains have 'apple' inside but only the second one is an actual Apple phishing
    page. Intuitively, a good word segmentation algorithm will return:

        - arch + mapple + online + com
        - apple + verify + update + serve + ftp + com

    Thus, it's much easier to spot the second phishing domain.

    Implementation-wise, there are several existing packages around to do this, for
    example:

        - https://github.com/grantjenks/python-wordsegment
        - https://github.com/keredson/wordninja

    This analyser uses the wordsegment package.
    """
    # Some common stop words that are in the list of most popular domains.
    # Only the keys matter; the mapping is used for membership tests.
    STOPWORDS = {
        'app': 1,
        'inc': 1,
        'box': 1,
        'health': 1,
        'home': 1,
        'space': 1,
        'cars': 1,
        'nature': 1,
    }

    def __init__(self):
        """
        Load the wordsegment corpus so that segment() can be used later.
        """
        wordsegment.load()

    def run(self, record):
        """
        Segment every name in record['all_domains'] into words.

        The result, a mapping from each (wildcard-stripped) domain to its list
        of words, is appended to record['analysers'] when it is not empty. The
        record itself is returned.
        """
        analysers = record.setdefault('analysers', [])

        results = {}
        # Check the domain and all its SAN
        for domain in record['all_domains']:
            domain, _ = _split_wildcard(domain)
            ext = tldextract.extract(domain)

            words = []
            # The suffix is segmented as well, for example, .co.uk becomes
            # ['co', 'uk']. The three fields are read by name instead of
            # slicing the result because the shape of ExtractResult differs
            # between tldextract releases.
            for part in (ext.subdomain, ext.domain, ext.suffix):
                for token in part.split('.'):
                    segmented = [word for word in wordsegment.segment(token)
                                 if word not in WordSegmentation.STOPWORDS]

                    if segmented:
                        words.extend(segmented)
                    elif token:
                        # For some IDNA domain like xn--wgbfq3d.xn--ngbc5azd, the
                        # segmentation returns nothing (the same happens when every
                        # word is a stop word), so the original token is kept
                        words.append(token)

            results[domain] = words

        if results:
            analysers.append({
                'analyser': type(self).__name__,
                'output': results,
            })

        return record


class BulkDomainMarker(Analyser):
    """
    Mark the record that has tons of SAN domains in it. Most of the time, they are
    completely unrelated domains and probably the result of some bulk registration
    process. Benign or not, they are still suspicious and probably spam. We can also
    verify the similarity among these domains. A lower similarity score means these
    domains are totally unrelated.
    """
    # Take a histogram here and find out the suitable value for this
    THRESHOLD = 15

    def __init__(self, threshold=THRESHOLD):
        """
        Set the threshold to mark the record as a bulk record.
        """
        self.threshold = threshold

    def run(self, record):
        """
        See if the record is a bulk record. Only the threshold is used as the
        indicator for now: a record with at least `threshold` names in
        record['all_domains'] is a bulk record. The boolean verdict is appended
        to record['analysers'] and the record is returned.
        """
        analysers = record.setdefault('analysers', [])

        analysers.append({
            'analyser': type(self).__name__,
            'output': len(record['all_domains']) >= self.threshold,
        })

        return record


class IDNADecoder(Analyser):
    """
    Decode all domains in IDNA format.
    """
    def run(self, record):
        """
        Convert every IDNA-encoded name in record['all_domains'] back to
        Unicode. Names that cannot be decoded are kept unchanged. The list is
        replaced in place and the record is returned.
        """
        decoded = []

        for domain in record['all_domains']:
            # The wildcard interferes with the IDNA module, so it is removed
            # before decoding and put back afterwards
            bare, wildcard = _split_wildcard(domain)

            try:
                bare = idna.decode(bare)
            except (idna.core.InvalidCodepoint, UnicodeError):
                # Fail to decode the domain, just keep it as it is for now
                pass

            decoded.append('*.{}'.format(bare) if wildcard else bare)

        record['all_domains'] = decoded
        return record


class HomoglyphsDecoder(Analyser):
    """
    Smartly convert domains whose names include some suspicious homoglyphs to
    ASCII. This will probably need to be done right after IDNA conversion and
    before other analysers so that they can get benefits from it.
    """
    def __init__(self, greedy=False):
        """
        We rely on the confusable-homoglyphs at https://github.com/vhf/confusable_homoglyphs
        to do its magic.

        If the greedy flag is set, all alternative domains will be returned. Otherwise, only
        the first one will be available.
        """
        self.greedy = greedy

    @staticmethod
    def is_latin(alt):
        """
        Return True when every character of `alt` is an ASCII letter (a-z or
        A-Z). An empty string is considered Latin.

        The whole string is checked, not a single character, because the
        confusable_homoglyphs library also returns multi-character matches,
        for example, 'rn' has an alternative of 'm'.
        """
        return all('a' <= alt_c <= 'z' or 'A' <= alt_c <= 'Z' for alt_c in alt)

    @staticmethod
    def _latin_candidates(domain_c, hglyph):
        """
        Return the list of replacement strings for one character of a domain.

        `hglyph` is the confusable_homoglyphs entry of the character, or None
        when the library has nothing for it.
        """
        if hglyph is None:
            # No homoglyphs are known for this character, keep it as it is
            return [domain_c]

        found = []
        if hglyph['alias'] == 'LATIN':
            # The character is already Latin so it is its own first candidate
            found.append(hglyph['character'])

        found.extend(alt['c'].lower()
                     for alt in hglyph['homoglyphs']
                     if HomoglyphsDecoder.is_latin(alt['c']))

        # If nothing is found, we keep the original character
        return found or [hglyph['character']]

    def run(self, record):
        """
        Using the confusable-homoglyphs, generate the alternative ASCII names
        of every domain in record['all_domains']. It's a bit of a brute force
        though. Only the first alternative of each domain is kept unless the
        greedy flag is set. The list is replaced and the record is returned.
        """
        decoded = []

        for domain in record['all_domains']:
            # Remove wild card to simplify the domain name a bit and put it back later
            domain, wildcard = _split_wildcard(domain)

            # is_confusable() returns False, not an empty list, when no character
            # of the domain is confusable
            confusable = confusables.is_confusable(domain, greedy=True) or []
            hg_map = {hg['character']: hg for hg in confusable}

            decoded_domain_c = [self._latin_candidates(domain_c, hg_map.get(domain_c))
                                for domain_c in domain]

            for alt in self._generate_alternatives(decoded_domain_c):
                decoded.append('*.{}'.format(alt) if wildcard else alt)

                if not self.greedy:
                    break

        record['all_domains'] = decoded
        return record

    def _generate_alternatives(self, alt_characters, index=0, current=''):
        """
        Lazily yield all alternative ASCII names of a domain.

        `alt_characters` holds one list of candidate strings per position.
        Generation starts at position `index` and every yielded name is
        prefixed with `current`. The first candidate of each position varies
        slowest, so the first name yielded uses the first candidate everywhere.
        """
        for combination in itertools.product(*alt_characters[index:]):
            yield current + ''.join(combination)


class FeaturesGenerator(Analyser):
    """
    Generate features to detect outliers in the stream. In our case, the outliers are
    the 'suspicious' phishing domains.
    """
    NOSTRIL_LENGTH_LIMIT = 6

    @staticmethod
    def _domain_features(domain, segments):
        """
        Build the feature vector of one domain from its name and its list of
        segmented words. See run() for the meaning of each feature.
        """
        domain, _ = _split_wildcard(domain)
        parts = domain.split('.')

        randomness_count = 0
        # The nostril package which we are using to detect non-sense words
        # in the domain only returns a boolean verdict so may be we need to
        # think of how we want to quantify this
        for word in segments:
            try:
                if len(word) >= FeaturesGenerator.NOSTRIL_LENGTH_LIMIT and nonsense(word):
                    randomness_count += 1
            except ValueError:
                # A word that nostril refuses to judge is not counted
                continue

        # A domain without any segment has no measurable randomness
        randomness = randomness_count / len(segments) if segments else 0.0

        return [
            len(parts),
            len(domain),
            max(len(part) for part in parts),
            len(parts[-1]),
            randomness,
        ]

    def run(self, record):
        """
        Append one feature vector per domain to record['analysers'], using the
        output of the first WordSegmentation analyser found in the record. The
        output is an empty list when that analyser has not been run.

        The list of features will be:
            - The number of domain parts, for example, www.google.com is 3.
            - The overall length in characters.
            - The length of the longest domain part.
            - The length of the last domain part, e.g. .online or .download is longer than .com.
            - The randomness level of the domain, i.e. the ratio of segmented
              words that nostril considers nonsense.
        """
        analysers = record.setdefault('analysers', [])

        x_samples = []

        for analyser in analysers:
            if analyser['analyser'] != 'WordSegmentation':
                continue

            for domain, segments in analyser['output'].items():
                x_samples.append(self._domain_features(domain, segments))

            # Only the first WordSegmentation output is used
            break

        analysers.append({
            'analyser': type(self).__name__,
            'output': x_samples,
        })

        return record
"authorityInfoAccess": "OCSP - URI:http://ocsp.comodoca.com\nCA Issuers - URI:http://crt.comodoca.com/COMODORSAAddTrustCA.crt\n", 11 | "authorityKeyIdentifier": "keyid:BB:AF:7E:02:3D:FA:A6:F1:3C:84:8E:AD:EE:38:98:EC:D9:32:32:D4\n", 12 | "basicConstraints": "CA:TRUE", 13 | "certificatePolicies": "Policy: 2.23.140.1.2.1\nPolicy: 1.3.6.1.4.1.6449.1.2.2.52", 14 | "crlDistributionPoints": "Full Name:\n URI:http://crl.comodoca.com/COMODORSACertificationAuthority.crl", 15 | "extendedKeyUsage": "TLS Web server authentication, TLS Web client authentication", 16 | "keyUsage": "Digital Signature, Key Cert Sign, C R L Sign", 17 | "subjectKeyIdentifier": "7E:03:5A:65:41:6B:A7:7E:0A:E1:B8:9D:08:EA:1D:8E:1D:6A:C7:65" 18 | }, 19 | "fingerprint": "76:4D:2F:A5:9E:D1:23:F9:C9:55:70:C4:03:C9:2F:EF:33:8E:A7:45", 20 | "not_after": 1747526399, 21 | "not_before": 1431907200, 22 | "serial_number": "F01D4BEE7B7CA37B3C0566AC05972458", 23 | "subject": { 24 | "C": "US", 25 | "CN": "cPanel, Inc. Certification Authority", 26 | "L": "Houston", 27 | "O": "cPanel, Inc.", 28 | "OU": null, 29 | "ST": "TX", 30 | "aggregated": "/C=US/CN=cPanel, Inc. 
Certification Authority/L=Houston/O=cPanel, Inc./ST=TX" 31 | } 32 | }, 33 | { 34 | "as_der": "REDACT", 35 | "extensions": { 36 | "basicConstraints": "CA:TRUE", 37 | "keyUsage": "Key Cert Sign, C R L Sign", 38 | "subjectKeyIdentifier": "BB:AF:7E:02:3D:FA:A6:F1:3C:84:8E:AD:EE:38:98:EC:D9:32:32:D4" 39 | }, 40 | "fingerprint": "AF:E5:D2:44:A8:D1:19:42:30:FF:47:9F:E2:F8:97:BB:CD:7A:8C:B4", 41 | "not_after": 2147471999, 42 | "not_before": 1263859200, 43 | "serial_number": "4CAAF9CADB636FE01FF74ED85B03869D", 44 | "subject": { 45 | "C": "GB", 46 | "CN": "COMODO RSA Certification Authority", 47 | "L": "Salford", 48 | "O": "COMODO CA Limited", 49 | "OU": null, 50 | "ST": "Greater Manchester", 51 | "aggregated": "/C=GB/CN=COMODO RSA Certification Authority/L=Salford/O=COMODO CA Limited/ST=Greater Manchester" 52 | } 53 | } 54 | ], 55 | "leaf_cert": { 56 | "all_domains": [ 57 | "firmyfarmaceutyczneeu.redirect.rejestracjadomen.pl", 58 | "www.firmyfarmaceutyczneeu.redirect.rejestracjadomen.pl" 59 | ], 60 | "as_der": "REDACT", 61 | "extensions": { 62 | "authorityInfoAccess": "OCSP - URI:http://ocsp.comodoca.com\nCA Issuers - URI:http://crt.comodoca.com/cPanelIncCertificationAuthority.crt\n", 63 | "authorityKeyIdentifier": "keyid:7E:03:5A:65:41:6B:A7:7E:0A:E1:B8:9D:08:EA:1D:8E:1D:6A:C7:65\n", 64 | "basicConstraints": "CA:FALSE", 65 | "certificatePolicies": "Policy: 2.23.140.1.2.1\nPolicy: 1.3.6.1.4.1.6449.1.2.2.52\n CPS: https://secure.comodo.com/CPS", 66 | "crlDistributionPoints": "Full Name:\n URI:http://crl.comodoca.com/cPanelIncCertificationAuthority.crl", 67 | "ctlPoisonByte": true, 68 | "extendedKeyUsage": "TLS Web server authentication, TLS Web client authentication", 69 | "keyUsage": "Digital Signature, Key Encipherment", 70 | "subjectAltName": "DNS:www.firmyfarmaceutyczneeu.redirect.rejestracjadomen.pl, DNS:firmyfarmaceutyczneeu.redirect.rejestracjadomen.pl", 71 | "subjectKeyIdentifier": "A6:F3:1B:BD:CB:A6:E0:95:E4:EA:86:C5:9D:FE:BC:9E:B1:C4:0B:FD" 72 | }, 73 | 
"fingerprint": "32:8B:E0:CA:41:25:E0:EB:CD:92:29:7F:F3:17:3C:06:2C:3C:1F:D0", 74 | "not_after": 1546473599, 75 | "not_before": 1538611200, 76 | "serial_number": "DA28422511646C0552500F3DEE0AC20", 77 | "subject": { 78 | "C": null, 79 | "CN": "firmyfarmaceutyczneeu.redirect.rejestracjadomen.pl", 80 | "L": null, 81 | "O": null, 82 | "OU": null, 83 | "ST": null, 84 | "aggregated": "/CN=firmyfarmaceutyczneeu.redirect.rejestracjadomen.pl" 85 | } 86 | }, 87 | "seen": 1538635262.355275, 88 | "source": { 89 | "name": "Google 'Rocketeer' log", 90 | "url": "ct.googleapis.com/rocketeer/" 91 | }, 92 | "update_type": "PrecertLogEntry" 93 | }, 94 | "message_type": "certificate_update" 95 | }, 96 | { 97 | "data": { 98 | "cert_index": 447858049, 99 | "cert_link": "http://ct.googleapis.com/rocketeer/ct/v1/get-entries?start=447858049&end=447858049", 100 | "chain": [ 101 | { 102 | "as_der": "REDACT", 103 | "extensions": { 104 | "authorityInfoAccess": "CA Issuers - URI:http://apps.identrust.com/roots/dstrootcax3.p7c\nOCSP - URI:http://isrg.trustid.ocsp.identrust.com\n", 105 | "authorityKeyIdentifier": "keyid:C4:A7:B1:A4:7B:2C:71:FA:DB:E1:4B:90:75:FF:C4:15:60:85:89:10\n", 106 | "basicConstraints": "CA:TRUE", 107 | "certificatePolicies": "Policy: 1.3.6.1.4.1.44947.1.1.1\n CPS: http://cps.root-x1.letsencrypt.org", 108 | "crlDistributionPoints": "Full Name:\n URI:http://crl.identrust.com/DSTROOTCAX3CRL.crl", 109 | "keyUsage": "Digital Signature, Key Cert Sign, C R L Sign", 110 | "subjectKeyIdentifier": "A8:4A:6A:63:04:7D:DD:BA:E6:D1:39:B7:A6:45:65:EF:F3:A8:EC:A1" 111 | }, 112 | "fingerprint": "E6:A3:B4:5B:06:2D:50:9B:33:82:28:2D:19:6E:FE:97:D5:95:6C:CB", 113 | "not_after": 1615999246, 114 | "not_before": 1458232846, 115 | "serial_number": "A0141420000015385736A0B85ECA708", 116 | "subject": { 117 | "C": "US", 118 | "CN": "Let's Encrypt Authority X3", 119 | "L": null, 120 | "O": "Let's Encrypt", 121 | "OU": null, 122 | "ST": null, 123 | "aggregated": "/C=US/CN=Let's Encrypt Authority 
X3/O=Let's Encrypt" 124 | } 125 | }, 126 | { 127 | "as_der": "REDACT", 128 | "extensions": { 129 | "basicConstraints": "CA:TRUE", 130 | "keyUsage": "Key Cert Sign, C R L Sign", 131 | "subjectKeyIdentifier": "C4:A7:B1:A4:7B:2C:71:FA:DB:E1:4B:90:75:FF:C4:15:60:85:89:10" 132 | }, 133 | "fingerprint": "DA:C9:02:4F:54:D8:F6:DF:94:93:5F:B1:73:26:38:CA:6A:D7:7C:13", 134 | "not_after": 1633010475, 135 | "not_before": 970348339, 136 | "serial_number": "44AFB080D6A327BA893039862EF8406B", 137 | "subject": { 138 | "C": null, 139 | "CN": "DST Root CA X3", 140 | "L": null, 141 | "O": "Digital Signature Trust Co.", 142 | "OU": null, 143 | "ST": null, 144 | "aggregated": "/CN=DST Root CA X3/O=Digital Signature Trust Co." 145 | } 146 | } 147 | ], 148 | "leaf_cert": { 149 | "all_domains": [ 150 | "rundschleifmaschinen-service.de", 151 | "www.rundschleifmaschinen-service.de" 152 | ], 153 | "as_der": "REDACT", 154 | "extensions": { 155 | "authorityInfoAccess": "CA Issuers - URI:http://cert.int-x3.letsencrypt.org/\nOCSP - URI:http://ocsp.int-x3.letsencrypt.org\n", 156 | "authorityKeyIdentifier": "keyid:A8:4A:6A:63:04:7D:DD:BA:E6:D1:39:B7:A6:45:65:EF:F3:A8:EC:A1\n", 157 | "basicConstraints": "CA:FALSE", 158 | "certificatePolicies": "Policy: 1.3.6.1.4.1.44947.1.1.1\n CPS: http://cps.letsencrypt.org\n User Notice: is Certificate may only be relied upon by Relying Parties and only in accordance with the Certificate Policy found at https://letsencrypt.org/repository/", 159 | "ctlSignedCertificateTimestamp": "BIHyAPAAdQBvU3asMfAxGdiZAKRRFf93FRwR2QLBACkGjbIImjfZEwAAAWYyuWCoAAAEAwBGMEQCIHAfF-WDz1YkPCONYN0aXohfUPFrhiKG61tXfDilc3dUAiB0oHYT0e5eCKi5k9mEzRpqC-NdvhEtr8qKBlxEoiQsGwB3ACk8UZZUyDlluqpQ_FgH1Ldvv1h6KXLcpMMM9OVFR_R4AAABZjK5YkwAAAQDAEgwRgIhAKlrVU0Na8GF1AT7lCpeUJMchwfHnFsjswnpultsgKQhAiEAuPvplxBQsMHbioLdPsNRQSr-xUHV2g7yZkUnKqZHbnQ=", 160 | "extendedKeyUsage": "TLS Web server authentication, TLS Web client authentication", 161 | "keyUsage": "Digital Signature, Key Encipherment", 162 | 
"subjectAltName": "DNS:www.rundschleifmaschinen-service.de, DNS:rundschleifmaschinen-service.de", 163 | "subjectKeyIdentifier": "E3:45:2E:7F:5C:8D:B4:17:CC:B8:73:09:E3:DA:F7:F3:F9:ED:F2:15" 164 | }, 165 | "fingerprint": "9A:3A:AF:F8:DC:A4:18:4B:B6:46:61:F7:29:46:FA:42:9B:CA:9C:71", 166 | "not_after": 1546221701, 167 | "not_before": 1538445701, 168 | "serial_number": "3428B7C70A67819D5B9E7A13D2B9B8C778F", 169 | "subject": { 170 | "C": null, 171 | "CN": "rundschleifmaschinen-service.de", 172 | "L": null, 173 | "O": null, 174 | "OU": null, 175 | "ST": null, 176 | "aggregated": "/CN=rundschleifmaschinen-service.de" 177 | } 178 | }, 179 | "seen": 1538635262.353125, 180 | "source": { 181 | "name": "Google 'Rocketeer' log", 182 | "url": "ct.googleapis.com/rocketeer/" 183 | }, 184 | "update_type": "X509LogEntry" 185 | }, 186 | "message_type": "certificate_update" 187 | }, 188 | { 189 | "data": { 190 | "cert_index": 447857993, 191 | "cert_link": "http://ct.googleapis.com/rocketeer/ct/v1/get-entries?start=447857993&end=447857993", 192 | "chain": [ 193 | { 194 | "as_der": "REDACT", 195 | "extensions": { 196 | "authorityInfoAccess": "CA Issuers - URI:http://apps.identrust.com/roots/dstrootcax3.p7c\nOCSP - URI:http://isrg.trustid.ocsp.identrust.com\n", 197 | "authorityKeyIdentifier": "keyid:C4:A7:B1:A4:7B:2C:71:FA:DB:E1:4B:90:75:FF:C4:15:60:85:89:10\n", 198 | "basicConstraints": "CA:TRUE", 199 | "certificatePolicies": "Policy: 1.3.6.1.4.1.44947.1.1.1\n CPS: http://cps.root-x1.letsencrypt.org", 200 | "crlDistributionPoints": "Full Name:\n URI:http://crl.identrust.com/DSTROOTCAX3CRL.crl", 201 | "keyUsage": "Digital Signature, Key Cert Sign, C R L Sign", 202 | "subjectKeyIdentifier": "A8:4A:6A:63:04:7D:DD:BA:E6:D1:39:B7:A6:45:65:EF:F3:A8:EC:A1" 203 | }, 204 | "fingerprint": "E6:A3:B4:5B:06:2D:50:9B:33:82:28:2D:19:6E:FE:97:D5:95:6C:CB", 205 | "not_after": 1615999246, 206 | "not_before": 1458232846, 207 | "serial_number": "A0141420000015385736A0B85ECA708", 208 | "subject": { 209 | 
"C": "US", 210 | "CN": "Let's Encrypt Authority X3", 211 | "L": null, 212 | "O": "Let's Encrypt", 213 | "OU": null, 214 | "ST": null, 215 | "aggregated": "/C=US/CN=Let's Encrypt Authority X3/O=Let's Encrypt" 216 | } 217 | }, 218 | { 219 | "as_der": "REDACT", 220 | "extensions": { 221 | "basicConstraints": "CA:TRUE", 222 | "keyUsage": "Key Cert Sign, C R L Sign", 223 | "subjectKeyIdentifier": "C4:A7:B1:A4:7B:2C:71:FA:DB:E1:4B:90:75:FF:C4:15:60:85:89:10" 224 | }, 225 | "fingerprint": "DA:C9:02:4F:54:D8:F6:DF:94:93:5F:B1:73:26:38:CA:6A:D7:7C:13", 226 | "not_after": 1633010475, 227 | "not_before": 970348339, 228 | "serial_number": "44AFB080D6A327BA893039862EF8406B", 229 | "subject": { 230 | "C": null, 231 | "CN": "DST Root CA X3", 232 | "L": null, 233 | "O": "Digital Signature Trust Co.", 234 | "OU": null, 235 | "ST": null, 236 | "aggregated": "/CN=DST Root CA X3/O=Digital Signature Trust Co." 237 | } 238 | } 239 | ], 240 | "leaf_cert": { 241 | "all_domains": [ 242 | "www.runaflohmarkt.de" 243 | ], 244 | "as_der": "REDACT", 245 | "extensions": { 246 | "authorityInfoAccess": "CA Issuers - URI:http://cert.int-x3.letsencrypt.org/\nOCSP - URI:http://ocsp.int-x3.letsencrypt.org\n", 247 | "authorityKeyIdentifier": "keyid:A8:4A:6A:63:04:7D:DD:BA:E6:D1:39:B7:A6:45:65:EF:F3:A8:EC:A1\n", 248 | "basicConstraints": "CA:FALSE", 249 | "certificatePolicies": "Policy: 1.3.6.1.4.1.44947.1.1.1\n CPS: http://cps.letsencrypt.org\n User Notice: is Certificate may only be relied upon by Relying Parties and only in accordance with the Certificate Policy found at https://letsencrypt.org/repository/", 250 | "ctlSignedCertificateTimestamp": "BIHxAO8AdQBvU3asMfAxGdiZAKRRFf93FRwR2QLBACkGjbIImjfZEwAAAWX9AeXwAAAEAwBGMEQCICIkjDXPcRgbcoKuh8Ciu_1sIVVKj_oGb-bzc8zPyhF2AiAhCQMKgrBcxZpZpGgOEgyBxIX6WqJFDOGamrWW-I55IAB2ACk8UZZUyDlluqpQ_FgH1Ldvv1h6KXLcpMMM9OVFR_R4AAABZf0B56wAAAQDAEcwRQIhAPNKe7X7XqNZF7H4NOWW-DtSvx1jVWxqsZVnknCjrkjrAiBTIKM-qsi4QMFHbTRfxz4tiRvI14vCXDAbyoLgbp6BKw==", 251 | "extendedKeyUsage": 
"TLS Web server authentication, TLS Web client authentication", 252 | "keyUsage": "Digital Signature, Key Encipherment", 253 | "subjectAltName": "DNS:www.runaflohmarkt.de", 254 | "subjectKeyIdentifier": "7C:82:16:CB:31:94:C6:C5:5C:72:A1:37:CA:AE:B9:9B:3D:73:3E:9B" 255 | }, 256 | "fingerprint": "AD:5E:3D:91:50:46:7E:C6:D9:30:FD:65:11:8B:CE:81:FF:29:49:B9", 257 | "not_after": 1545320484, 258 | "not_before": 1537544484, 259 | "serial_number": "36434086EFE2BB58A2068BBA9F2E96B7898", 260 | "subject": { 261 | "C": null, 262 | "CN": "www.runaflohmarkt.de", 263 | "L": null, 264 | "O": null, 265 | "OU": null, 266 | "ST": null, 267 | "aggregated": "/CN=www.runaflohmarkt.de" 268 | } 269 | }, 270 | "seen": 1538635262.249552, 271 | "source": { 272 | "name": "Google 'Rocketeer' log", 273 | "url": "ct.googleapis.com/rocketeer/" 274 | }, 275 | "update_type": "X509LogEntry" 276 | }, 277 | "message_type": "certificate_update" 278 | } 279 | ] 280 | -------------------------------------------------------------------------------- /tests/test_domain_matching_analyser.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Common domain matching analyser. 3 | ''' 4 | import copy 5 | import os 6 | import unittest 7 | 8 | from certstream_analytics.analysers import AhoCorasickDomainMatching 9 | from certstream_analytics.analysers import WordSegmentation 10 | from certstream_analytics.analysers import DomainMatching, DomainMatchingOption 11 | from certstream_analytics.analysers import BulkDomainMarker 12 | from certstream_analytics.analysers import IDNADecoder 13 | from certstream_analytics.analysers import HomoglyphsDecoder 14 | 15 | 16 | class DomainMatchingTest(unittest.TestCase): 17 | ''' 18 | Test all the common domain matching analysers. 19 | ''' 20 | def test_ahocorasick(self): 21 | ''' 22 | Compare some mock domains against the list of most popular domains 23 | using Aho-Corasick algorithm. 
24 | ''' 25 | # Load the mock list of common domains for testing. 26 | current_dir = os.path.dirname(os.path.realpath(__file__)) 27 | 28 | with open(os.path.join(current_dir, 'opendns-top-domains.txt')) as fhandle: 29 | domains = [line.rstrip() for line in fhandle] 30 | 31 | ahocorasick_analyser = AhoCorasickDomainMatching(domains) 32 | 33 | cases = [ 34 | { 35 | 'data': { 36 | 'all_domains': [ 37 | 'store.google.com', 38 | 'google.com', 39 | ], 40 | }, 41 | 'expected': [ 42 | { 43 | 'analyser': 'AhoCorasickDomainMatching', 44 | 'output': { 45 | 'store.google.com': ['google.com'], 46 | }, 47 | }, 48 | ], 49 | 'description': 'An exact match domain', 50 | }, 51 | 52 | { 53 | 'data': { 54 | 'all_domains': [ 55 | 'www.facebook.com.msg40.site', 56 | ], 57 | }, 58 | 'expected': [ 59 | { 60 | 'analyser': 'AhoCorasickDomainMatching', 61 | 'output': { 62 | 'www.facebook.com.msg40.site': ['facebook.com'], 63 | }, 64 | }, 65 | ], 66 | 'description': 'A sample phishing domain with a sub-domain match', 67 | }, 68 | 69 | { 70 | 'data': { 71 | 'all_domains': [ 72 | 'login-appleid.apple.com.managesuppport.co', 73 | ], 74 | }, 75 | 'expected': [ 76 | { 77 | 'analyser': 'AhoCorasickDomainMatching', 78 | 'output': { 79 | 'login-appleid.apple.com.managesuppport.co': ['apple.com'], 80 | }, 81 | }, 82 | ], 83 | 'description': 'A sample phishing domain with a partial string match', 84 | }, 85 | 86 | { 87 | 'data': { 88 | 'all_domains': [ 89 | 'socket.io', 90 | ], 91 | }, 92 | 'expected': [], 93 | 'description': 'A non-matching domain (not in the list of most popular domains)', 94 | }, 95 | 96 | { 97 | 'data': { 98 | 'all_domains': [ 99 | 'www.foobar2000.com', 100 | ], 101 | }, 102 | 'expected': [], 103 | 'description': 'A non-matching domain (excluded pattern)', 104 | }, 105 | 106 | { 107 | 'data': { 108 | 'all_domains': [ 109 | 'autodiscover.blablabla.com', 110 | ], 111 | }, 112 | 'expected': [], 113 | 'description': 'Match a ignored pattern', 114 | }, 115 | ] 116 | 117 | for case in 
cases: 118 | got = ahocorasick_analyser.run(case['data']) 119 | self.assertListEqual(got['analysers'], case['expected'], case['description']) 120 | 121 | def test_wordsegmentation(self): 122 | ''' 123 | Try to segment some domains and check the result. 124 | ''' 125 | wordsegmentation = WordSegmentation() 126 | 127 | cases = [ 128 | { 129 | 'data': { 130 | 'all_domains': [ 131 | 'store.google.com', 132 | 'google.com', 133 | ], 134 | }, 135 | 'expected': [ 136 | { 137 | 'analyser': 'WordSegmentation', 138 | 'output': { 139 | 'store.google.com': ['store', 'google', 'com'], 140 | 'google.com': ['google', 'com'], 141 | }, 142 | }, 143 | ], 144 | 'description': 'A legit domain', 145 | }, 146 | 147 | { 148 | 'data': { 149 | 'all_domains': [ 150 | 'www.facebook.com.msg40.site', 151 | ], 152 | }, 153 | 'expected': [ 154 | { 155 | 'analyser': 'WordSegmentation', 156 | 'output': { 157 | 'www.facebook.com.msg40.site': ['www', 'facebook', 'com', 'msg40', 'site'], 158 | }, 159 | }, 160 | ], 161 | 'description': 'Word segmentation using the domain separator (dot)', 162 | }, 163 | 164 | { 165 | 'data': { 166 | 'all_domains': [ 167 | 'login-appleid.apple.com.managesuppport.co', 168 | ], 169 | }, 170 | 'expected': [ 171 | { 172 | 'analyser': 'WordSegmentation', 173 | 'output': { 174 | 'login-appleid.apple.com.managesuppport.co': [ 175 | 'login', 176 | 'apple', 177 | 'id', 178 | 'apple', 179 | 'com', 180 | 'manage', 181 | 'suppport', 182 | 'co' 183 | ], 184 | }, 185 | }, 186 | ], 187 | 'description': 'Word segmentation using dictionary', 188 | }, 189 | 190 | { 191 | 'data': { 192 | 'all_domains': [ 193 | 'arch.mappleonline.com', 194 | ], 195 | }, 196 | 'expected': [ 197 | { 198 | 'analyser': 'WordSegmentation', 199 | 'output': { 200 | 'arch.mappleonline.com': ['arch', 'm', 'apple', 'online', 'com'], 201 | }, 202 | }, 203 | ], 204 | 'description': 'Failed to segment the word correctly', 205 | }, 206 | 207 | { 208 | 'data': { 209 | 'all_domains': [ 210 | 'www.freybrothersinc.com', 211 
| ], 212 | }, 213 | 'expected': [ 214 | { 215 | 'analyser': 'WordSegmentation', 216 | 'output': { 217 | 'www.freybrothersinc.com': ['www', 'frey', 'brothers', 'com'], 218 | }, 219 | }, 220 | ], 221 | 'description': 'Ignore certain stop words (inc) when doing segmentation', 222 | }, 223 | ] 224 | 225 | for case in cases: 226 | got = wordsegmentation.run(case['data']) 227 | self.assertListEqual(got['analysers'], case['expected'], case['description']) 228 | 229 | def test_domain_matching(self): 230 | ''' 231 | Combine the result of all domain matching analysers into one. 232 | ''' 233 | # The first option decides if the TLD is included in the match 234 | options = [ 235 | (True, DomainMatchingOption.SUBSET_MATCH), 236 | (False, DomainMatchingOption.SUBSET_MATCH), 237 | (True, DomainMatchingOption.ORDER_MATCH), 238 | (False, DomainMatchingOption.ORDER_MATCH), 239 | ] 240 | 241 | analysers = {o: DomainMatching(include_tld=o[0], option=o[1]) for o in options} 242 | 243 | cases = [ 244 | { 245 | 'data': { 246 | 'all_domains': [ 247 | 'store.google.com', 248 | 'google.com', 249 | ], 250 | 251 | 'analysers': [ 252 | { 253 | 'analyser': 'AhoCorasickDomainMatching', 254 | 'output': { 255 | 'store.google.com': ['google.com'], 256 | }, 257 | }, 258 | 259 | { 260 | 'analyser': 'WordSegmentation', 261 | 'output': { 262 | 'store.google.com': ['store', 'google', 'com'], 263 | 'google.com': ['google', 'com'], 264 | }, 265 | }, 266 | ], 267 | }, 268 | 'expected': { 269 | (True, DomainMatchingOption.SUBSET_MATCH): [], 270 | (False, DomainMatchingOption.SUBSET_MATCH): [], 271 | (True, DomainMatchingOption.ORDER_MATCH): [], 272 | (False, DomainMatchingOption.ORDER_MATCH): [], 273 | }, 274 | 'description': 'A legit domain so it will be skipped (no match reported)', 275 | }, 276 | 277 | { 278 | 'data': { 279 | 'all_domains': [ 280 | 'login-appleid.managesuppport.com', 281 | ], 282 | 283 | 'analysers': [ 284 | { 285 | 'analyser': 'AhoCorasickDomainMatching', 286 | 'output': { 287 | 
'login-appleid.managesuppport.com': ['apple.com'], 288 | }, 289 | }, 290 | 291 | { 292 | 'analyser': 'WordSegmentation', 293 | 'output': { 294 | 'login-appleid.managesuppport.com': [ 295 | 'login', 296 | 'apple', 297 | 'id', 298 | 'manage', 299 | 'suppport' 300 | ], 301 | }, 302 | }, 303 | ], 304 | }, 305 | 'expected': { 306 | (True, DomainMatchingOption.SUBSET_MATCH): [], 307 | (False, DomainMatchingOption.SUBSET_MATCH): [ 308 | { 309 | 'analyser': 'DomainMatching', 310 | 'output': { 311 | 'login-appleid.managesuppport.com': ['apple.com'] 312 | }, 313 | }, 314 | ], 315 | (True, DomainMatchingOption.ORDER_MATCH): [], 316 | (False, DomainMatchingOption.ORDER_MATCH): [ 317 | { 318 | 'analyser': 'DomainMatching', 319 | 'output': { 320 | 'login-appleid.managesuppport.com': ['apple.com'] 321 | }, 322 | }, 323 | ], 324 | }, 325 | 'description': 'Find a matching phishing domain', 326 | }, 327 | 328 | { 329 | 'data': { 330 | 'all_domains': [ 331 | 'djunprotected.com', 332 | 'www.djunprotected.com' 333 | ], 334 | 335 | 'analysers': [ 336 | { 337 | 'analyser': 'AhoCorasickDomainMatching', 338 | 'output': { 339 | 'djunprotected.com': ['ted.com'] 340 | } 341 | }, 342 | 343 | { 344 | 'analyser': 'WordSegmentation', 345 | 'output': { 346 | 'djunprotected.com': ['dj', 'unprotected', 'com'], 347 | 'www.djunprotected.com': ['www', 'dj', 'unprotected', 'com'] 348 | } 349 | }, 350 | ], 351 | }, 352 | 'expected': { 353 | (True, DomainMatchingOption.SUBSET_MATCH): [], 354 | (False, DomainMatchingOption.SUBSET_MATCH): [], 355 | (True, DomainMatchingOption.ORDER_MATCH): [], 356 | (False, DomainMatchingOption.ORDER_MATCH): [], 357 | }, 358 | 'description': 'Find a matching phishing domain', 359 | }, 360 | ] 361 | 362 | for case in cases: 363 | for option, analyser in analysers.items(): 364 | expected = copy.deepcopy(case['data']['analysers']) 365 | expected.extend(case['expected'][option]) 366 | 367 | got = analyser.run(case['data']) 368 | self.assertListEqual(got['analysers'], expected, 
369 | '{} ({})'.format(case['description'], option)) 370 | 371 | def test_bulk_domain_marker(self): 372 | ''' 373 | Test the bulk domain analyser. 374 | ''' 375 | bulky = BulkDomainMarker() 376 | 377 | cases = [ 378 | { 379 | 'data': { 380 | 'all_domains': [ 381 | 'store.google.com', 382 | 'google.com', 383 | ], 384 | }, 385 | 'expected': [ 386 | {'analyser': 'BulkDomainMarker', 'output': False} 387 | ], 388 | 'description': 'Not a bulk record', 389 | }, 390 | { 391 | 'data': { 392 | 'all_domains': [ 393 | 'a.com', 394 | 'b.com', 395 | 'c.com', 396 | 'd.com', 397 | 'e.com', 398 | 'f.com', 399 | 'g.com', 400 | 'h.com', 401 | 'i.com', 402 | 'j.com', 403 | 'k.com', 404 | 'l.com', 405 | 'm.com', 406 | 'n.com', 407 | 'o.com', 408 | ], 409 | }, 410 | 'expected': [ 411 | {'analyser': 'BulkDomainMarker', 'output': True} 412 | ], 413 | 'description': 'Mark a bulk record', 414 | }, 415 | ] 416 | 417 | for case in cases: 418 | got = bulky.run(case['data']) 419 | self.assertListEqual(got['analysers'], case['expected'], case['description']) 420 | 421 | def test_idn_decoder(self): 422 | ''' 423 | Test the IDNA decoder. 
424 | ''' 425 | decoder = IDNADecoder() 426 | 427 | cases = [ 428 | { 429 | 'data': { 430 | 'all_domains': [ 431 | 'store.google.com', 432 | 'google.com', 433 | ], 434 | }, 435 | 'expected': [ 436 | 'store.google.com', 437 | 'google.com', 438 | ], 439 | 'description': 'There is no domain in IDNA format', 440 | }, 441 | { 442 | 'data': { 443 | 'all_domains': [ 444 | 'xn--f1ahbgpekke1h.xn--p1ai', 445 | 'tigrobaldai.lt' 446 | ], 447 | }, 448 | 'expected': [ 449 | 'укрэмпужск.рф', 450 | 'tigrobaldai.lt' 451 | ], 452 | 'description': 'Convert some domains in IDNA format', 453 | }, 454 | { 455 | 'data': { 456 | 'all_domains': [ 457 | 'xn--foobar.xn--me', 458 | ], 459 | }, 460 | 'expected': [ 461 | 'xn--foobar.xn--me', 462 | ], 463 | 'description': 'Handle an invalid IDNA string', 464 | }, 465 | { 466 | 'data': { 467 | 'all_domains': [ 468 | '*.xn---35-5cd3cln6a9bzb.xn--p1ai', 469 | '*.nl-dating-vidkid.com', 470 | ], 471 | }, 472 | 'expected': [ 473 | '*.отмычка-35.рф', 474 | '*.nl-dating-vidkid.com', 475 | ], 476 | 'description': 'Handle an invalid code point', 477 | }, 478 | ] 479 | 480 | for case in cases: 481 | got = decoder.run(case['data']) 482 | self.assertListEqual(got['all_domains'], case['expected'], case['description']) 483 | 484 | def test_homoglyphs_decoder(self): 485 | ''' 486 | Test the homoglyphs decoder. 
487 | ''' 488 | cases = [ 489 | { 490 | 'data': { 491 | 'all_domains': [ 492 | 'store.google.com', 493 | '*.google.com', 494 | ], 495 | }, 496 | 'greedy': False, 497 | 'expected': [ 498 | 'store.google.com', 499 | '*.google.com', 500 | ], 501 | 'description': 'Normal domains in ASCII', 502 | }, 503 | { 504 | 'data': { 505 | 'all_domains': [ 506 | 'store.google.com', 507 | '*.google.com', 508 | ], 509 | }, 510 | 'greedy': True, 511 | 'expected': [ 512 | 'store.google.com', 513 | 'store.google.corn', 514 | 'store.googie.com', 515 | 'store.googie.corn', 516 | '*.google.com', 517 | '*.google.corn', 518 | '*.googie.com', 519 | '*.googie.corn' 520 | ], 521 | 'description': 'Normal domains in ASCII with a greedy decoder', 522 | }, 523 | { 524 | 'data': { 525 | 'all_domains': [ 526 | 'укрэмпужск.рф', 527 | 'tigrobaldai.lt', 528 | ], 529 | }, 530 | 'greedy': False, 531 | 'expected': [ 532 | 'yкpэмпyжcк.pф', 533 | 'tigrobaldai.lt', 534 | ], 535 | 'description': 'Normal domains in Unicode', 536 | }, 537 | { 538 | 'data': { 539 | 'all_domains': [ 540 | 'укрэмпужск.рф', 541 | 'tigrobaldai.lt', 542 | ], 543 | }, 544 | 'greedy': True, 545 | 'expected': [ 546 | 'yкpэмпyжcк.pф', 547 | 'tigrobaldai.lt', 548 | 'tigrobaldai.it', 549 | 'tigrobaidai.lt', 550 | 'tigrobaidai.it', 551 | ], 552 | 'description': 'Normal domains in Unicode with a greedy decoder', 553 | }, 554 | { 555 | 'data': { 556 | 'all_domains': [ 557 | # MATHEMATICAL MONOSPACE SMALL P 1D699 558 | '*.𝗉aypal.com', 559 | 560 | # MATHEMATICAL SAN-SERIF BOLD SMALL RHO 561 | 'phishing.𝗉ay𝞀al.com', 562 | ], 563 | }, 564 | 'greedy': False, 565 | 'expected': [ 566 | '*.paypal.com', 567 | 'phishing.paypal.com', 568 | ], 569 | 'description': 'Phishing example in confusable homoglyphs' 570 | }, 571 | { 572 | 'data': { 573 | 'all_domains': [ 574 | # MATHEMATICAL MONOSPACE SMALL P 1D699 575 | '*.𝗉aypal.com', 576 | 577 | # MATHEMATICAL SAN-SERIF BOLD SMALL RHO 578 | 'phishing.𝗉ay𝞀al.com', 579 | ], 580 | }, 581 | 'greedy': True, 582 | 
'expected': [ 583 | '*.paypal.com', 584 | '*.paypal.corn', 585 | '*.paypai.com', 586 | '*.paypai.corn', 587 | 'phishing.paypal.com', 588 | 'phishing.paypal.corn', 589 | 'phishing.paypai.com', 590 | 'phishing.paypai.corn', 591 | ], 592 | 'description': 'Phishing example in confusable homoglyphs with a greedy decoder' 593 | }, 594 | ] 595 | 596 | for case in cases: 597 | decoder = HomoglyphsDecoder(greedy=case['greedy']) 598 | 599 | got = decoder.run(case['data']) 600 | self.assertListEqual(got['all_domains'], case['expected'], case['description']) 601 | -------------------------------------------------------------------------------- /scripts/sundry/certstream-domain-features.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 145, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import json\n", 10 | "import numpy as np\n", 11 | "\n", 12 | "domains = []\n", 13 | "features = []\n", 14 | "\n", 15 | "with open('domain-matching.20181014.decoded') as f:\n", 16 | " for line in f:\n", 17 | " record = json.loads(line.strip()) \n", 18 | " \n", 19 | " domains.extend(list(record['analysers'][0]['output'].keys()))\n", 20 | " features.extend(record['analysers'][-1]['output'])" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 146, 26 | "metadata": {}, 27 | "outputs": [ 28 | { 29 | "data": { 30 | "text/plain": [ 31 | "14004" 32 | ] 33 | }, 34 | "execution_count": 146, 35 | "metadata": {}, 36 | "output_type": "execute_result" 37 | } 38 | ], 39 | "source": [ 40 | "len(domains)" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 147, 46 | "metadata": {}, 47 | "outputs": [ 48 | { 49 | "data": { 50 | "text/plain": [ 51 | "14004" 52 | ] 53 | }, 54 | "execution_count": 147, 55 | "metadata": {}, 56 | "output_type": "execute_result" 57 | } 58 | ], 59 | "source": [ 60 | "len(features)" 61 | ] 62 | }, 63 | { 64 | "cell_type": 
"code", 65 | "execution_count": 148, 66 | "metadata": {}, 67 | "outputs": [], 68 | "source": [ 69 | "columns = ['NumberOfParts', 'Length', 'LongestPart', 'TLD', 'Randomness']" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 149, 75 | "metadata": {}, 76 | "outputs": [], 77 | "source": [ 78 | "import pandas as pd\n", 79 | "\n", 80 | "df = pd.DataFrame(data=features, columns=columns, index=domains)" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": 150, 86 | "metadata": {}, 87 | "outputs": [ 88 | { 89 | "data": { 90 | "text/plain": [ 91 | "(14004, 5)" 92 | ] 93 | }, 94 | "execution_count": 150, 95 | "metadata": {}, 96 | "output_type": "execute_result" 97 | } 98 | ], 99 | "source": [ 100 | "df.shape" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": 151, 106 | "metadata": {}, 107 | "outputs": [ 108 | { 109 | "data": { 110 | "text/html": [ 111 | "
\n", 112 | "\n", 125 | "\n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | "
NumberOfPartsLengthLongestPartTLDRandomness
www.sawyerrshousegivebackafrica.co.uk4372720.0
\n", 147 | "
" 148 | ], 149 | "text/plain": [ 150 | " NumberOfParts Length LongestPart \\\n", 151 | "www.sawyerrshousegivebackafrica.co.uk 4 37 27 \n", 152 | "\n", 153 | " TLD Randomness \n", 154 | "www.sawyerrshousegivebackafrica.co.uk 2 0.0 " 155 | ] 156 | }, 157 | "execution_count": 151, 158 | "metadata": {}, 159 | "output_type": "execute_result" 160 | } 161 | ], 162 | "source": [ 163 | "df.sample()" 164 | ] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": 152, 169 | "metadata": {}, 170 | "outputs": [ 171 | { 172 | "data": { 173 | "text/plain": [ 174 | "count 6.000000\n", 175 | "mean 2334.000000\n", 176 | "std 2431.878862\n", 177 | "min 5.000000\n", 178 | "25% 109.000000\n", 179 | "50% 2355.000000\n", 180 | "75% 4507.250000\n", 181 | "max 4704.000000\n", 182 | "Name: NumberOfParts, dtype: float64" 183 | ] 184 | }, 185 | "execution_count": 152, 186 | "metadata": {}, 187 | "output_type": "execute_result" 188 | } 189 | ], 190 | "source": [ 191 | "df['NumberOfParts'].value_counts().describe()" 192 | ] 193 | }, 194 | { 195 | "cell_type": "code", 196 | "execution_count": 153, 197 | "metadata": {}, 198 | "outputs": [ 199 | { 200 | "data": { 201 | "text/plain": [ 202 | "" 203 | ] 204 | }, 205 | "execution_count": 153, 206 | "metadata": {}, 207 | "output_type": "execute_result" 208 | }, 209 | { 210 | "data": { 211 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAmAAAAEvCAYAAADijX30AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAFqtJREFUeJzt3X+s1nX9//HHxTmAID+OKJyji/yWWrp+bpWIsFzHzwEVTbDDam0tMGeSy5FlM1v+SKHPFpUzvksZG9NWm4IebB03NCgPTM2VU8ppjYoNGueiEA5g6PEcru8ffjvLj/Xhh16vwzncbn/Bm+t6X8/3c+xwP9d1uK5KrVarBQCAYkYM9gAAAMcbAQYAUJgAAwAoTIABABQmwAAAChNgAACFCTAAgMIEGABAYQIMAKAwAQYAUFjjYA/wvzl48GD6+4fmJyU1NFSG7OxDlZ2XZ+fl2Xl5dl7eUN35yJENh33bYzrA+vtr2bPnH4M9xlFpaho7ZGcfquy8PDsvz87Ls/PyhurOJ08ef9i39RIkAEBhAgwAoDABBgBQmAADAChMgAEAFCbAAAAKE2AAAIUJMACAwgQYAEBhAgwAoDABBgBQ2DH9WZD1Nm7CmIwZXb8VHMlnQh2JA6/2Zf/eA3U5NwBQf8d1gI0Z3Zj/c2PnYI9xxLb+95zsH+whAICj5iVIAIDCBBgAQGECDACgMAEGAFCYAAMAKEyAAQAUJsAAAAo7rt8HjPJOntiYEaPG1O389Xrz24O9B7Krp68u5wbg+CPAKGrEqDHJrRMHe4wjNuLWniT7BnsMAIYJL0ECABQmwAAAChNgAACFCTAAgMIEGABAYf4XJAxz45pGZszIE+p2/nq99ceB117J/j2v1eXcAINNgMEwN2bkCfnAvR8Y7DGO2O8+/7vsjwADhicvQQIAFCbAAAAKE2AAAIUJMACAwgQYAEBhAgwAoDABBgBQmAADAChMgAEAFCbAAAAKE2AAAIUJMACAwgQYAEBhAgwAoDABBgBQmAADACjssAOsv78/c+fOzRe/+MUkybZt2zJ//vy0tbVl8eLF6e3tTZL09vZm8eLFaWtry/z587N9+/aBc9xzzz1pa2vL7Nmzs3Hjxrf5UgAAhobDDrD77rsvZ5xxxsDvly1blgULFuSxxx7LhAkTsmbNmiTJ6tWrM2HChDz22GNZsGBBli1bliTZsmVLOjs709nZmZUrV+a2225Lf3//23w5AADHvsMKsO7u7vzqV79Ke3t7kqRWq+Wpp57K7NmzkyTz5s3L+vXrkyQbNmzIvHnzkiSzZ8/Ok08+mVqtlvXr12fOnDkZNWpUpk6dmtNPPz2bN2+uxzUBABzTDivAli5dmhtuuCEjRrx+8927d2fChAlpbGxMkrS0tKRarSZJqtVqTj311CRJY2Njxo8fn927d6daraalpWXgnM3NzQP3AQA4njQe6ga//OUvM2nSpLz//e/Pr3/96xIzDWhoqKSpaWzRxxwq7KU8Oy/Pzt+soWGEvRRm5+UdDzs/ZIA988wz2bBhQ7q6uvLqq69m//79WbJkSfbu3Zu+vr40Njamu7s7zc3NSV5/ZmvHjh1paWlJX19f9u3bl5NOOinNzc3p7u4eOG+1Wh24z3/S31/Lnj3/eIuX+J9Nnjy+bueut3rupZ7svDw7H16amsbaS2F2Xt5Q3fmRfL095EuQX/3qV9PV1ZUNGzbk+9//fs4777x873vfy7Rp07Ju3bokSUdHR1pbW5Mkra2t6ejoSJKsW7cu5513XiqVSlpbW9PZ2Zne3t5s27YtW7duzQc/+MGjuT4AgCHtqN8H7IYbbsiqVavS1taWPXv2ZP78+UmS9vb27NmzJ21tbVm1alW+9rWvJUnOOuusXHzxxbnkkkty1VVX5eabb05DQ8PbcxUAAEPIIV+C/FfTpk3LtGnTkiRTp04deOuJfzV69Ojcdddd//b+ixYtyqJFi45iTACA4cM
74QMAFCbAAAAKE2AAAIUJMACAwgQYAEBhAgwAoDABBgBQmAADAChMgAEAFCbAAAAKE2AAAIUJMACAwgQYAEBhAgwAoDABBgBQmAADAChMgAEAFCbAAAAKE2AAAIUJMACAwgQYAEBhAgwAoDABBgBQmAADAChMgAEAFCbAAAAKE2AAAIUJMACAwgQYAEBhAgwAoDABBgBQmAADAChMgAEAFCbAAAAKE2AAAIUJMACAwgQYAEBhAgwAoDABBgBQmAADAChMgAEAFCbAAAAKE2AAAIUJMACAwgQYAEBhAgwAoLBDBtirr76a9vb2fPKTn8ycOXNy1113JUm2bduW+fPnp62tLYsXL05vb2+SpLe3N4sXL05bW1vmz5+f7du3D5zrnnvuSVtbW2bPnp2NGzfW6ZIAAI5thwywUaNG5d57783PfvazrF27Nhs3bsyzzz6bZcuWZcGCBXnssccyYcKErFmzJkmyevXqTJgwIY899lgWLFiQZcuWJUm2bNmSzs7OdHZ2ZuXKlbntttvS399f36sDADgGHTLAKpVKTjzxxCRJX19f+vr6UqlU8tRTT2X27NlJknnz5mX9+vVJkg0bNmTevHlJktmzZ+fJJ59MrVbL+vXrM2fOnIwaNSpTp07N6aefns2bN9frugAAjlmH9TNg/f39ufzyy3P++efn/PPPz9SpUzNhwoQ0NjYmSVpaWlKtVpMk1Wo1p556apKksbEx48ePz+7du1OtVtPS0jJwzubm5oH7AAAcTxoP50YNDQ15+OGHs3fv3lx77bX585//XO+5/v/jVtLUNLbIYw019lKenZdn52/W0DDCXgqz8/KOh50fVoD904QJEzJt2rQ8++yz2bt3b/r6+tLY2Jju7u40Nzcnef2ZrR07dqSlpSV9fX3Zt29fTjrppDQ3N6e7u3vgXNVqdeA+/0l/fy179vzjKC7r8EyePL5u5663eu6lnuy8PDsfXpqaxtpLYXZe3lDd+ZF8vT3kS5AvvfRS9u7dmyR55ZVX8sQTT+SMM87ItGnTsm7duiRJR0dHWltbkyStra3p6OhIkqxbty7nnXdeKpVKWltb09nZmd7e3mzbti1bt27NBz/4wSO+OACAoe6Qz4Dt3LkzN954Y/r7+1Or1XLRRRflE5/4RM4888x85StfyZ133plzzjkn8+fPT5K0t7fnhhtuSFtbWyZOnJgf/OAHSZKzzjorF198cS655JI0NDTk5ptvTkNDQ32vDgDgGHTIADv77LOzdu3aNx2fOnXqwFtP/KvRo0cPvFfY/7Ro0aIsWrToKMYEABg+vBM+AEBhAgwAoDABBgBQmAADAChMgAEAFCbAAAAKE2AAAIUJMACAwgQYAEBhAgwAoDABBgBQmAADAChMgAEAFCbAAAAKE2AAAIUJMACAwgQYAEBhAgwAoDABBgBQmAADAChMgAEAFCbAAAAKE2AAAIUJMACAwgQYAEBhAgwAoDABBgBQmAADAChMgAEAFCbAAAAKE2AAAIUJMACAwgQYAEBhAgwAoDABBgBQmAADAChMgAEAFCbAAAAKE2AAAIUJMACAwgQYAEBhAgwAoDABBgBQmAADAChMgAEAFCbAAAAKO2SA7dixI5/73OdyySWXZM6cObn33nuTJHv27MnChQsza9asLFy4MD09PUmSWq2WO+64I21tbbnsssvy/PPPD5yro6Mjs2bNyqxZs9LR0VGnSwIAOLYdMsAaGhpy44035pFHHsn999+fn/70p9myZUtWrFiR6dOn59FHH8306dOzYsWKJElXV1e2bt2aRx99NLfffntuvfXWJK8H2/Lly/PAAw9k9erVWb58+UC0AQAcTw4ZYFOmTMn73ve+JMm4cePy7ne/O9VqNevXr8/cuXOTJHPnzs0vfvGLJBk4XqlU8uEPfzh79+7Nzp07s2nTpsyYMSNNTU2ZOHFiZsyYkY0bN9bx0gAAjk1H9DNg27dvzwsvvJAPfehD2bVrV6ZMmZIkmTx5cnb
t2pUkqVaraWlpGbhPS0tLqtXqm443NzenWq2+HdcAADCkNB7uDV9++eVcd911uemmmzJu3Lg3/FmlUkmlUnnbh2toqKSpaezbft7hwF7Ks/Py7PzNGhpG2Ethdl7e8bDzwwqw1157Ldddd10uu+yyzJo1K0ly8sknZ+fOnZkyZUp27tyZSZMmJXn9ma3u7u6B+3Z3d6e5uTnNzc15+umnB45Xq9Wce+65/+vj9vfXsmfPP474og7X5Mnj63bueqvnXurJzsuz8+GlqWmsvRRm5+UN1Z0fydfbQ74EWavV8s1vfjPvfve7s3DhwoHjra2tWbt2bZJk7dq1ufDCC99wvFar5dlnn8348eMzZcqUzJw5M5s2bUpPT096enqyadOmzJw580ivDQBgyDvkM2C//e1v8/DDD+c973lPLr/88iTJ9ddfn6uvvjqLFy/OmjVrctppp+XOO+9MklxwwQV5/PHH09bWljFjxmTp0qVJkqampnzpS19Ke3t7kuTaa69NU1NTva4LAOCYdcgA++hHP5o//OEP//bP/vmeYP+qUqnklltu+be3b29vHwgwAIDjlXfCBwAoTIABABQmwAAAChNgAACFCTAAgMIEGABAYQIMAKAwAQYAUJgAAwAoTIABABQmwAAAChNgAACFCTAAgMIEGABAYQIMAKAwAQYAUJgAAwAoTIABABQmwAAAChNgAACFCTAAgMIEGABAYQIMAKAwAQYAUJgAAwAoTIABABQmwAAAChNgAACFCTAAgMIEGABAYQIMAKAwAQYAUJgAAwAoTIABABQmwAAAChNgAACFCTAAgMIEGABAYQIMAKAwAQYAUJgAAwAoTIABABQmwAAAChNgAACFCTAAgMIEGABAYYcMsG984xuZPn16Lr300oFje/bsycKFCzNr1qwsXLgwPT09SZJarZY77rgjbW1tueyyy/L8888P3KejoyOzZs3KrFmz0tHRUYdLAQAYGg4ZYFdccUVWrlz5hmMrVqzI9OnT8+ijj2b69OlZsWJFkqSrqytbt27No48+mttvvz233nprkteDbfny5XnggQeyevXqLF++fCDaAACON4cMsI997GOZOHHiG46tX78+c+fOTZLMnTs3v/jFL95wvFKp5MMf/nD27t2bnTt3ZtOmTZkxY0aampoyceLEzJgxIxs3bqzD5QAAHPuO6mfAdu3alSlTpiRJJk+enF27diVJqtVqWlpaBm7X0tKSarX6puPNzc2pVqtvZW4AgCGr8a2eoFKppFKpvB2zvElDQyVNTWPrcu6hzl7Ks/Py7PzNGhpG2Ethdl7e8bDzowqwk08+OTt37syUKVOyc+fOTJo0Kcnrz2x1d3cP3K67uzvNzc1pbm7O008/PXC8Wq3m3HPPPeTj9PfXsmfPP45mxMMyefL4up273uq5l3qy8/LsfHhpahprL4XZeXlDdedH8vX2qF6CbG1tzdq1a5Mka9euzYUXXviG47VaLc8++2zGjx+fKVOmZObMmdm0aVN6enrS09OTTZs2ZebMmUfz0AAAQ94hnwG7/vrr8/TTT2f37t35+Mc/ni9/+cu5+uqrs3jx4qxZsyannXZa7rzzziTJBRdckMcffzxtbW0ZM2ZMli5dmiRpamrKl770pbS3tydJrr322jQ1NdXxsgAAjl2HDLDvf//7//b4vffe+6ZjlUolt9xyy7+9fXt7+0CAAQAcz7wTPgBAYQIMAKAwAQYAUJgAAwAoTIABABQmwAAAChNgAACFCTAAgMIEGABAYQIMAKAwAQYAUJgAAwAoTIABABQmwAAAChNgAACFCTAAgMIEGABAYQIMAKAwAQYAUJgAAwAoTIABABQmwAAAChNgAACFCTAAgMIEGABAYQIMAKAwAQYAUJgAAwAoTIABABQmwAAAChNgAACFCTAAgMIEGABAYQIMAKCwxsEeAGC4OWncyDSOOaFu5588eXxdztt34JXs3v9aXc4NvJEAA3ibNY45IS+cfc5gj3H
EznnxhUSAQRFeggQAKEyAAQAUJsAAAAoTYAAAhQkwAIDCBBgAQGECDACgMAEGAFCYAAMAKEyAAQAUVjzAurq6Mnv27LS1tWXFihWlHx4AYNAVDbD+/v58+9vfzsqVK9PZ2Zmf//zn2bJlS8kRAAAGXdEP4968eXNOP/30TJ06NUkyZ86crF+/PmeeeWbJMQAYZiZOGJNRo+v3T9rkyePrct7eV/vSs/dAXc7Nsa1ogFWr1bS0tAz8vrm5OZs3by45AgDD0KjRjfm/12wY7DGO2LV3tw72CAySogF2pEaObKjbdx3/tPW/59T1/PVS773U1a09gz3BURnKO//d53832CMclaG883NefGGwRzgqQ3nnQzVmhvLO62m476Xoz4A1Nzenu7t74PfVajXNzc0lRwAAGHRFA+wDH/hAtm7dmm3btqW3tzednZ1pbR2a37EAABytoi9BNjY25uabb85VV12V/v7+fOpTn8pZZ51VcgQAgEFXqdVqtcEeAgDgeOKd8AEAChNgAACFCTAAgMIEGABAYQKsDr7+9a8P9gjHhT/96U958skn8/LLL7/heFdX1yBNNPxt3rx54NMrtmzZklWrVuXxxx8f5KmOH7/5zW+yatWqbNq0abBHGbaee+657N+/P0nyyiuv5K677so111yT7373u9m3b98gTzc83XfffdmxY8dgj1Gc/wX5Fl1zzTVvOvbrX/8606ZNS5LcfffdpUc6Ltx33335yU9+kjPOOCMvvvhibrrppvzXf/1XkmTevHnp6OgY5AmHn+XLl6erqyt9fX2ZMWNGnnvuuUybNi1PPPFEZs6cmUWLFg32iMNOe3t71qxZkyR54IEH8pOf/CRtbW3ZtGlTWltbc/XVVw/yhMPPnDlz8vDDD6exsTHf+ta3csIJJ2T27Nl56qmn8uKLL2b58uWDPeKw85GPfCRjxozJO9/5zsyZMycXX3xxJk2aNNhj1d0x/VFEQ0G1Ws0ZZ5yR+fPnp1KppFar5fe//32uvPLKwR5tWFu9enUeeuihnHjiidm+fXuuu+66/PWvf83nP//5+J6iPtatW5e1a9emt7c3M2bMSFdXV8aNG5cvfOELmT9/vgCrg76+voFf33///Vm1alUmTZqUK6+8Mp/+9KcFWB0cPHgwjY2v/9P4+9//fuCbuY9+9KO5/PLLB3O0YWvq1Kl56KGH8sQTT+SRRx7JD3/4w7zvfe/LpZdemra2towbN26wR6wLL0G+RQ8++GDe//735+6778748eMzbdq0jB49Oueee27OPffcwR5v2Dp48GBOPPHEJMk73vGO/PjHP05XV1e+853vCLA6aWhoSENDw8B3qv/8onjCCSdkxAhfSurh4MGD6enpye7du1Or1QaeFRg7dmwaGhoGebrh6ayzzsqDDz6YJDn77LPzu9+9/jmqf/nLXwbCjLdXpVLJiBEjMnPmzCxdujQbN27MZz/72WzcuHHglY3hyN+mt2jEiBFZsGBBLrrooixdujSnnHJK+vv7B3usYe/kk0/OCy+8kHPOOSdJcuKJJ+aee+7JTTfdlD/+8Y+DPN3wNHLkyBw4cCBjxozJQw89NHB83759AqxO9u/fnyuuuCK1Wi2VSiU7d+7MlClT8vLLL/tGo06WLFmSJUuW5Ec/+lFOOumkfOYzn0lLS0tOPfXULFmyZLDHG5b+59/lkSNH5sILL8yFF16YAwcODNJU9ednwN5mv/rVr/LMM8/k+uuvH+xRhrXu7u40NDRk8uTJb/qz3/72t/nIRz4yCFMNb729vRk1atSbjr/00kv529/+lve+972DMNXx6cCBA/n73/+eqVOnDvYow9b+/fuzffv29PX1paWlJaeccspgjzRs/eUvf8m73vWuwR6jOAEGAFCY1w0AAAoTYAAAhQkwAIDCBBgAQGECDACgsP8HRsWezpLvOk4AAAAASUVORK5CYII=\n", 212 | "text/plain": [ 213 | "
" 214 | ] 215 | }, 216 | "metadata": {}, 217 | "output_type": "display_data" 218 | } 219 | ], 220 | "source": [ 221 | "import matplotlib.pyplot as plt\n", 222 | "import seaborn\n", 223 | "\n", 224 | "seaborn.set_style(\"darkgrid\")\n", 225 | "\n", 226 | "plt.figure(figsize=(10,5))\n", 227 | "df['NumberOfParts'].value_counts().plot(kind='bar')" 228 | ] 229 | }, 230 | { 231 | "cell_type": "code", 232 | "execution_count": 154, 233 | "metadata": {}, 234 | "outputs": [ 235 | { 236 | "data": { 237 | "text/plain": [ 238 | "" 239 | ] 240 | }, 241 | "execution_count": 154, 242 | "metadata": {}, 243 | "output_type": "execute_result" 244 | }, 245 | { 246 | "data": { 247 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAlMAAAEvCAYAAABhSUTPAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAADlhJREFUeJzt3V1o3Qf9x/HPabJqa5fG1jyoRP9uTipVvHASM0Qws43SSTO1InrhClIFoWyVSX1EhA3ROYp6FQbSXQhitamsgz1kSoulgsp8wAcYdtDJmkA0a+u21pzmfzEs7v8fnCzftCenfb3ucnJyzgeaX86b8zs9p7GwsLAQAACWZFW7BwAAdDIxBQBQIKYAAArEFABAgZgCACgQUwAABWIKAKBATAEAFIgpAIACMQUAUNB9Oe/swoULaTZ9eg2tdXU1/K4Ay87fFl6Oa67pWtT1LmtMNZsLmZt79nLeJR2qt3et3xVg2fnbwsvR13ftoq7nNB8AQIGYAgAoEFMAAAViCgCgQEwBABSIKQCAAjEFAFAgpgAACsQUAECBmAIAKBBTAAAFl/Wz+Xhp63rWZM0r/FP8X4v9TKSryXPn5nP29HPtngHAf/EIvgKseUV3/mfv4XbPoAM8+c1tOdvuEQC8iNN8AAAFYgoAoEBMAQAUiCkAgAIxBQBQIKYAAArEFABAgZgCACgQUwAABWIKAKBATAEAFIgpAIACMQUAUCCmAAAKxBQAQIGYAgAoEFMAAAWLjqlms5nx8fF85jOfSZKcPHkyO3bsyJYtW3L77bfn/Pnzl2wkAMBKteiYuv/++3P99ddf/Pqee+7JbbfdlkceeSQ9PT05cODAJRkIALCSLSqmTp06lV/84hf56Ec/miRZWFjI8ePHMzY2liS59dZbMzU1delWAgCsUIuKqbvvvjt33nlnVq164er//Oc/09PTk+7u7iTJ4OBgpqenL91KAIAVqrvVFX7+859nw4YNedvb3pZf/epXpTvr6mqkt3dt6TbgaucYgqXr6lrlGGLZtYyp3/72t3nsscdy5MiRnDt3LmfPns1dd92V06dPZ35+Pt3d3Tl16lQGBgZa3lmzuZC5uWeXZfiVpK/v2nZPoIM4hmDpenvXOoZYtMU+Prc8zff5z38+R44cyWOPPZZ777037373u/Od73wnw8PDeeihh5IkBw8ezOjoaG0xAEAHWvL7TN155535wQ9+kC1btmRubi47duxYzl0AAB2h5Wm+/zY8PJzh4eEkydDQkLdDAACuet4BHQCgQEwBABSIKQCAAjEFAFAg
pgAACsQUAECBmAIAKBBTAAAFYgoAoEBMAQAUiCkAgAIxBQBQIKYAAArEFABAgZgCACgQUwAABWIKAKBATAEAFIgpAIACMQUAUCCmAAAKxBQAQIGYAgAoEFMAAAViCgCgQEwBABSIKQCAAjEFAFAgpgAACsQUAECBmAIAKBBTAAAFYgoAoEBMAQAUiCkAgAIxBQBQIKYAAArEFABAgZgCACgQUwAABWIKAKBATAEAFIgpAIACMQUAUCCmAAAKxBQAQIGYAgAoEFMAAAViCgCgQEwBABR0t7rCuXPn8slPfjLnz59Ps9nM2NhYdu/enZMnT2bPnj2Zm5vL5s2b861vfSurV6++HJsBAFaMls9MrV69Ovv378/PfvazTE5O5ujRo3n88cdzzz335LbbbssjjzySnp6eHDhw4HLsBQBYUVrGVKPRyKte9aokyfz8fObn59NoNHL8+PGMjY0lSW699dZMTU1d2qUAACvQol4z1Ww2s3379tx000256aabMjQ0lJ6ennR3v3CWcHBwMNPT05d0KADAStTyNVNJ0tXVlUOHDuX06dP53Oc+l7/97W9LurOurkZ6e9cu6WeBFziGYOm6ulY5hlh2i4qp/+jp6cnw8HAef/zxnD59OvPz8+nu7s6pU6cyMDDQ8uebzYXMzT275LFXqr6+a9s9gQ7iGIKl6+1d6xhi0Rb7+NzyNN8//vGPnD59Okny/PPP59ixY7n++uszPDychx56KEly8ODBjI6OFuYCAHSmls9MzczMZO/evWk2m1lYWMgHPvCBvO9978ub3/zm3HHHHdm3b1/e+ta3ZseOHZdjLwDAitIypjZt2pTJycn/d/nQ0JC3QwAArnreAR0AoEBMAQAUiCkAgAIxBQBQIKYAAArEFABAgZgCACgQUwAABWIKAKBATAEAFIgpAIACMQUAUCCmAAAKxBQAQIGYAgAoEFMAAAViCgCgQEwBABSIKQCAAjEFAFAgpgAACsQUAECBmAIAKBBTAAAFYgoAoEBMAQAUiCkAgAIxBQBQIKYAAArEFABAgZgCACgQUwAABWIKAKBATAEAFIgpAIACMQUAUCCmAAAKxBQAQIGYAgAoEFMAAAViCgCgQEwBABSIKQCAAjEFAFAgpgAACsQUAECBmAIAKBBTAAAFYgoAoEBMAQAUdLe6wtNPP50vfOELmZ2dTaPRyMc+9rF86lOfytzcXO644478/e9/z+tf//rs27cv69evvxybAQBWjJbPTHV1dWXv3r158MEH86Mf/Sg//OEP88QTT2RiYiIjIyN5+OGHMzIykomJicuxFwBgRWkZU/39/dm8eXOSZN26dbnuuusyPT2dqampjI+PJ0nGx8fz6KOPXtqlAAAr0Mt6zdRTTz2VP//5z3nHO96R2dnZ9Pf3J0n6+voyOzt7SQYCAKxkLV8z9R//+te/snv37nzpS1/KunXrXvS9RqORRqPR8ja6uhrp7V378lcCFzmGYOm6ulY5hlh2i4qpf//739m9e3c+9KEPZevWrUmSjRs3ZmZmJv39/ZmZmcmGDRta3k6zuZC5uWdri69AfX3XtnsCHcQxBEvX27vWMcSiLfbxueVpvoWFhXz5y1/Oddddl507d168fHR0NJOTk0mSycnJ3HzzzUucCgDQuVo+M/Wb3/wmhw4dylve8pZs3749SbJnz57s2rUrt99+ew4cOJDXve512bdv3yUfCwCw0rSMqRtvvDF//etfX/J7+/fvX/ZBAACdxDugAwAUiCkAgAIxBQBQIKYAAArEFABAgZgCACgQUwAABWIKAKBATAEAFIgpAIACMQUAUCCmAAAKxBQAQIGYAgAoEFMAAAViCgCgQEwBABSIKQCAAjEFAFAgpgAACsQUAECBmAIAKBBTAAAFYgoAoEBMAQAUiCkAgAIxBQBQIKYAAArEFABAgZgCACgQUwAABWIKAKBATAEAFIgpAIACMQUAUCCmAAAKxBQAQIGYAgAoEFMAAAViCgCgQEwB
ABSIKQCAAjEFAFAgpgAACsQUAECBmAIAKBBTAAAFYgoAoEBMAQAUiCkAgIKWMfXFL34xIyMjueWWWy5eNjc3l507d2br1q3ZuXNnnnnmmUs6EgBgpWoZUx/+8Idz3333veiyiYmJjIyM5OGHH87IyEgmJiYu2UAAgJWsZUy9613vyvr161902dTUVMbHx5Mk4+PjefTRRy/NOgCAFW5Jr5manZ1Nf39/kqSvry+zs7PLOgoAoFN0V2+g0Wik0Wgs6rpdXY309q6t3iVc1RxDsHRdXascQyy7JcXUxo0bMzMzk/7+/szMzGTDhg2L+rlmcyFzc88u5S6vaH1917Z7Ah3EMQRL19u71jHEoi328XlJp/lGR0czOTmZJJmcnMzNN9+8lJsBAOh4LWNqz549+fjHP54TJ07kve99b3784x9n165d+eUvf5mtW7fm2LFj2bVr1+XYCgCw4rQ8zXfvvfe+5OX79+9f9jEAAJ3GO6ADABSIKQCAAjEFAFAgpgAACsQUAECBmAIAKBBTAAAFYgoAoEBMAQAUiCkAgAIxBQBQIKYAAArEFABAgZgCACgQUwAABWIKAKBATAEAFIgpAIACMQUAUCCmAAAKxBQAQIGYAgAoEFMAAAViCgCgoLvdAwC4NDau786q1WvaPWPF6eu7tt0TVpwL55/L7DPz7Z7RscQUwBVq1eo1ydfXt3sGHWDV159JcqbdMzqW03wAAAViCgCgQEwBABSIKQCAAjEFAFAgpgAACsQUAECBmAIAKBBTAAAFYgoAoEBMAQAUiCkAgAIxBQBQIKYAAArEFABAgZgCACgQUwAABWIKAKBATAEAFIgpAIACMQUAUCCmAAAKxBQAQEEppo4cOZKxsbFs2bIlExMTy7UJAKBjLDmmms1mvvGNb+S+++7L4cOH88ADD+SJJ55Yzm0AACvekmPq97//fd74xjdmaGgoq1evzrZt2zI1NbWc2wAAVrwlx9T09HQGBwcvfj0wMJDp6ellGQUA0Cm6L+edXXNNV/r6rr2cd9kxnvzmtnZPoEM4hnhZvv5MuxfQIfxtWbolPzM1MDCQU6dOXfx6eno6AwMDyzIKAKBTLDmm3v72t+fJJ5/MyZMnc/78+Rw+fDijo6PLuQ0AYMVb8mm+7u7ufO1rX8unP/3pNJvNfOQjH8kNN9ywnNsAAFa8xsLCwkK7RwAAdCrvgA4AUCCmAAAKxBQAQIGYAgAoEFO03e9+97ucPXs2SfL888/nu9/9bj772c/m29/+ds6cOdPmdUCnuv/++/P000+3ewZXAf+bj7bbtm1bDh06lO7u7nz1q1/NK1/5yoyNjeX48eP5y1/+ku9///vtngh0oHe+851Zs2ZN3vCGN2Tbtm354Ac/mA0bNrR7Flegy/pxMvBSLly4kO7uF34V//jHP+bgwYNJkhtvvDHbt29v5zSggw0NDeWnP/1pjh07lgcffDDf+973snnz5txyyy3ZsmVL1q1b1+6JXCGc5qPtbrjhhvzkJz9JkmzatCl/+MMfkiQnTpy4GFkAL1ej0ciqVavynve8J3fffXeOHj2aT3ziEzl69Gje//73t3seVxCn+Wi7M2fO5K677sqvf/3rvPrVr86f/vSnDA4O5rWvfW2+8pWvZNOmTe2eCHSg8fHxTE5OvuT3nnvuuaxZs+YyL+JKJaZYMc6ePZunnnoq8/PzGRwczGte85p2TwI62IkTJ/KmN72p3TO4CogpAIACr5kCACgQUwAABWIKAKBATAEAFIgpAICC/wWjtKP6Gu64AgAAAABJRU5ErkJggg==\n", 248 | "text/plain": [ 249 | "
" 250 | ] 251 | }, 252 | "metadata": {}, 253 | "output_type": "display_data" 254 | } 255 | ], 256 | "source": [ 257 | "plt.figure(figsize=(10,5))\n", 258 | "df.loc[df['NumberOfParts'] > 5]['NumberOfParts'].value_counts().plot(kind='bar')" 259 | ] 260 | }, 261 | { 262 | "cell_type": "code", 263 | "execution_count": 156, 264 | "metadata": {}, 265 | "outputs": [ 266 | { 267 | "data": { 268 | "text/html": [ 269 | "
\n", 270 | "\n", 283 | "\n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | "
NumberOfPartsLengthLongestPartTLDRandomness
waws-prod-blu-43680001.state.p.azurewebsites.windows.net6562230.0
\n", 305 | "
" 306 | ], 307 | "text/plain": [ 308 | " NumberOfParts Length \\\n", 309 | "waws-prod-blu-43680001.state.p.azurewebsites.wi... 6 56 \n", 310 | "\n", 311 | " LongestPart TLD \\\n", 312 | "waws-prod-blu-43680001.state.p.azurewebsites.wi... 22 3 \n", 313 | "\n", 314 | " Randomness \n", 315 | "waws-prod-blu-43680001.state.p.azurewebsites.wi... 0.0 " 316 | ] 317 | }, 318 | "execution_count": 156, 319 | "metadata": {}, 320 | "output_type": "execute_result" 321 | } 322 | ], 323 | "source": [ 324 | "df.loc[df['NumberOfParts'] > 5].sample()" 325 | ] 326 | }, 327 | { 328 | "cell_type": "code", 329 | "execution_count": 157, 330 | "metadata": {}, 331 | "outputs": [ 332 | { 333 | "data": { 334 | "text/html": [ 335 | "
\n", 336 | "\n", 349 | "\n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | "
NumberOfPartsLength
NumberOfParts1.0000000.604539
Length0.6045391.000000
\n", 370 | "
" 371 | ], 372 | "text/plain": [ 373 | " NumberOfParts Length\n", 374 | "NumberOfParts 1.000000 0.604539\n", 375 | "Length 0.604539 1.000000" 376 | ] 377 | }, 378 | "execution_count": 157, 379 | "metadata": {}, 380 | "output_type": "execute_result" 381 | } 382 | ], 383 | "source": [ 384 | "df[['NumberOfParts', 'Length']].corr()" 385 | ] 386 | }, 387 | { 388 | "cell_type": "code", 389 | "execution_count": 158, 390 | "metadata": {}, 391 | "outputs": [ 392 | { 393 | "data": { 394 | "text/plain": [ 395 | "" 396 | ] 397 | }, 398 | "execution_count": 158, 399 | "metadata": {}, 400 | "output_type": "execute_result" 401 | }, 402 | { 403 | "data": { 404 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAmcAAAFACAYAAAD589sCAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAIABJREFUeJzt3Xt4lNWBx/HfTIYggdDMYC7gohAsFQOL7NOKaIm5GC4qcjMU9HEFdFmf1gaNQqMIi6h4edDS9bZEsRCXBTcRQ6nKxVyablWwyqpA0G4BATWJTWa4JAKZzOwfKdEgwkBm5rwz+X7+CWcyefPLkCf55bzve47N7/f7BQAAAEuwmw4AAACAb1DOAAAALIRyBgAAYCGUMwAAAAuhnAEAAFgI5QwAAMBCKGcAAAAWQjkDAACwEMoZAACAhThMB+gIn8+nlhY2OAAAANbXpUtMQM+L6HLW0uKXx9NkOgYAAMAZJSbGB/Q8TmsCAABYCOUMAADAQihnAAAAFkI5AwAAsBDKGQAAgIVQzgAAACyEcgYAAGAhIStn9913n0aMGKHrr7++7TGPx6MZM2Zo1KhRmjFjhg4ePChJ8vv9evjhh5WTk6Nx48Zpx44doYoFAABgaSErZ5MmTdKLL77Y7rHCwkKNGDFCmzZt0ogRI1RYWChJqqqq0t69e7Vp0yY99NBDWrhwYahiAQAAWFrIdgj4yU9+ogMHDrR7rKysTC+//LIkacKECbrllls0Z84clZWVacKECbLZbLrssst06NAh1dXVKSkpKVTxAACdVGVlmcrLNwflWB6PW5KUkODs8LGysnKUkZHd4eMg8oV1+6b6+vq2wpWYmKj6+npJUm1trVJSUtqel5KSotra2jOWs5gYmxIS4kIXGAAQdeLiYuVwBOfE0Ylydv75vTp8rLi4WH6nQZLBvTVtNptsNluHjsHemgCAs3X55SN1+eUjg3KsBQsK/v52cVCOx++06GbJvTV79eqluro6SVJdXZ1cLpckKTk5WTU1NW3Pq6mpUXJycjijAQAAWEJYy1lWVpZKS0slSaWlpcrOzm73uN/v1//+7/8qPj6e680AAECnFLLTmvn5+dq6davcbrfS09P1y1/+UrNmzdJdd92lkpIS9enTR0uXLpUkXX311frDH/6gnJwcdevWTYsXB2d6GAAAINLY/H6/33SIc9Xc3ML5eQCAMSeuOVu06DHDSRAJLHnNGQAAAE6PcgYAAGAhlDMAAAALoZwBAABYCOUMAADAQihnAAAAFkI5AwAAsBDKGQAAgIVQzgAAACyEcgYAAGAhlDMAAAA
LoZwBQJRwuxs0f/6v5HY3mI4CoAMoZwAQJYqLV6u6eoeKi9eYjgKgAyhnABAF3O4GVVS8Jb/fr4qKzcyeARGMcgYAUaC4eLV8Pp8kyefzMXsGRDDKGQBEgaqqSnm9XkmS1+tVVVWF4UQAzhXlDACiQHp6hhwOhyTJ4XAoPT3TcCIA54pyBgBRIDd3muz21h/pdrtdublTDScCcK4oZwAQBZxOlzIzr5HNZlNmZo6cTpfpSADOkcN0AABAcOTmTtP+/fuYNQMiHOUMAKKE0+nSQw89bjoGgA7itCYAAICFUM4AAAAshHIGAABgIZQzAAAAC6GcAQAAWAjlDAAAwEIoZwAAABZCOQMAALAQyhkAAICFUM4AAAAshHIGAABgIZQzAAAAC6GcAQAAWAjlDAAAwEIoZwAAABZCOQMAALAQyhkAAICFUM4AAAAshHIGAABgIZQzAAAAC6GcAQAAWAjlDAAAwEIcJj7pihUrVFxcLJvNpoEDB+rRRx9VXV2d8vPz5fF4lJaWpieeeEKxsbEm4gEAABgT9pmz2tpaFRUV6dVXX9Xvf/97tbS06PXXX9eSJUs0ffp0bd68WT179lRJSUm4owEAABhn5LRmS0uLjh49Kq/Xq6NHjyoxMVHvvvuuRo8eLUmaOHGiysrKTEQDAAAwKuynNZOTkzVz5kxlZmaqa9euuuqqq5SWlqaePXvK4WiNk5KSotra2jMeKybGpoSEuFBHBgDglByO1jkOfhchmMJezg4ePKiysjKVlZUpPj5es2fP1h//+MdzOlZLi18eT1OQEwIAEBiv1ydJ/C5CQBIT4wN6XtjL2dtvv61/+Id/kMvlkiSNGjVKH3zwgQ4dOiSv1yuHw6GamholJyeHOxoAAIBxYb/mrE+fPvrwww/19ddfy+/365133tHFF1+s4cOHa+PGjZKk1157TVlZWeGOBgAAYFzYZ86GDh2q0aNHa+LEiXI4HBo0aJB+9rOfKSMjQ3fffbeWLl2qQYMGKTc3N9zRAAAAjLP5/X6/6RDnqrm5hfP8ANAJvPRSofbu3W06xnfs2dOaqX//VMNJ2uvXL1UzZ84yHQMnsew1ZwAAnK29e3fr012fKiEuyXSUdmJ8XSVJdfs8hpN8w9NUZzoCOohyBiAkXnzxP/Tmm+s1btwNmj79X03HQRRIiEtS5iVTTcewvIpda0xHQAextyaAkHjzzfWSpPXrf2c4CQBEFsoZgKB78cX/aDdesWKZoSQAEHkoZwCC7sSs2QnMngFA4ChnAAAAFkI5A4Ao4XY3aP78X8ntbjAdBUAHUM4ABN3YsePajceNu8FQks6luHi1qqt3qLiYu/WASEY5AxB0t99+R7sxS2mEntvdoIqKt+T3+1VRsZnZMyCCUc4AhMSJ2TNmzcKjuHi1fD6fJMnn8zF7BkQwyhmAkLj99jv06quvM2sWJlVVlfJ6vZIkr9erqqoKw4kAnCvKGQBEgfT0DDkcrZu+OBwOpadnGk4E4FxRzgAgCuTmTpPd3voj3W63KzeXbY6ASEU5A4Ao4HS6lJl5jWw2mzIzc+R0ukxHAnCO2PgcAKJEbu407d+/j1kzIMJRzgAgSjidLj300OOmYwDoIE5rAgAAWAjlDAAAwEIoZwAAABZCOQMAALAQyhkAAICFUM4AAAAshHIGAABgIZQzAAAAC6GcAQAAWAjlDAAAwEIoZwAAABZCOQMAABHD7W7Q/Pm/ktvdYDpKyFDOAABAxCguXq3q6h0qLl5jOkrIUM4AAEBEcLsbVFHxlvx+vyoqNkft7BnlDAAARITi4tXy+XySJJ/PF7WzZ5QzAAAQEaqqKuX1eiVJXq9XVVUVhhOFBuUMAABEhPT0DDkcDkmSw+FQenqm4UShQTkDAAARITd3muz21upit9uVmzvVcKLQoJwBAICI4HS6lJl5jWw2mzIzc+R0ukxHCgmH6QBAOEyefF3bv1999XWDSQAAHZGbO0379++L2lkziXIGAAAiiNPp0kM
PPW46RkhxWhNR79uzZqcaAwBgJZQzAAAAC6GcAQAAWAjlDAAAwEIoZwAAABZCOUPUGz362pPG3BAAALAuI+Xs0KFDysvL05gxYzR27Fht27ZNHo9HM2bM0KhRozRjxgwdPHjQRDREodzcaSeNo3dtHABA5DNSzh555BGNHDlSGzZs0Lp16zRgwAAVFhZqxIgR2rRpk0aMGKHCwkIT0RCFnE6XRo++VjabTaNHXxe1K0oDAKJD2MvZ4cOH9d577+nGG2+UJMXGxqpnz54qKyvThAkTJEkTJkzQW2+9Fe5oiGK5udM0aFAas2YAAMsL+w4BBw4ckMvl0n333addu3YpLS1N8+bNU319vZKSkiRJiYmJqq+vP+OxYmJsSkiIC3VkRIGEhDg9/fTTpmMAOEcOB5dInw2Hw87vxwgW9nLm9Xq1c+dOzZ8/X0OHDtXDDz/8nVOYNptNNpvtjMdqafHL42kKVVQAgEV4vT7TESKK1+vj96MFJSbGB/S8sJezlJQUpaSkaOjQoZKkMWPGqLCwUL169VJdXZ2SkpJUV1cnl4vrggBEv8rKMpWXbw7KsTwetyQpIcHZ4WNlZeUoIyO7w8cBcPbCPk+cmJiolJQU7d69W5L0zjvvaMCAAcrKylJpaakkqbS0VNnZ/FAAgLPhdrvldrtNxwDQQWGfOZOk+fPn695771Vzc7P69u2rRx99VD6fT3fddZdKSkrUp08fLV261EQ0AAirjIzsoM1QLVhQIElatOixoBwPgBlGytmgQYO0du3a7zy+cuVKA2kAAACsI6BytmfPHi1fvlxffPGFvF5v2+NFRUUhCwYAANAZBVTOZs+eralTp2rKlCmy27mdGZHH7W7QU089rvz8X7EILQDA0gIqZw6HQzfddFOoswAhU1y8WtXVO1RcvEazZv3cdBwAAL7XaafBPB6PPB6PMjMztWrVKtXV1bU95vF4wpUR6BC3u0EVFW/J7/eromKz3O4G05E6Bbe7QfPn/4rXGwDO0mlnziZNmiSbzSa/3y9JWr58edv7bDabysrKQpsOCILi4tXy+VoXsPT5fMyehQmzlQBwbk5bzsrLyyVJx44dU9euXdu979ixY6FLBQRRVVVl240sXq9XVVUVlIUQO3m2Mjd3Ktf6AUCAArq6f+rU724WfarHACtKT8+Qw9H6d4jD4VB6eqbhRNGvuHi1mpubJUnNzc0qLl5jOBEARI7TlrOvvvpK27dv19GjR7Vz507t2LFDO3bs0JYtW/T111+HKyPQIbm509ruMrbb7crN5Q+LUKuqqmy7HMLv96uqqsJwIgCIHKc9rfk///M/Wrt2rWpqavToo4+2Pd69e3fl5+eHPBwQDE6nS5mZ12jTpjeVmZnD6bUw6Nevv6qrd7SN+/dPNZgGACLLacvZxIkTNXHiRG3cuFGjR48OVyYg6HJzp2n//n3MmoXJt4uZJO3cud1QEgCIPAGtc/bFF1/ot7/9bbvHevToocGDB2vQoEEhCQYEk9Pp0kMPPW46BgAAZxRQOdu+fbu2b9+uzMzWC6krKir0ox/9SGvWrNGYMWP0L//yLyENCSDS2UwHAICIEdDdmjU1NVq7dq0KCgpUUFCgtWvXqqGhQatWrdJrr70W6owAIsygQZeedgwA+H4BlbP6+nrFxsa2jbt06aK//e1vOu+889o9DgCStHv37pPGfzWUBAAiT0CnNceNG6cpU6YoOztbUuvitNdff72ampo0YMCAkAYEEHkcDoe+vU71iXXmAESOysoyLV++LCjHOn78WNti4FbicDgUG9v1zE8MwG23/asyMrKDcqyAfmL+4he/0MiRI7Vt2zZJ0oMPPqghQ4ZIkp588smgBAEQPRobj5x2DAD4fgH/OZuWlqbk5GS1tLRIar2Ds0+fPiELBgTThg1v6IUXntUdd9ypnJyxpuNEvV69zld9/d/axuefn2gwDYBzkZGRHbSZIJydgMrZyy+/rGeeeUbnn39+20rrkrR+/fq
QBQOC6cUXn5MkLVv2LOUsDJqaGtuNmTkDgMAFVM6Kioq0YcMGOZ3OUOcBgm7DhjfabSW0efObFLQQO3l7N7Z7A4DABXS3ZkpKiuLj40OdBQiJE7NmJyxb9qyhJAAAnFlAM2d9+/bVLbfcooyMjHZLZ8yYMSNkwYBgOTFr9n1jAACsJKBy1qdPH/Xp00fNzc1qbm4OdSYgxFitHgBgXQGVszvvvFNS63Uj3bp1C2kgINgGDbpU1dU7240BALCqgK4527Ztm6699lqNHdt6EfWuXbu0cOHCUOYCgmbv3r0njfeYCQIAQAACKmeLFy/W8uXLlZCQIEm65JJL9Oc//zmkwYBgufTSwe3GaWmDv+eZAACYF1A5k6TevXu3/0B7wB8KGFVdvaPdeOfOHd/zTAAAzAvomrPevXvrgw8+kM1mU3Nzs4qKithTExHj5AVRTx4DAGAlAU1/LVy4UKtWrVJtba3S09NVXV2tf/u3fwt1NiAobDbbaccAAFhJQDNnLpfrOxucr1ixQtOnTw9FJiCoWOcMABBJzvnCsRUrVgQxBhA6LpfrpHEvQ0kAADizcy5nzD4gUhw8ePCkscdQEgAAzuycyxnX7SBStLS0nHYMAICVnPaas2HDhp2yhPn9fh07dixkoQAAADqr05azbdu2hSsHAAAA1IHTmgAAAAi+gJbSAADAJI/HLU/TV6rYtcZ0FMvzNNUp1sNNe5GMmTMAAAALYebMALe7QU899bjy838lp9N15g8AgE4uIcGp44dsyrxkqukollexa40SEhJMx0AHUM4MKC5ererqHSouXqNZs35uOg7QprKyTOXlmzt8HLvdLp/P1268YEHBOR8vKytHGRnZHc4FAJGA05ph5nY3qKLiLfn9flVUbJbb3WA6EhB0gwalnXYMAPh+zJyFWXHx6rYZBZ/Px+wZLCUjIztoM1S5uePk8/mUlJSsRYseC8oxAaAzYOYszKqqKuX1eiVJXq9XVVUVhhMBoTFoUJrS0obo+edfMh0FACIK5SzM0tMz5HC0Tlg6HA6lp2caTgQAAKzE2GnNlpYWTZ48WcnJyVq2bJn279+v/Px8eTwepaWl6YknnlBsbKypeCGTmztNFRVvSWq9SDo3lzuPgEjz0kuF2rt3t+kY37FnT2umjtx8EQr9+qVq5sxZpmMAEcNYOSsqKtKAAQN05MgRSdKSJUs0ffp0XXfddVqwYIFKSkp00003mYoXMk6nS5mZ12jTpjeVmZnDUhpABNq7d7f+uuNjpZgOcpK4v79t3PGx0RzfVmM6ABCBjJSzmpoaVVZW6o477tCKFSvk9/v17rvv6sknn5QkTZw4Uc8880xUljNJuvTSIdq48Q0NHjzEdBRLC9ayDqfCsg7oqBRJt8lmOoblLRcr1SO4OsNaoUbK2eLFizVnzhw1NjZKktxut3r27Nl2LVZKSopqa2vPeJyYGJsSEuLO+DyreeGFZyVJy5Y9o2uvHW04jXXFxcXK4ej4ZZHdunXT119/3W7ckePGxcVG5PdduJ14jaPxtQrG92Vn4nDYO/x9wGt+doLxmlvVihXLVF29Q7/7XYlmz77LdJyQCHs5q6iokMvl0uDBg7Vly5YOHaulxS+PpylIycLjww+3tZ3KPXLkiKqq3tY//uNlhlNZ0+WXj9Tll48MyrEmT76u7d//+Z8lHT5epH3fmeD1ti4ZE42v1YmvDYHxen0d/j7gNT87wXjNrcjtbtDGjRvk9/u1YcObuuGGGyNq9iwxMT6g54X9T5EPPvhA5eXlysrKUn5+vt5991098sgjOnToUNsSEzU1NUpOTg53tLB48sn26z0tWfKooSSdy3nnnSdJGjLkHw0nAQCcq1OtFRqNwl7O7rnnHlVVVam8vFxPPfWUrrjiCj355JMaPny4Nm7cKEl67bXXlJWVFe5oYdHYeOS0Y4TGgAE/VFraEC1cSBkGgEjVWdYKtcxJ/Dlz5ui3v/2tcnJy5PF4lJubazpSSHTrFnf
aMQAAOLXOslao0e2bhg8fruHDh0uS+vbtq5KSjl8LZHWXXjpY77+/tW2clsYdmwAABKKzrBVqmZmzzmL79o/ajT/++ENDSQAAiCwn1gq12WxRvVYoG5+HmcPh0LFj7ccAACAwubnTtH//vqidNZMoZ2HHDQEAAJw7p9Olhx563HSMkOK0Zpj17t2n3bhPnwsMJQEAAFbEzFmY9evXX19++UXb+KKL+htMAwCRw9NUp4pd1lrX6mhz604353XpbjjJNzxNdUpSgukY6ADKWZht3brlpPG7hpIAQOTo1y/VdIRT2rOnQZKUdKF1zoIkKcGyrxcCQzkLs5YW72nHAIDvmjlzlukIp7RgQYEkadGix87wTCBwXHMGAABgIZQzAAAAC6GcAQAAWAjlDAAAwEIoZwAAABbC3ZoAcJY8Hrf+Jmm5/KajWN6Xks73uE3HACIKM2cAAAAWwswZAJylhASnunx+QLfJZjqK5S2XX90TnKZjABGFmTMAAAALoZwBAABYCKc1gQj30kuF2rt3t+kY37FnT2umE9vbWEW/fqmW3QoIACTKGRDx9u7dre2ffiQlmE5ykpjWN9vrPjKb49s8pgMAwJlRzoBokCD5MnymU1ievZIrOSBVVpapvHxzUI4VzBnirKwcZWRkd/g4iHyUMwAAzpHTyZ2oCD7KGQCgU8nIyGaGCpbGHD8AAICFUM4AAAAshHIGAABgIZQzAAAAC6GcAQAAWAh3ayKoWK3+7LBafeSqUeum3lZy5O9vexhN0V6NpAGmQwARhnKGoNq7d7f2fPKhLuzRYjpKOz+QTZLU8vkHhpN8Y9+RGNMRcI769Us1HeGU6v7+R0hyf+vkGyDrvl6AVVHOEHQX9mjRAz8+cuYndnIP/9lK8xs4G1ad7TwxM7xo0WOGkwDoCMoZEOE8HrfkYWuigHgkT6zbdAoAOC1+mgMAAFgIM2dAhEtIcOrA8f1sfB4Ae6VdCQnshQjA2pg5AwAAsBDKGQAAgIVwWjNAlZVlKi/fHJJjd2TtraysHGVkZAcxDQAAMImZszBLSxty2jEAAOjcmDkLUEZGdtBmqCZPvq7t36xHBAAAvo1yZsCJ2TKKGYLGiuucHf372/OMpmjPIynJdAgAOD3KGRDhrLo1zon9TPsnWShfknVfLwA4gXKGoPJ43Go4HMPWRAH47HCMXJ6Or1bPVkIAEF0sdh4EAACgc2PmDEGVkOBUfOMeNj4PwMN/7qEYVqsHAJwk7OXsyy+/1Ny5c1VfXy+bzaYpU6bo1ltvlcfj0d13363PP/9cF1xwgZYuXaof/OAH4Y4HAABgVNhPa8bExKigoEBvvPGGXnnlFf3Xf/2X/u///k+FhYUaMWKENm3apBEjRqiwsDDc0QAAAIwL+8xZUlKSkpJa72Xv0aOHUlNTVVtbq7KyMr388suSpAkTJuiWW27RnDlzwh0PAMIqmLuPnLhDtiO7jpzA7iOAOUavOTtw4ICqq6s1dOhQ1dfXt5W2xMRE1dfXn/HjY2JsSkiIC3XMoHM4WicsIzH7mTgcdrWYDhFBHA57VH4fSNH9fR5McXGxba9VR/Xq5ZKkoBwvLi6W/zvAEGPlrLGxUXl5ebr//vvVo0f7ZRdsNptsNtsZj9HS4pfH0xSqiCHj9fokKSKzn8mJrw2B8Xp9Ufl9IEX393kwXX75SF1++UjTMU6J/zsguBIT4wN6npGlNJqbm5WXl6dx48Zp1KhRkqRevXqprq5OklRXVyeXy2UiGgAAgFFhL2d+v1/z5s1TamqqZsyY0fZ4VlaWSktLJUmlpaXKzuZaBwAA0PmE/bTm+++/r3Xr1mngwIEaP368JCk/P1+zZs3SXXfdpZKSEvXp00dLly4NdzQAAADjwl7OfvzjH+uTTz455ftWrlwZ5jQIhX1HrLd908Hjrdcw/iDWbzjJN/YdiVF/0yEAAJbDDgEIKqtuKn3w70sMuC6wTr7+su7rBQAwJ6rL2UsvFWrv3t2mY3xHMNc
iCqZ+/VI7vIk2m3ADANAxUV3O9u7dre27PpEvzlp3ftp8rS/7R/u+MpzkG/amBtMRAACAorycSZIvzqWjl15vOoblnbfz96YjAAAAGVrnDAAAAKdGOQMAALAQyhkAAICFUM4AAAAshHIGAABgIVF9t6bH45a9qZ47EQNgb6qXxxPV3w4AAEQEZs4AAAAsJKqnShISnNp3yMs6ZwE4b+fvlZDgNB0DAIBOj5kzAAAAC6GcAQAAWEhUn9aUWveMtNoNAbbmryVJ/i7dDCf5RuvemommYwAA0OlFdTnr1y/VdIRT2rNntySp/4VWKkOJln29AADoTKK6nM2cOct0hFNasKBAkrRo0WOGkwAAAKuJ6nIG4OxUVpapvHxzUI51Yob4xB8jHZGVlaOMjOwOHwcAIgHlDEBIOJ0szQIA54JyBqBNRkY2M1QAYBhLaQAAAFgI5QwAAMBCOK0Jy+LidABAZ0Q5Q6fAxekAgEhBOYNlcXE6AKAz4pozAAAAC6GcAQAAWAjlDAAAwEIoZwAAABZCOQMAALAQyhkAAICFUM4AAAAsxOb3+/2mQ5yr5uYWeTxNYflcoVitvn//1A4fi9XqAQCIDImJ8QE9j0VoDWC1egAA8H2YOQMAAAiDQGfOuOYMAADAQihnAAAAFkI5AwAAsBDKGQAAgIVQzgAAACyEcgYAAGAhlDMAAAALoZwBAABYCOUMAADAQihnAAAAFkI5AwAAsJCI3lsTAAAg2jBzBgAAYCGUMwAAAAuhnAEAAFgI5QwAAMBCKGcAAAAWQjkDAACwEMoZAACAhThMB+hMvvzyS82dO1f19fWy2WyaMmWKbr31VtOxotqxY8d088036/jx42ppadHo0aOVl5dnOlan0NLSosmTJys5OVnLli0zHSfqZWVlqXv37rLb7YqJidHatWtNR4p6hw4d0gMPPKBPP/1UNptNixcv1rBhw0zHilq7d+/W3Xff3Tbev3+/8vLyNH36dHOhQoRyFkYxMTEqKChQWlqajhw5osmTJ+uqq67SxRdfbDpa1IqNjdXKlSvVvXt3NTc366abblJ6erouu+wy09GiXlFRkQYMGKAjR46YjtJprFy5Ui6Xy3SMTuORRx7RyJEj9e///u86fvy4jh49ajpSVEtNTdW6desktf7xl56erpycHMOpQoPTmmGUlJSktLQ0SVKPHj2Umpqq2tpaw6mim81mU/fu3SVJXq9XXq9XNpvNcKroV1NTo8rKSt14442mowAhcfjwYb333ntt3+OxsbHq2bOn4VSdxzvvvKO+ffvqggsuMB0lJChnhhw4cEDV1dUaOnSo6ShRr6WlRePHj9eVV16pK6+8ktc8DBYvXqw5c+bIbudHTDjddtttmjRpkl555RXTUaLegQMH5HK5dN9992nChAmaN2+empqaTMfqNF5//XVdf/31pmOEDD85DWhsbFReXp7uv/9+9ejRw3ScqBcTE6N169bpD3/4gz766CN9+umnpiNFtYqKCrlcLg0ePNh0lE5l9erVeu211/TCCy9o1apVeu+990xHimper1c7d+7UtGnTVFpaqm7duqmwsNB0rE7h+PHjKi8v15gxY0xHCRnKWZg1NzcrLy9P48aN06hRo0zH6VR69uyp4cOH649//KPpKFHtgw8+UHl5ubKyspSfn693331X9957r+lYUS85OVmS1KtXL+Xk5Oijjz4ynCi6paSkKCUlpW0mfsyYMdq5c6fhVJ1DVVWV0tLSdP7555uOEjKUszDy+/2aN2+eUlNTNWPGDNNxOoWGhgbGThCSAAAGfklEQVQdOnRIknT06FG9/fbbSk1NNZwqut1zzz2qqqpSeXm5nnrqKV1xxRVasmSJ6VhRrampqe3Gi6amJv3pT3/SD3/4Q8OpoltiYqJSUlK0e/duSa3XQA0YMMBwqs7h9ddf13XXXWc6Rkhxt2YYvf/++1q3bp0GDhyo8ePHS5Ly8/N19dVXG04Wverq6lRQUKCWlhb5/X6
NGTNGmZmZpmMBQVVfX69f/OIXklqvsbz++uuVnp5uOFX0mz9/vu699141Nzerb9++evTRR01HinpNTU16++23tWjRItNRQsrm9/v9pkMAAACgFac1AQAALIRyBgAAYCGUMwAAAAuhnAEAAFgI5QwAAMBCWEoDgFE/+tGPNGPGDBUUFEiSli9frqamJv3yl7/s8LELCgqUkZHRoZXEa2pq9OCDD+qvf/2rfD6fMjIyNHfuXMXGxkpqXQ7nL3/5iyZPnqxdu3Zp69atio+Pl91u14IFCzRs2LCAP9eWLVvUpUsX/dM//dM55wUQ+Zg5A2BUbGysNm3apIaGBtNR2vF6vfL7/brzzjt1zTXXaNOmTdq4caOampr061//WpL01Vdf6eOPP9b69es1ffp0SdLcuXO1bt063XPPPVqwYMFZfb6tW7dq27ZtofhyAEQQZs4AGOVwOPSzn/1MK1eu1N13393ufSfPfA0bNkzbtm3Tli1b9PTTTys+Pl6ffvqpxo4dq4EDB6qoqEjHjh3Ts88+qwsvvFCS9Pbbb6uwsFCNjY0qKChQZmamWlpatGTJEm3dulXHjx/XzTffrKlTp2rLli36zW9+o549e2rPnj1auHChunbtqsmTJ0tq3af1/vvvV3Z2tvLy8jRz5kzV1tZq/Pjxmj9/frvsP/nJT7Rv3z5J0n//93/rlVdeUXNzsy666CI98cQT6tatmwoKChQbG6vq6molJydr27Ztstvt+t3vfqf58+frq6++0rPPPiu73a74+HitWrUq1P8dACyAcgbAuJtvvlk33HCDbr/99oA/ZteuXXrjjTeUkJCg7Oxs5ebmqqSkRCtXrtTLL7+sefPmSZI+//xzlZSUaN++ffrnf/5nXXnllSotLVV8fLxeffVVHT9+XFOnTtVVV10lSdq5c6fWr1+vvn37qqioSGlpae0+b48ePdS7d2999tlnev7553XHHXdo3bp1kqSSkpK255WXl2vgwIGSpJycHE2ZMkWS9Otf/1olJSW65ZZbJEm1tbVas2aNYmJi9PTTTysuLk633XabJGncuHFavny5kpOT27YhAxD9KGcAjOvRo4fGjx+voqIinXfeeQF9zJAhQ5SUlCRJuvDCC9vK1cCBA7Vly5a2540dO1Z2u139+vVT3759tXv3bv3pT3/SJ598oo0bN0qSDh8+rM8++0xdunTRkCFD1Ldv33P+Wp544gk9//zzcrlceuSRRyRJf/nLX7R06VIdPnxYjY2N+ulPf9r2/DFjxigmJuaUxxo2bJgKCgo0duxY5eTknHMmAJGFcgbAEm699VZNmjRJkyZNanssJiZGPp9PkuTz+dTc3Nz2vhMX5EuS3W5vG9vtdrW0tLS9z2aztfs8NptNfr9fDzzwgEaOHNnufVu2bFFcXFzb+OKLL24rcCccOXJEX375pS666CLV19d/5+uYO3fud25AKCgo0HPPPadLLrlEa9eu1datW9ve161bt+95RaRFixbpww8/VGVlpSZPnqxXX31VTqfze58PIDpwQwAAS0hISNCYMWPanRq84IILtGPHDkmtpwm/Xc4CtWHDBvl8Pu3bt0/79+9X//799dOf/lSrV69uO96ePXvU1NT0nY8dMWKEvv76a5WWlkpq3VT8scce08SJE09bqk7W2NioxMRENTc3a/369d/7vO7du6uxsbFtvG/fPg0dOlSzZ8+W0+lUTU1NwJ8TQORi5gyAZcycObPdRe9TpkzRz3/+c91www0aOXJku1mtQPXu3Vs33nijGhsb9eCDD6pr167Kzc3V559/rkmTJsnv98vpdOq55577zsfabDY9++yzevDBB/Xcc8/J5/Pp6quvVn5+/lllmD17tnJzc+VyuTR06NB2BezbMjMzlZeXp7KyMs2fP18rVqzQZ599Jr/fryuuuEKXXHLJWX/9ACKPze/3+02HAAAAQCtOawIAAFgI5QwAAMBCKGcAAAAWQjkDAACwEMoZAACAhVDOAAA
ALIRyBgAAYCH/D9Nb5JFvyswVAAAAAElFTkSuQmCC\n", 405 | "text/plain": [ 406 | "
" 407 | ] 408 | }, 409 | "metadata": {}, 410 | "output_type": "display_data" 411 | } 412 | ], 413 | "source": [ 414 | "plt.figure(figsize=(10,5))\n", 415 | "seaborn.boxplot(data=df, x='NumberOfParts', y='Length')" 416 | ] 417 | }, 418 | { 419 | "cell_type": "code", 420 | "execution_count": 159, 421 | "metadata": {}, 422 | "outputs": [ 423 | { 424 | "data": { 425 | "text/html": [ 426 | "
\n", 427 | "\n", 440 | "\n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | "
NumberOfPartsLengthLongestPartTLDRandomness
\n", 454 | "
" 455 | ], 456 | "text/plain": [ 457 | "Empty DataFrame\n", 458 | "Columns: [NumberOfParts, Length, LongestPart, TLD, Randomness]\n", 459 | "Index: []" 460 | ] 461 | }, 462 | "execution_count": 159, 463 | "metadata": {}, 464 | "output_type": "execute_result" 465 | } 466 | ], 467 | "source": [ 468 | "df.loc[df['NumberOfParts'] == 1]" 469 | ] 470 | }, 471 | { 472 | "cell_type": "code", 473 | "execution_count": 160, 474 | "metadata": {}, 475 | "outputs": [ 476 | { 477 | "data": { 478 | "text/plain": [ 479 | "" 480 | ] 481 | }, 482 | "execution_count": 160, 483 | "metadata": {}, 484 | "output_type": "execute_result" 485 | }, 486 | { 487 | "data": { 488 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAmcAAAFACAYAAAD589sCAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAIABJREFUeJzs3Xl8nHW5///Xfc+WyZ60adJ9o/sOFFoquyxSUEBQvy7nWDwHVBQR4XsARVABUdEfB48/pMJhEwQppQgB2tKF7rRAC93SLV2TZt8z+33f3z8mk6ZtmsxM7nsmmVxPHz46mcxyMZ1m3rk+m2IYhoEQQgghhOgV1GQXIIQQQgghTpBwJoQQQgjRi0g4E0IIIYToRSScCSGEEEL0IhLOhBBCCCF6EQlnQgghhBC9iIQzIYQQQoheRMKZEEIIIUQvIuFMCCGEEKIXsSe7gJ7QdR1NkwMOhBBCCNH7ORy2qG7Xp8OZphk0NHiSXYYQQgghRLcKCrKiup0MawohhBBC9CISzoQQQgghehEJZ0IIIYQQvYiEMyGEEEKIXkTCmRBCCCFELyLhTAghhBCiF5FwJoQQQgjRi0g4E0IIIYToRSScCSGEEEL0IhLOhBBCCCF6EQlnQgghRJxeffXv/PTOH+Lz+ZJdikghloWz++67j7lz53Lttde2X/e73/2Oq6++muuuu47bb7+dpqam9u89/fTTXHHFFVx11VWsXbvWqrKEEEII07z++j84cvQw9fV1yS5FpBDLwtmNN97IM888c9J18+bN45133uHtt99m1KhRPP300wDs37+f4uJiiouLeeaZZ/jVr36FpmlWlSaEEEKYStNCyS5BpBDLwtns2bPJyck56bovfOEL2O12AGbOnElFRQUAK1asYP78+TidToYPH87IkSP5/PPPrSpNCCGEMFUoJOFMmMeerCd+4403+NKXvgRAZWUlM2bMaP9eYWEhlZWV3T6GzaaQm5tuWY1CCCFENNxuh3weCdMkJZw99dRT2Gw2vvzlL/focTTNoKHBY1JVQgghRHzq65vl80h0q6AgK6rbJTycLV68mNWrV/P888+jKAoQ7pRFhjgh3EkrLCxMdGlCCCFEXGRYU5gpoVtprFmzhmeeeYannnoKt9vdfv1ll11GcXExgUCAo0ePcujQIaZPn57I0oQQQoi4STgTZrKsc3bXXXexefNm6uvrueiii/jxj3/MwoULCQQCLFiwAIAZM2bw61//mnHjxvGlL32Ja665BpvNxi9/+UtsNptVpQkhhBCmCgYDyS5BpBDFMAwj2UXEKxjUZIx
fCCFE0nz1q/MB+NnP7uWCCy5McjWit4t2zpmcECCEEEL0UCAQTHYJIoVIOBNCCCHi0HHgSYY1hZkknAkhhBBxCAZPdMsCAQlnwjwSzoQQQog4BAL+9svSORNmknAmhBBCxMHv93d6WYieknAmhBBCxKFjIPP5fEmsRKQaCWdCCCFEHDoOa8qcM2EmCWdCCCFEHDp2y/x+6ZwJ80g4E0IIIeIgw5rCKhLOhBBCiDj4fF4AFEVtvyyEGSScCSGEEHGIdMvS7BnSOROmknAmhBBCxMHrDXfL3M6M9stCmEHCmRBCCBGHyFBmmkPCmTCXhDMhhBAiDuFAprSFM0+yyxEpRMKZEEIIEQev14vD7sSuOqVzJkwl4UwIIYSIg9frwWlz4bC5CAYDaJqW7JJEipBwJoQQQsTB6/Vgtzmw25wAsp2GMI2EMyGEECIOHo8Xm+LE0RbOPB6ZdybMIeFMCCGEiIPH04rD5sRhcwHIogBhGglnQgghRBw8Hg92m3TOhPkknAkhhBBx8Ho8ONoWBICEM2EeCWdCCCFEHDzecDizt3fOWpNckUgVEs6EEEKIGOm6js/nPWnOmXTOhFkknAkhhBAximw623FYUxYECLNIOBNCCCFiFAlidpsTu+pAQZHOmTCNhDMhhBAiRpEg5rS5UBQFh12OcBLmkXAmhBBCxKi9c6aGFwM4bC5ZECBMI+FMCCGEiFGkcxaZb2a3SedMmEfCmRBCCBGjSOcssgGtXXXKggBhGglnQgghRIwinTN7h3DmaZVhTWEOCWdCCCFEjHy+E1tphP904pFhTWESCWdCCCFEjCLzy+yqI/ynzYlXttIQJpFwJoQQQsTI5/NiU+2oqg0ID2v6fL4kVyVShWXh7L777mPu3Llce+217dc1NDSwYMECrrzyShYsWEBjYyMAhmHw8MMPc8UVV3Ddddexc+dOq8oSQggheszr9WK3Odq/ttsc+PxeDMNIYlUiVVgWzm688UaeeeaZk65buHAhc+fOZdmyZcydO5eFCxcCsGbNGg4dOsSyZcv4zW9+w0MPPWRVWUIIIUSP+Xy+9pWaEO6c6bpOKBRKYlUiVVgWzmbPnk1OTs5J161YsYLrr78egOuvv54PPvjgpOsVRWHmzJk0NTVRVVVlVWlCCCFEj/h8Pmyqg0M1OzhUs6N97pkMbQoz2BP5ZLW1tQwaNAiAgoICamtrAaisrKSoqKj9dkVFRVRWVrbf9kxsNoXc3HTrChZCCCE6oetB7KqDgzU7ABg5YDIATifyuSR6LKHhrCNFUVAUpUePoWkGDQ2yOkYIIURitbR4sCkODMJzzCKds+rqBlyurGSWJnqxgoLo3hsJXa05YMCA9uHKqqoq8vPzASgsLKSioqL9dhUVFRQWFiayNCGEECJq4WHNE/0NW1s48/tlWFP0XELD2WWXXcaSJUsAWLJkCZdffvlJ1xuGwbZt28jKyup2SFMIIcTpdF1Pdgn9gt/vPyWchS8HAoFklSRSiGXh7K677uIb3/gGBw8e5KKLLuL111/n1ltvZf369Vx55ZVs2LCBW2+9FYCLL76Y4cOHc8UVV/DAAw/w4IMPWlWWEEKkrDfeeI3bf/i9ZJfRLwQCgfZuGUg4E+aybM7Zn/70p06vf+GFF067TlEUCWRCCNFDr7zyYrJL6DcCAT/ZLlv715FwJsOawgxyQoAQQggRo2Aw2GnnLBiUfc5Ez0k4E0KIFCO71FsvGAigKh06Z22Xg0EZ1hQ9J+FMCCFSjCwKsJZhGIS0EDb1RDhT2ztnwWSVJVKIhDMhhEgxmqYlu4SUFjmiSVVOTNtW2ztnEs5Ez0k4E0KIFCOdM2uFQuEA1rFzFrksZ2sKM0g4E0KIFCOdM2ud6Jx1GNZUIuFMOmei5yScCSFEipHOmbUi4UxRTnyERi5L50yYQcKZEEKkGE2TgGClSGdS7RDOwpcV6VoKU0g4E0KIFCNbaVjrxLDmyR+hqqpK50yYQsKZEEKkGBnWtFb
k9VVODWeKiq5L50z0nIQzIYQQIgaRAKac8hGqKKoEY2EKCWdCCJFiZFjTWpoW6ZwpJ12vKjLnTJhDwpkQQqSYU0ODMFd75+yU11lBlWAsTCHhTAghUoyEM2u1zzk79SNUUWRYU5hCwpkQQqQYVZUf7VaKdMdO65xJOBMmkX/BQgiRYiScWetE5+zUYU0JZ8Ic8i9YCCFSjISzBDlt+FiGk4U55F+wEEKkGJvNnuwSUtqZJv0rXXxPiFhIOBNCiBQjnTNrtc85O/UbioQzYQ75FyyEECnGZrMlu4R+SoY1hTkknAkhRIqRcCZE3ybhTAghUozscyZE3ybhTAghhBCiF5FwJoQQQphCFgMIc0g4E0IIIcxgyJCyMIeEMyGEECIGkQB2ap/MQMKZMIeEMyGEECIG7QHstD3NDAlnwhQSzoQQQogYRDb5NU7pnRkSzoRJJJwJIYQQMTgRwE7pnBmGnM4gTCHvIiGEECIGitLWOTOkcyasIeFMCCGEiIGqti0IODWcGRLOhDkknAkhhBAxaO+cdbJeU4Y1hRnsyXjS559/ntdffx1FURg/fjy//e1vqaqq4q677qKhoYEpU6bw+9//HqfTmYzyhBBCiDNqXxBwSudMN4z24CZETyT8XVRZWcmLL77IG2+8wTvvvIOmaRQXF/P444/z3e9+l+XLl5Odnc2iRYsSXZoQQgjRrROrNfWTrjdkQYAwSVLeRZqm4fP5CIVC+Hw+CgoK2LRpE1dddRUAN9xwAytWrEhGaUIIIUSXZJ8zYbWED2sWFhZyyy23cOmll+JyuZg3bx5TpkwhOzsbuz1cTlFREZWVlYkuTQghhOjWmfY50w1dOmcJUFKyi5aWFiZPnkp6enqyy7FEwsNZY2MjK1asYMWKFWRlZfGTn/yEtWvXxvVYNptCbm5q/sUIIUS85Oeitfz+DCA8x6wjQ9fJyEiT199ClZUV/Pzn9wDwrW99m1tu+V6SK7JGwsPZhg0bGDZsGPn5+QBceeWVfPrppzQ1NREKhbDb7VRUVFBYWNjtY2maQUODx+qShRCiT5Gfi9ZqaQkAJ885MwwDA4NAQJPX30IHDhxpv3zsWHmfe60LCrKiul3C+69Dhgzhs88+w+v1YhgGGzdu5KyzzuL8889n6dKlALz55ptcdtlliS5NCGGio0ePsHr1ymSXIYTpIkOXuq7jDbTQ5K1lf9W2k74nrFFfXw+ATVGor69LcjXWSXjnbMaMGVx11VXccMMN2O12Jk2axNe//nUuueQSfvrTn/LEE08wadIkbr755kSXJoQw0R/+8AhlZce45BL5RSvRdF3mPlkp8tpWNx+lxR8OC1uPfHDS94Q16upqAMh22qmprkpyNdZJyj5nd9xxB3fcccdJ1w0fPly2zxAihZSVHQPCq7NtNluSq+lfJJxZK/LaNnprzvg9YY2amhpsqkqW005lbU3Knsog7yIhhKX8fn+yS+h3NE1LdgkprX1Y0zj9dVZV+UXEStXVlbjtNtJtNvyBAM3NTckuyRISzoQQlvL5vMkuod/RdQlnVmrvjp22z9mJczeFNcrLy3CrCm57OARXVBxPckXWkHAmhLBUIBBIdgn9jq7r3d9IxK2roUsZ1rSOrutUVBwn3WEj3REOZ+Xl5UmuyhryLhJCWOrU8weF9UIh6ZxZKTJ02dk7W87WtE5NTQ3BYJAMh510uw0FKCs7muyyLCHvIiGEpVJxsm5vJ8Oa1jrxnu5sWFM+Vq1y9OhhADLtNlRFIcPp4OjRI93cq2+Sd5EQwlIulyvZJfQ7Mqxpra5+4ZBfRqxz5MghADKd4Y0mMuwqhw+XJrEi60g4E0JYKi0tLdklCGEqmXOWHIcOHcTtsONoe42zHHaqqqrwePrWKQHRkHeREMJSTqd0zhJN5vmJVFR6YB+Z9hOxJbutg3boUOp1zyScCSEsJRvQJp4MrVlLhjUTz+v1cLziONlOR/t1kXBWWnogWWVZRsKZEEKkGAkIItWUlh7
AMAxynCcONnLZbKTZ7ezfvzeJlVlDwpkQQqQYCWci1ezbFw5gOR06ZwDZDpW9e3cnoyRLSTgTQogUI0cIWaurOX0y388a+/aVkO6w47SdHFtynA4qKytT7hgnCWdCCJFiZMWgtSSAJd6ekt1kO07/pSPXFe6k7d27J9ElWUr+BQshRIqx26VzZqWuwpnsMWe+mppq6hvq24NYR9lOO4oCe/eWJKEy60g4E0KIFCOdM2udCGCnz+2TcGa+PXvCwSvXeXo4s6sqWU4He/ak1rwz+RcshBApRuacWStyPFZnyy4knJlvz57d2FSVrA4rNTvKcdjZu7cETUudY8sknAkhRIqRveWspWltAayTdKZpocQW0w/sKdlFtsOOeoZVyLkuB36/v/3szVTQeQw9xcGDB3n22WcpLy8nFDrxxnvxxRctK0wIIUR8ZCsNa50IYKe/zqnUvekN/H4/pQcPMDLzzMfAReailZTsZtSoMYkqzVJRhbOf/OQnfOMb3+BrX/uazGUQQoheTsKZtSJNCqWTcNaxgSF67sCBfei63ul8swi3TSXNbmfPnt1cffX8BFZnnajCmd1u55vf/KbVtQghhBC9XjB45gAWDAYTWEnqi0z072ylZoSiKGQ7bJTs3pmosizXZRusoaGBhoYGLr30Ul5++WWqqqrar2toaEhUjUIIIUSvEQqFA9jpHUpFwpnJSkp2keF0nLb57KnyXA6qqquor69LUGXW6rJzduONN6IoSvueLs8++2z79xRFYcWKFdZWJ4QQQvQygYAfOH1YU1EUgsFAMkpKSYZhUFKyi5xONp89VaSztmfPbubMmWd1aZbrMpytXLkSCE/Ic7lcJ33P7/dbV5UQJtN1nVAohNPpTHYpQog+LhAIB7BTO2cKSvv3RM8dP15OS0sLI/Ozur1ttjO8mjNVwllUs/u/8Y1vRHWdEL3V448/yre//dWUaXkLIZLH5/MBnXfOIt8TPVdSsgvoer5ZhKoo5Djt7E6ReWddds6qq6uprKzE5/Oxa9eu9uHNlpYWvF5vQgoUwgwffbQRgNLSA5xzTn6SqxFC9GV+f1sAU07ubygoMqpkot27d+K028iI8jiyXJeDAwf24/f7cLnOvPVGX9BlOFu3bh2LFy+moqKC3/72t+3XZ2RkcNddd1lenBBm6NgtKy3dzznnzE5iNUKIvu5MnTNQ8Ho9iS8oRe3cuZ0chy3qrWHyXA4ONnnYu3cP06bNsLg6a3UZzm644QZuuOEGli5dylVXXZWomoQwVcc2966d2+Hm/5PEaoQQfV1k5Eg9pXOmKqqMKpmktraGysoKJuRmRn2fyPDnrl07UjucRZSXl/Pcc8+ddF1mZiZTp05l0qRJlhQmhFm2b/+cNLvChUVeVpfsJBgM4nB0P4dBiL7KMAzZiNZCke7YaQsCFAWPRzpnZtix43MA8tOi/1ntUFWyXQ62b/+Mr3/9W1aVlhBRLQjYsWMHr776KpWVlVRWVvLqq6+ydu1afvGLX/C3v/3N6hqF6JGS3dsZlxNgcn6IYDBEaen+ZJckhKVkl3preTwelLb/daQoKl4JZ6bYvv0znDYbWY6oekjt8p0O9u4t6fMLM6IKZxUVFSxevJh7772Xe++9l8WLF1NXV8fLL7/Mm2++aXWNQsRN13XKj5czIlNjeGb4zLuysmNJrkoIa8n5jtZqbW3BYXfBaVtpqHg8re2L50R8DMNg27ZPyHPZY+4AD0hzomkaO3dut6i6xIgqnNXW1p60P5TD4aCmpoa0tDTZN0r0asFgkFBII8Ohk+kI/8CUYQeR6mQjVGu1trbgtJ++GlBVVEJaqH2TWhGfI0cOU19fz8C02PNFXpoDm6qydevHFlSWOFH1C6+77jq+9rWvcfnllwPhzWmvvfZaPB4PY8eOtbRAIXrC4Qj/Q/UEVTzB8G9gaWl9e4m1EN2RjVCt1dLSgtN2+s8RpW2BQEtLS5/fyiGZPvlkM0Bc4cymKOQ77WzZ8hH
f+973++zcy6jC2e23386FF17I1q1bAfjVr37FtGnTAPjjH/9oXXVC9JCqqgwZMoSjLQc52hLeK2fYsOFJrkoIa8mKQWs1NzfjsLnQDf2k6yOrN5ubmxkwYGAySksJH23aQI7LQVqU+5udqiDdxa6aag4fPsioUWNMri4xop5pN2XKFAoLC9vnMpSXlzNkyJC4nrSpqYlf/OIX7N27F0VRePTRRxk9ejQ//elPKSsrY+jQoTzxxBPk5OTE9fhCdDRh4hQ2fHiMonQNh93OmDFnJbskISzl80k4s1JTUxNOew6+YOtJ1yvt4awpGWWlhJqaavYf2Me4nIy4H2OQ28Uumtm0aUOfDWdRzTl76aWXuOCCC7jlllu47bbb2v8fr0ceeYQLL7yQ999/n7feeouxY8eycOFC5s6dy7Jly5g7dy4LFy6M+/GF6GjatBl4grD0aBrjJ0yUeZIi5cm8Sms1NzXhsrtPu15tH9ZsTnRJKWPjxnUAFKa7urnlmblsKnkuJ+vXfdhnF2dE1Tl78cUXef/998nLy+vxEzY3N7NlyxYee+wxAJxOJ06nkxUrVvDSSy8BcP311/Od73yHe+65p8fPJ8TkyVPbL0+ZMj2JlQiRGK2tLckuIWWFQiE83lZceemnfU9VwsNwjY2NiS4rZaxds5psl4OMGLfQONXgdBe7jpdz6FApo0f3vbnxUf3XFxUVkZXV/anw0Th27Bj5+fncd999lJSUMGXKFH7+859TW1vLoEGDACgoKKC2trbbx7LZFHJzT/8HIkRHHd8j06dPlfdMgsnrnRgdt88IhXzyulsk8tnkcpz++kaGNQMBj7z+cTh69CgHSvfHdCrAmRSmu9jd0MJHH61j1qxpJlSXWFGFs+HDh/Od73yHSy655KQhoQULFsT8hKFQiF27dvHAAw8wY8YMHn744dOGMBVFiWqFhaYZNDRI+15ELysrX94zCSavd2I0NNS3X66srJHX3SJHjpQDkGbvLJwpuBxuKiqq5PWPwzvvvIsCFPVgSDPCaVMZmOZg2bKl3Hzzt7HZ4ltcYLaCgugaXVHNORsyZAjz5s0jGAzS2tra/v94FBUVUVRUxIwZ4XOvrr76anbt2sWAAQOoqqoCoKqqivz8/LgeX4iumDE0L0Rv1NDQ0H65vr6+i1uKnoi8zmmddM4i13f8uxDR0XWdlSuXMyDNGfcqzVMNzUijsbGRbds+NeXxEimqztmPfvQjILw82+0+fRJkLAoKCigqKqK0tJQxY8awceNGxo4dy9ixY1myZAm33norS5Ysad9TTQgzpaX17P0rRG9VX1/b6WVhrkiH0uXofDWhy5ZxUhdTRGf79s+oq6tl+oBs0x6zwO3CabOxevUKzjlntmmPmwhRdc62bt3KNddcw5e+9CUASkpKeOihh+J+0gceeIC7776b6667jt27d/P973+fW2+9lfXr13PllVeyYcMGbr311rgfX4gz6S2tbSHMFpkLZQdqqquTW0wKiwQv9xnCWZojg/q6ukSWlBJWrfoAh83GIBOGNCNURaHI7WTzRxtobu5bK2ij6pw9+uijPPvss/zgBz8AYOLEiXz8cfxHI0yaNInFixefdv0LL7wQ92MKIUR/Vl0dnhaSBlRXVSa3mBRWX1+Pw+bEbut8S540RwbH6w5gGEaf3Z0+0TweD5s2rafI7cRm8ms2NDONIy1e1q//kKuvvtbUx7ZSVJ0zgMGDB598RzXquwohhLBYZWUFdsK/cTe1NMspARapq6vF7TzzakK3M5NgMCB7zcVg48Z1BINBhmSYf+RVlsNOltPBqlUfmP7YVooqYQ0ePJhPP/0URVEIBoM8++yzcqamEEL0IuVlx7ADjravKyuPJ7OclFVXV0uavYtw5gh/T+b9RW/NmlVkOOzkOHu2t1lnFEVhcLqT/fv3UVHRd/5NRBXOHnroIV5++WUqKyu56KKL2L17Nw8++KDVtQkhUkBf3aG7LzEMg+PlZTg4Ec7Kysq
SWVLKqq2tJe0M880A0trCWTR7dQpobGxg587tFLqdlg0DF6WHO3IbNqy15PGtEFVMzc/PP+2A8+eff57vfve7VtQkhEghmqZht5v/G7E4ob6+Do/PSz7hcKYAx44dSXJVqccwDBrq6xhYMOqMt0lvG/Ksq5NwFo0tWz7CMIweHdfUHbfdRo7LwaZN67nxxq9Z9jxminvi2PPPP29iGUKIVOX3+5NdQso7ejQcxJyEg1meqnL06OGk1pSKmpqaCGkh3M4zbyQa+V5tbU2iyurTPv74I9wOO1k9PK6pOwVpTg4c2N9ntjmJO5zJUIUQIhrBYCDZJaS8w4cPASeGNAt1nUOlpUmrJ1VFAldXCwJsqh2Xwy3DmlHQNI0d2z9jgNNu+crWgWnh1bU7dnxu6fOYJe5wJkuEhRDRCAaDyS4h5R06VEq2qhLZxa8IqKiswOfzJbOslFNbG94/Lt3Z9Uap6c6s9tuKMzty5BBen4+8tM63JTFTttOO3aaye/dOy5/LDF32EWfNmtVpCDMMQ4YqhBBR6Xggt7BG6f69FOk6kR7lYMDA4NChUiZOnJzM0lJKTU33nTOANHsW1bIRcLdKSw8AWLJK81SKopBlt7F//z7Ln8sMXb4iW7duTVQdQogUpet6sktIaV6vl2PlZVwMHGq7bmjbn6Wl+yWcmaimphpVsZFmP/NqTQh3zipq9yeoqr7r2LGjqIpCuklnaXYn02Hn2LEjfWKDYNlJVghhKV2XzpmVSkvDu9EP63BdFpClquzbtydZZaWk2toa0l2Z3X6wp7uyaW1tkWHlbtTUVJPuiH2+WYM/SGljKw3+2KZMuO02fD4fHk9rTPdLBglnQghLhUKhZJeQ0vbu3Q1wUjhTUBim6+zZvSs5RaWo6uoq3I4zr9SMSG9bsVlTI0ObXWlqasQeYwOrwR9kR7OPmVdew45mX0wBzamGn6wvnLMp4Uz0KzL/KfHkGCFr7dq1g4GKSgYnf8qNBCqrq2RLBxNFH87CCwYknHUtEPBjizGc1fkCXDP/Wn784x9zzfz51PmiXw1uawtngUDvX0EuO0OKfiUUCmGzJWZ+gwhraen9v6X2VZqmsXvXDqYaOpwSzka3/blz53YuuujShNeWajRNo76+jkGFZ3V7W+mcRUdVbcS6K1d+mpN3i98BDN4tLmZqVvTncUaeS1V793wzkM6Z6GdCIdnWIRE6LgJobGxMYiWpbc+e3Xh9Pjo76bgISFdUtm37NNFlpaSGhnp0Xe9yA9qI8PmaioSzbrjd6cQ66SHX5WBqVhrblr3H1Kw0cl2O7u/UJtSWztLS3DE+a+JJ50z0K7LnVmJ0nHDb3NyUxEpS2yefbEEFxnTyPRWFswydrZ9sQdM06Rj3UHV1FXCiK9YVVbWR7sqUcNaNvLw8AnrsG9rnuhwxhbIIf0hDURRycnJjvm+iSedM9Csy5ywxWltbO70szGMYBh9tXMdowE3nwzSTgKaWZkpKZGFAT0Xm7mV0swFthNuRJeGsG4WFRfiCIUIJ2m7HE9LIz8vH4Yg92CWahDPRr0jnLDE6TrjtC5Nv+6JDh0o5XllBV7uYjQMcisL69WsSVVbKigQtdwzhrLqqysqS+rwRI0YC0BJMzC/NLSGdkaNGd3/DXkDCmehXJJwlRsc5Z7LPmTU+/HAVNhSmdnEbFwoTDYP1az+U934P1dTU4LA7cdpdUd0+3ZlFbV2NnEPdhXHjJgDEvF9ZPIK6TnMg2P6cvZ2EM9GvBAJy7FgidAxkmiYnBJgtGAzy4aoPGI9B+hmGNCPc3zAtAAAgAElEQVRmAS2eVrZs2ZSY4lJUbW11t2dqdpTuzCIYDMqcyy7k5eVTWFgU03YY8ar3hQPg5Mld/TrTe0g4E/2Kx+NJdgn9Qse5fdI5M9+WLZtoamnm3ChuOxbIVVSWL3/f6rJSWk1NDWn2rs/U7CiycED2mevarFnnUBcIoVncYazxBXA5nUyYMMnS5zGLhDOR8joOK9T
X1yWxkv5D5pxZa+n7xeQqKt3vuBVetXm2ofP559s4frzc8tpSVW1NTVQrNSMiW27U1dVaVVJKOPfc89B03dLumWEYVPuCzJh5Tp9YDAASzkQ/0HHF1LFjR5NYSf/h9/s6XJahZDOVl5exY+d2zjV01G6GNCPOIfzDXrpn8dE0jcamBtzO6Dtn7vbOmYSzrkydOgO3202Fx7qfEw2BEL5QiDlzLrDsOcwm4UykvI7bCOzevSOJlfQfkeFjw2HQ2tqS5GpSywcfLEUFzo7hPtkoTABWrlgqCwPi0NBQj2EYbZvLRifNng4o0jnrhsPhYM6ceVT7Amhx7HkWjYpWHw67g9mz51jy+FaQcCZS3ubNm8h2wrUjfZSU7JIJugnQ/hpnQpO83qbRNI1VK5czAciKsmsWMRtobmnh4483W1JbKosErFg6Z6pqw+3MkHAWhQsvvISgplPtM797phsGFd4A584+n/T0dNMf3yoSzkRK83g8bNmykfMG+ZhbFEDXDdnzKQEaGhoAMHIMGhpknp9Ztm37lKbmJmbFcd8xQJaisubDlWaXlfIaGuoBSIuhcxa+fUb7fcWZTZ06nby8PMpbfd3fOEY13gABTePiiy8z/bGtJOFMpLSPPtpAMBjigqIAIzI1hmXq8uGUALW1NahuFdKhqalZhtJMsmnTetIUhXFx3NeGwlRDZ+vWj/F6vabXlsrq6yPhLCOm+7ns6dTXSTjrjs1m4+KLL6fGF8Bv8ikuZa0+srOymTXrHFMf12oSzkRK+/jjzeSnwbgcDUWB8wf52bN3jwxtWqyysgI9XYcMwDDazyUU8dN1nS2bNzLeMLDHOKQZMQkIhkJyGHqMGhvDnWCXPbZhMemcRe/SSy/HMOB4q3lDmwFNp8YX4KKLL8Nu71tHiUs4SwKv10N5eVmyy+gX9u/bzYScAErbZ9mE3BAABw7sT2JVqe/oscPoWTpGVniCb1nZsSRX1PcdPnyQ5paWuLpmESMAp6KwY8dnZpXVLzQ1NeK0p2FTTz48Pqj5cbvd3HTTTbjdboLaycHCZXfT1NwkpwREYdiwEZx11jjKPX7TXq/jHh+6YXDppV805fESScJZEjz11J/58Y9vTXYZ/UJzSws5rhM71Oe2XZbOmXUaGuppamyCbML/J3wOpOiZ3bt3AtCTkwFtKIwwDHbu+NycovqJxsZGXI7Tu2bBkJ/58+dzxx13MH/+fIKhU8NZOqFQUIaRo3TppVfQHAjSHAyZ8njHPX5GjhzFqD5ynmZHEs6SQCakJ066Ox1P8MQQkCcUvtyXVu30NXv27A5fCIJSpqDkKCdtZyLic+DAfrJUlZw4hzQjhgFlZWWy/1wMmpubcdrSTrveYXdRXFzMk08+SXFxMY5Tzt102t0AtLTIdjLRmDfvIuw2mykLA1qCIRr9QS677AoTKks8CWcipQ0ZOoxjrSfmGhxtCQ9LDB48NFklpbytWz9BcSgo1QrKIQWtQGPHzs9P2phWxO7wwVIK9Z6fU1oE6IbO0aNHel5UP9FypnBmc+H1elm0aBFerxeH7eRw5rKH79PSIp36aGRlZXH2OedR6Q30eGjzeKsPRVGYN+8ik6pLLAlnIqWNHz+Jw802/G0LgPY12MnKzGDw4CHJLSxFaZrGR5s3ohfqRBo8xhCDUDDEtm1bk1tcH2YYBuXlZRSY8FiRxzh+XOa9RqulteW04BUNR1uga21tNbuklHXRRZfiC2nU+eNf4W207W02bdoM8vLyTawucSSciZQ2ceJkNAMONoW7Z/ubnEyYOAVF6dnQkOhcSclumhobMYZ1+K23ABSXwoYNa5NXWB9XX1+HPxhgQBe3MTBoAqqBzRgYdN55yCecm2VRUvS8Xg8OmzPm+0Xu4/V6zC4pZZ199rm4XC6O92BosykQwhMMceGFl5hXWIIlLZxpmsb111/PbbfdBsDRo0e5+eabueKKK7jzzjvlsGRhishE0LJWlaAOxz0KI0f2vcmhfcVHH21
AsSkYgzsEAxW0wRoff7JZ9juLU2VlBRAOVmeyBagDWoG3277ujB2FbFVtf0zRPZ/Piz2OcBa5j8cjCwKi5XK5mD17DtW+IHqcQ5sVHj82m43zzus7xzWdKmnh7MUXX2Ts2LHtXz/++ON897vfZfny5WRnZ7No0aJklSZSSKSl3eBXaQ4oGAYMHGjG4JDozJaPN6EX6HDKlkLGEAOf1ysLA+IUCVJ5XdympJuvO8rTdSorjve0rH5B13WCwSB21RHzfSP3CQRk8UUs5s6dR0DTqI9jaNMwDKp9AaZOnU5mZpYF1SVGUsJZRUUFq1ev5qabbgLCL+amTZu46qqrALjhhhtYsWJFMkpLKNn7xno2mw2bqnKwycbGyvBvsX1tM8K+oqLiOFWVlRhFnbyvBwEqsvlpnCoqjqMAuV3c5tSPsa4+1vKBCplzFpXIKI5Njf3nRuQ+shgmNrNmnYPD4aDKE3uobQlqtAZDzJkzz4LKEicpn1KPPvoo99xzT/skyfr6erKzs9s/NIuKiqisrOz2cWw2hdzcvrslQmamE4cj9t/GRPT8fj+arlPWaqM1FPldJNSn3ze91bp14a6YUdhJOHMAA2DHzs/ktY9DdXUFeaqKXTfnF7oBwKdNTdjtOpmZsZ0X2d80N4dXE6mKrZtbnk5Vwj9zHA5V3vcxSefcc2ez7ePNTDSMmOYIV3nDge7yyy/p0695wsPZqlWryM/PZ+rUqXz00Uc9eixNM2ho6LsTLauqGsjIiO2sNhGbY8eOAmBXwaYYOG1w+PCxPv2+6a3Wr9+Akq7AGUYS9EE6+3ft4/DhcnJyuuoBiVOVHjjAQF2HHu5xFhEZ2N+1ay/jx0805TFTVWNjMwCKEvtAk9IW6FpavPIzJ0YzZ57Lxo0baAlqZDmjjyo1vgBnjR2Hzebula95QUF0Q60JD2effvopK1euZM2aNfj9flpaWnjkkUdoamoiFApht9upqKigsLAw0aUlXCgkk6OtduTIIQBctnDHYUiGzuHDB5NYUWry+Xxs++xTtKHaGfODMdiAnbBly0d88YtXJbbAPiwYDFJWdgwzB2mK2v48dOighLNu6G17y8UXzpSTHkNE7+yzzwWg2uuPOpwFNJ0Gf5Crzj3PytISIuFzzn72s5+xZs0aVq5cyZ/+9CfmzJnDH//4R84//3yWLl0KwJtvvslll12W6NISTv7BWm/fvr3Y1RPhbHRWkAP798prb7JNmzYQ8AcwRnQx7JYLSrbCylXLE1dYCjh48ACarmPmzny5QLqism/fHhMfNdXFP6QsW/fELj9/ACNHjqLGF30To8YXnh8YCXZ9Wa/Z5+yee+7hueee44orrqChoYGbb7452SVZTv7BWm9PyU5GZ2ntzZxxOSE8Xi/Hjsnu6GYxDIPid99CyVJgYBc3VEAbpbGnZDcHDx5IWH19XUlJ+DisESY+poLCMEOnpO28TnFmqhr+mJQFXIk3a9a5NASChKL8ZbrG6yczM5MxY86yuDLrJTWcnX/++Tz99NMADB8+nEWLFrF8+XKefPJJnM7Y95TpayScWcvv97P/wH7G5574zWt8bvhA3d27ZUsHs+zY8TmlB/ajjTvzkGaEMdpAcSi8sfifiSkuBWzfvo0BqkqWSfPNIkYB5cfLqaurNfVxU43NFp43Fk840/XwYgJZIR6fmTPPxjAM6qLonhmGQV0gxIwZZ7cH6r6s7/8X9GEytGatffv2oGkaE9oCGUChWyfXBbulY2AKwzB46e/PoaQrGKOi+PBygnaWxsYN6ygtle5Zd4LBIDt3fM5YC35WRHoLsr1J12y2cLDSjVA3tzydbkg464mJEyfjdDrbhyu70hLU8Ic0Zs48OwGVWU/CWRJpmpbsElLarl07UOCkcKYoMCE3wM4dn8kwhQnWrl3Ngf370CZrEOVOA8Z4A8Wl8L/PPS1/B93Yvv0z/IEAEyx47EIgW1HZsmWTBY+eOiKjOJoe+89rTQ//7HE4Un8kyAo
Oh4MpU6ZRH+g+GNe2BbgZM2ZZXVZCSDhLIo+n9y3zTSXbP9/GyGydDMfJAWByXpC6+nqOHy9PUmWpwev18PyLz0Ae0XXNIpygTdHYvWsn69evsa7AFLBp03qcioIVB46pKEw0dLZt/QSvV44XOhObzYbD4SCkx36kYEgPD8e53W6zy+o3pk+fRUsgiC/UdTiu9QUYPHgIAwZ0NfG175BwlkRNTY3JLiFleTwe9uwtYWre6T9Qp+aHfwvbuvWTRJeVUl599WUa6xvQZnU/1+xUxhgD8uB/n1vYvhm1OFkwGGTjhrVMMgwcJs83i5gGBIJBNm/eaMnjp4q0NDdB7fSfJaeeGnDq16G2+0g4i9/06TOAE52xzuiGQUMglDJdM5BwlnAdh3Hq6+uSWElq27r1YzRNY1bB6f+gC9N1hmQYbP5oQxIqSw379++luPgt9DF6eLv5WCmgna3R2NjA3//+nOn1pYLNmzfh8XqZaeFzjADyFJVVK2V7k65kZGQS1E4/SmhI7tguvw5ovrb7990zHpNtxIhRZGZkUtfFOZuNgRAhXWfq1BkJrMxaEs4SrOPwgaySss6aNavJS4NxOZ23ws8f5GPnru3ydxAHv9/Hfz/5OLjBmN6DOWP5oI/TWbbsPeliduKD5e+Rq6iMsfA5VBRmGTrbd3xOhRyEfkZZWVkEQqcP/Y4tmEmmKw+XPZ2zR17B2IKTo3QgFA5nckRW/FRVZeq06V3OO6tr66pNmTI1UWVZTsJZgrW0tHR6WZinvr6OTz/dzAWFPtQzjAbNGxzAMGDVqg8SW1wK+N//XUh5WRmhc0PhMzN7wJhqoOQoPPnnx6WT3MGxY0f5fPtnnGPoqBYNaUacQ/iD4P33iy19nr4sJyeHgHZ6OFMUBbczk2z3AM4aNPO07ZF8wdb2+4v4TZkyHW8whPcM887q/UGGDx9BdnbqvM4SzhLM7/e1Xw4ETm+Ti55bvvx9dN3g0qFnfn2L0nUm54dYtrRYVs3GYPny9/ngg6XoE/Xwcr+eskHo/BDNrc38/vcPEwzKkWYAxcX/wqYoxLLPuY/w3KabbroJt9uNr9t7hGWjMBlYsfx9vF5ZpNSZ3Nw8fMHYXxt/yIOiKGRlZVtQVf8xZco04ESHrCPdMGgMhJg6dXqiy7KUhLMEM4wT+xVJKDBfMBjk/ffeZsaAIEXpXe8NdeUwHzW1tTIZOkrbt3/GwoV/wSgyMKaauAVGDoTODbF37x6eeurJfr+9RmNjA6tWLmOmYZAZQ9fMB8yfP5877riD+fPnRx3OAOYBHp+X5cuXxlpuv5Cbm4cv0IpuxLbfnDfQSlZmdvtGtiI+w4ePID09nfpO5p01tc03mzQpdYY0QcJZwun6iQ+efv4ZZIm1a1fT2NTE1SO6/2g6uyDIoHSDf/1rcQIq69sOHSrlsd/9BiPLQD9fj3l1ZreGgz5F58MPV/KPf7xk8oP3Le+88xahUCjmg87TgOLiYp588kmKi4tJi+G+w1AYrSi8/dYb0r3sxIABAzEw8AVim4riDTYzcGCBRVX1H6qqMmnSFBqDpzc0GtoC26RJkxNdlqUknCVYKBTs9LLoOV3XeftfbzAiS2/fLqMrqgJXD/Oyd+8eSkrkOKczOXr0CA8+dD9+xUdoXggs2k/TmGSgj9Z5443XeOON16x5kl6utbWF9979F5OBghgTcBrhBUeLFi3C6/XGFM4ALjIM6hrqWb16RYz3TH2RvbM8geaY7ucNNjNgYGrsu5VsEydOpiUQJKCd3L1s8AcpGFhAfn48y8Z7LwlnCdbU1NTpZdFzn3yyhSNHj3LNCC/RHlt60VA/mU54c/Hr1hbXR+3dW8LPf3E3raEWQheFICPKOxqAF2gC5YAS/ro7ChjnGOgjdF555UVeeum5fjfE+d577+D1+bg4Cc89FhimKLz5xmsy5eIUgwaFJ1i2BqLfm9IwDFr9je33FT0zfvxEABoDJzc1GoMaEydNSUZJlpJwlmDl5WU
AaBkFHCs7luRqUoeu67z6jxcoSDeYUxj9Tt5ptnD37ONPNrN3b4mFFfY9n3yymV8+eB8ePIQuCUEMWzUppQpKi4LiV1A/VVFKo0zLChjnGehjdJYsWcSf//wnQqHYzzTsi/x+P8VvL2EcMNjiFZqdUVC40DCorK5iw4a1CX/+3mzQoEEAtPqjD2f+kIeQFpRwZpKxY8ehKAqNHead+UIavlCIs84an8TKrCHhLMEOHjyA4nSj5Q7j+PEyfL5Ypu2KM1m27F0OHT7MzWNascf4rr56hI8cF/ztb3/pN0GgOx98sJTfPvZrghkBQpeGIMZtmpRypcuvu74zGGcb7XPQHnnkwX5xvNDatatpamnmwiTWMBEYqCgUv7MkiVX0Pi5XGrk5ebT4G6K+T+S2RUWDrSqrX3G73QwdOozGDvudNbVdPuuscckqyzISzhJsz94SgukF6BkFGLpOaen+ZJfU5x05cogXX3iGaQNCzC2MfR5fmh3+fXwLpaWlvPbayxZU2LesXr2Cp556En2QjnaJRsyTlwBOHRWLdZRMAWOygX6uzufbt/G73/0m5SeqL3u/mEGKwqgk1qCicJ5hsG//PkpLDySxkt5n8JAhtPjqo7595LaDBw+xqqR+56yzxtMSOjHnrCkQRFEURo+2cqvm5JBwlkA+n4/jx8vRMwaiZ4QnicoPwJ6pq6vlt488iFsN8p+TWqKea3aq8wqDXDrUz+LF/+zXG9Pu3Lmd//nLEzAI9At0sHd/HysZo8MBbfv2z/jrX/+c3GIsVFVVxYGDB5hlGChJGNLsaAbhD4aP5HizkwwZMpSWwOmds9EDpzJ64OnbODT76lFVVYY1TTRq1Gh8oRD+tkUBTcEQg4sG43LF8xtk7ybhLIGOHy8Dw0B352E43CgOF+XlMu8sXnV1tTz4wH/R1FDLXdObyE/r2eTxf5/gYWp+iL/85QnWrFllUpV9h9fr5ck//xHSDbQLNOglWzMZowz0STqrV69I2cCwbVv4+KoJSa4DIB2FESh8+snmZJfSqwwdOgxfoBX/Kcc4jRo4lVGdhrM6CgcV4XD08BgN0W7kyNEAtATDw5mtIYNRo8d2dZc+S8JZAtXUVANguDJBUdCdme3XidgcP17Oz+//GXU1x/m/MxsZk93z1WV2FX46o5lJuUGefPJx3n//HRMq7TteeeVFaqqrCc3u+bFMZjMmG5ALf336f2htTb1jz44fL8OuKPSWTReKMCgvL+93q2W7MnTocACavdEdM9bsr2PosOFWltTvDB8+AgiHM0038ASD7delGglnCVRd3RbOnOH9CDRHBpVVVcksqU86dOggP7//Z3gaq7lvVhPjc81b9u+ywd0zm5k5IMjf/vYUixa92i8+oHbu3M677/0LfaxOr0kIHamgnavR1NTIM8/8NeX+ThobG8lUlKQPaUZkAD6/L+Xn+cViWFvQavLVdntbXddo9tanbHBIltzcPNLd6bQGNVrbFm8NS9EALOEsgQ4fPohid2E43ADo7hyOl5cRCES/9UN/V1q6n18+cA+2QBMPnNPI2Jzug5lhQL1fpbzVxopjzm5PZnDa4M7pLXxhsJ9//OMlXn75hZQLAx3V1dXy+B8fRclSMKb14v/OPNAn6axZs4ply95LdjWmysrKptUwMKLaEM56HiDN5cLptGjH4T5o0KBCnE4Xjd6abm/b4m9ANzSGDZNwZiZFURg8ZAieUAhP2yHoQ4YMTXJV1pBwliCGYbB126cEMwuJzFrXswajaSF27dqR5Or6hrKyY/z6V/fjNrz88pxGhmZEd87dijInlV4bTUGV50oyWFHW/QeOTYVbJ3u4fKifN998nTffTM1NagOBAI/97jc0tzYTmtP7hjNPZUw2MIoMnnn2ryl1qsPQocMIGgYVyS6kzVFFSdkPvXipqsqwYcNpiiKcRQLciBESzsxWVDQEn057OCssLEpyRdaQcJYge/eWUFtTjZY/qv06LWcIit3JunUfJq+
wPsLv9/GH3/8GJdjKvbMaGeiO/gDirdXOLr8+E1WBf5/oYW5hgFdeeYFt2z6Nqea+4OWXX+DA/n3heWY5ya4mCgro5+sY6Tq/+/3DtLa2JrsiU8yZcwE2VWVrDx7j1Fwdb86uwuCYYfCFCy/pQTWpaeTIUVENazZ6q1EUpX2emjDPoEGFeIMhvCGNjPQM3O70ZJdkCQlnCbJ69UoUm51Qh3CGaieQN4r1G9bJZrTd+Oc//8HRY8f4weRmitKjD2YAfq3rr7uiKvAfk1sZmqHzl//5U0r9PR07doTid99CH6PDMJMfPBjeNPKmm27C7XaDmVOXnBA6L0RTYyOLFr1q4gMnT3Z2Dl+48BI2Kwo1cQ5tTuzm62gYGLwHuNPSuOSSy+OqI5WNGDESb6DltBWbp2r01FBUOBiXy5WgyvqPgQML0A2DJn+IgQWpe6i8hLMECAQCrF33IcHckWA7uWsTGjiOgN+XslsEmKGmppp33nmTLwz2M21A4nfwd9lgwcQW6urreSeFdk5fvXolBgbGVAvmOQVh/vz53HHHHcyfP9/ccAaQD/ownRUrl6LrsYX13urf/u0WXK403kQhFEdAmw3kE57Mf13b17H6BNgPfPNb/05OTm4cj5DaRowYBUCjp+tV9k3+GkaNHp2Aivqf/Px8ILzH2cCBEs5ED6xZswqvp5Vgwem7GOlZReDOofjdt1N60nlPvPXWGxiaxk1jkte1mpCrMWtgkHfeXpwyRwnt2r0D8gArfrl3QHFxMU8++STFxcXWzGUbDK0trSmzV2Bubh7f/+EdHMHgbYh5cYCCQjZQAJxH7Cs/D2HwjqIwffpMrrpqfkz37S9GjhwFQIP3zOEspAVo9ja078klzJWbm9fp5VQj4cxiXq+XV197BSOzAD27kzPWFAV/0TQO7N/Lpk3SPTtVbW0Ny5e9xxcG+2OaZ2aFr4z20tzSynvvvZ3UOszS0tKM0cONe8/IEX7vL1q0KBxmLQhnkdpTZd4ZwLx5F3HTTd/gU2A5sQe0eJVh8LKiUFg4mLvvvg+brZfsQNzL5ObmkZmZRaPnzIsCGr21gNHeZRPmGjx4KBkZmSiKkpIHnkck+XCW1Pfaay9TX1eDb/K17as07dX7AAgVjGv7czzOqt387ZmnmD59JhkZGUmrt7d5/vm/YeghvjI6+XO9zsoJd8/eWPQqF110aZ9vqdvtDkj+yxq/trmDNltq/Rj7+te/RVNTY/t2IVdg7ZFOZRi8oChkDRjIAw8+QkZGjKfc9yOKojBq1GiOHzpz56yxrasW6bIJc2VlZfHCC+G5pkq85/X1AdI5s9CRI4d5550lBAdNDA9ftrFX78FevefEDRUV36gv0NhQzz//+UoSKu2dVq36gA0b1nH9KC+Dktw1i/jOBA9GyM9/P/F7QqHEz38zU1FhEWqLSi/ZWitmSnP4B3NBwaAkV2IuVVX5z//8IVdccTVrgWJAt+gv6SAGzykKmfkD+PVvfsegQan1Wlph5MjRNHprzjgNpdFTjdPpkjM1LaQoSkoHM5BwZqk333wdbA4Cw87t9rZ6ZgHBgeN4f2kxzc1NCaiudysp2cXTf/0zk/NDXDeq97R3Brl1bpnYyq7du3jmmaf69DzBmTPPwWgx4PSznHs/A9RjKqPHjCEnpy/sARIbVVW57bYf8eUv38BHwBuAZnJAK8HgRUVh4OAhPPLbP0qYiNKIESMJaUFa/Z3/w2n01jBi+EhUVT5eRfzk3WMRn8/Hxo3rCeSPAUdaVPcJFU0hFAyyceN6i6vr3crKjvHoIw8ywBXkx1NbsPWyd+m8wQGuG+Vl+fL3Wbz4n8kuJ25z536B9Ix0bNts5nfPTp2yZPIUJuWwAvVw1ZWpO3FdURT+7d++x7e+9e98DrwMBEz6i/oUg38AI0eP5eFH/sCAAb3xzK7eacSIkQBnPCmgyVfDiJEjE1mSSEG97GMvdWzevJFgMEBowJio76OnDwB3DmvWrLKwst6tubm
JRx9+AJvm4f/ObCLL2fMPI29IOWnPLW+o5+3wm8f6uKDIzyuvvMiGDWt7/HjJkJWVxS0LboMaUD5XTA1oxhCjy697pA5s22xMmDiJyy+/0rzH7YUUReHGG7/GD35wB/sVhecVBW8P/6I2YPAmMHX6TH7168fIzk69zqOVhg8/czjzBVvxBT0y30z0mIQzCwQCAf75+j/AnYue1ckKzTNRFPwFE9i9eyeffdaTvcL7rmeeeYrqmip+Oq3JtHlmnpBy0p5bHhPCmarAf072MC5X4///yxPU1nZ/pEtvdMkll/OlL12LuldF2WleQDPGGBiZBobLQD9bxxhj0gPXg32dnQG5A7n7Z/f1m6GjL37xKu6++z7KFZXnFIXWOP6iDAxWtW0yO2fOPO6//6HwBsEiJm63m4EDCjoNZ03e8OkBkQAnRLwS/pPt+PHjfOc73+Gaa65h/vz5vPDCCwA0NDSwYMECrrzyShYsWEBjY2OiSzOFpmn85S9PcLy8DN+IOe0rNKMVKpwM6Xn86f/7PeXlZRZV2Tvt27eHdevW8JVRXsblxrCNfzfS7cZJe26l280JCg4Vvj+5BS3o57XXXjblMRNNURRuueU2LrvsCtTdKsonCpiRiRXADWSDMdbAlMWGFWD/0E5eZh4PPfgo+fkDTHjQvmPOnHncd/8vqbHZeF5R8MQY0FYDKwkH8rvu+i8cjl5+kGovNmLkSJo7OcYpEtiGDTg4vXUAAB/5SURBVJNjm0TPJDyc2Ww27r33Xt59911ee+01XnnlFfbv38/ChQuZO3cuy5YtY+7cuSxcuDDRpfVYMBjkv//7cdat+5DA8NlouXGciaPa8Zx1Oa2+IL/4xX9x6NBB8wvtpTZsWIddhatHmLsAwG03Ttpzy21SOAMoTNc5r8DHpo1r0TTzAmUiqarKD3/4E7761a+jHlSxrbaBJ9lVdWCAskvBts7G8CEj+N1jTzB48JBkV5UUs2ady333P0SNqvKSouCPMqBtwGAlcPHFl3H77XfKPmY9NGzYCJp99ejGyb/JNPlqcbvT+90vDsJ8CQ9ngwYNYsqUKQBkZmYyZswYKisrWbFiBddffz0A119/PR988EGiS+uR+vo6HnzoftavX0Ng+GyCQ2bE/ViGO5fWidfQ6Atw3/13s2XLJhMr7b0aGurId0N6H9u2amimRqvH26e31lAUhW9+89+4667/wtXqwv6BPTzpPtmLUVvA9qENdafKRRdeyqOP/LHff/DNmDGLu++5n3JgEd1vs7ErMpR5/gXcfvud/WYo2ErDhg1H00N4/CeP8DR76xg6dFjKb/MgrJfUj8Fjx46xe/duZsyYQW1tbfseOwUFBdTWnt4yPpXNppCbm/wT6Tdv3sxjv3uMpuZmfGddijZgbI8f00jPwzP5yxj7lvPYY7/hq1+9ie997z9S+iDdgQMH0OhTCOrhIcO+osankuZyMXBgdp/vSMyffzUzZkzl0d8+zJ7NezCOgD5Lg0TvS6qDslfBtstGmjONH93zY6666mr50GtzxRWX0drayJ///CQrgS+2XX/2KberxOANRWHCuHH88sFfpvTPj0SaMCH8M77ZV09m2okjhFoC9cwcc0Gv+FwSfVvSwllrayt33HEH999/P5mZJ//kj3aDOU0zaGhI3vhLXV0tL730HGvWrMJIz8M7+csY6fmmPb7hzMAz6Vqchz/ijTcWsX79Br73vds4++zu903riyZOnMrixW+wrcbB7EFmn5RtjZAOn1SnMXXaTJqb/ckuxxSZmfn85td/YOnSd/n7358jsCyANl7DmGgk5idGBdg/s2M0Gcw+fw7/+R8/ID9/AI2NqXGmqVkuvvhKdu0qYeWKZZyFwSgUZnWY3BfCYJGikJaZxd33/AKvV8Pr7U3j1X1XdnZ465EmXx2DCa/ID2oBPP5mBg4sTOrnkujdCgqyorpdUsJZMBjkjjvu4LrrruPKK8NL4QcMGEBVVRWDBg2iqqqq/eT53qi
5uYm33lrMO8X/IhgKERgyk+DQmaBa8HKqdgKj5xHKH0nF4Y088siDTJ02g2/+n+8wYcIk858viWbOPIdBAwfy9iGNcwuCsa6lSIq1x500+OHqq69JdimmstlsXHPNdZx//lxeeuk51q5djXJYITQjBEMxZ4L/qVpB3aailCsMKirkez/6PuecM9uCJ0oNiqKwYMGt7Ph8G2/W1PBjQ8fe4S9mLVBhGNx7+0/6/VCw2bKzc0hPz6DFV99+XYs/fHnIkKHJKkukkIQPHhmGwc9//nPGjBnDggUL2q+/7LLLWLJkCQBLlizh8ssvT3Rp3WpoqOell/6XW29bwJtvvo43axieaV8lOPxca4JZB3rOMFqn3oh/xBx2luzl/vvv5qGHfs7Ondstfd5Estls3Pz1b1PaZGNDhTPZ5XTLE4I3SjMYP34CM2eek+xyLDFgwEDuvPMeHn749wwvGIFtow11nQpmnjWug1KiYF9mx1nj4tvf/i7//cRfJZhFwe12c9sP7qDO0Nnc4fpmDNYpCnPnzmP27DlJqy+VDR48pD2QAbT4wicGFBXFsH2SEGeQ8M7ZJ598wltvvcX48eP5yle+AsBdd93Frbfeyp133smiRYsYMmQITzzxRKJLO6P6+jqWLFnE0qXvEQwFCeWPITB+JkZ6Xvd3NpNqIzR4Ki2DJmCv2s2OPTvY/st7mThpCt/4+reYNi3+RQi9xSWXXM7S99/h7/v2M3VAkBwTNqG1yj/2pdMYULjve99P+blQkyZN4fE//Jn33nubV/7xIoHlAbQZGsaoHm6T0Qy2zTaog9nnnc8tt9yWcmdlWm3GjFlMnz6TNds/Y7Zh4EBh3f9r787joqz3/o+/rmuGZdgElMUQF9xFJHLDfSUhNUzcypOa1a/uFrrVIk4qJY+0bo8t53S0+1TeqZ0eLT8zyajslJUdTSBTEZXMFTQBww0YYLbr/oPk1lYSmGsYPs+/5nKGmfcAzuPNdX0XwK4o3HbbXL3jua3w8PbsO51ff1xZK+VMNB2nl7MBAwbw7bff/uJ9l9c8cxUXLpwnK+sdPvwwG6vNirVtN6zXXY9masSK2pqGYjGD3YKx9BC20F5/eC00DB7Y2vejMqwPxrJvKTyWzxNPPEafPjHMmjWb6OiYa8+nM1VVuf+BhaQ9ksqaAj8ejatAbWTv8TL89vG12FniwWenvZgyZSrduvVo/BO2AAaDgUmTpjBo0FBe+PszHPy6AK1EwzHQcU2fJMpJBcNuAyZvE/cuTGXYsBFNH7qVmDJlGpn5ezkI9EFjr6IyOH6YXGJrRmFh4VTV/huH5kBVVKpqL+Dn54/JJJMBROO1sEULnKO0tIT338/iX//66MdS1hVrRByad+O3OTGWHUKtrdvY3OvEDkCrW3j2WqjGuv04Q3tiLCvk4JF8Mn48kzb1lmnExQ1okdPmO3bsxP+75wFWr36e/znkw529zY0afxYXYmFfucdVx41x6LyRlw/50atXb269dU6jnqslCg0NZdkTT/Hee5v45z/XoVao2IbaGj6jUwNln4L6nUqv3r1ZuDBdxkQ1UkxMLGEhoew7W4YHYNYcjB8/Qe9Ybi00NAxNc1BtqcDXqw1VtRdl83jRZKSc/cjhcJCfv5ePtmbzdV4OGkpdKWvsmbKfMJwv+tnxNZezy1QjtvC+2EJ7YSwrpPBYAStWLCO8fQRJiTcxevQ4/PwaNkPEVYwdm0BpaQkbN76JosC8nuZr3gB9XISFD096U21TSOlazdiIay9nBeeMPJ8fQFj4dTz66FKMxtb5X0hVVaZMmUaXLl1Z9cxTVH9mxjbcBr93pd8Oao6KclohKWkS8+bd3Wq/h01JVVUGDBrC1g/ew6RpmLy9W/QZ9Jbg8uV384/lrNpaQVSYe03SEvpp9Z+K5eU/8Nlnn/CvT7byw9kyFA8TteEx2MKi0bx8m/z1FIftN48bpb6k9cFw7hjflx3k1Vdf5rV/rmNI/DDGj59AdHRMixk
fNWvWn3A4HGza9DYXalX+o2/lNS1QqygQ5OUgyAvGdbj2Yrb9e0/WFvoSEdGBpRnLZcNo6sY7PbXiGZZlPsb5L85jG2mDX5tobQd1h4pSqjBv3l1MnnyLU7O6u5iYWLKzs8gHBvTtJ6W3mbVrFwKAufYiml/Ej8toyHhJ0TRa5f9eTdPIz9/LBx++z+6vc9A0DXvAdVi7jsEe3BnUlr2QKKqKvV03qtt1Q60qx1hWyL+/+oovv/yc8PYRJE5IYuzYG/H1bfry2ZQURWH27Lm0bduWtWv/m8fzAvnPmEtE+DXNhugNZXXA64dNfHLKm5iYfjzyyGJ8fZ29Kqvr6tAhkqdWPMviJY9Q/uUP2Ebb4Ke9VQN1V10xu//+/2Ts2ARdsrqzqKiuV9zupmOS1qFt27q1zszWSiz2GmwOK23byuV50TRaVTnTNI3c3F28+dY/KTp5AsXTRG14P2yhPdG8A/SO1ywcvm2xdBmGpeNgjOeO8/3ZQtate4U333ydpKRJ3HLLdJcvaYmJk4iM7MQzq5azNE9hbo8qRl5ncco6aCVmlb8X+HPikkpycgqzZ89t8bsANId27ULIXPY0f35sEZd2XsQ27idniAsUlO8V7rzzXilmzeTKcXsyEaD5mUwmTN4mqi2V1FgqAQgObqdzKuEuWt5o8Wt0/vw5lmUuYeXKJykqu0Bt1EgqY2dh7TjQbYvZVQxGbCHdqe4zmeq+U6j0Cefdd/8/9z9wNzk5X+md7ndFR8fwzLNr6NE7hpcP+fLiAR/MzbyV5Y4znizJbcNZmx/p6UuZM2e+FLPfEBYWTtoji1HMCso3VzTnUlALVcaPn8BNN03WL6Cbu3K4gixH4hxBQcFUWyuotl4uZ667eLpoWVpFOTt9+hSLHk6l4EABtZ2GUBUzFVtID30uX9otmEwmpk2bhslkAnvjZg5eC4dvO2q7j6O67xQu2T1YufJJsrLecXqOPyooKJiMjOXceuvtfFXqzZLcQE5WNP3PsNYOLx/04cUDvnTu1ptnnl0tC3k2UK9efZg+/VbUYhUsgAbGvUZCQkOZP/8eveO1GjIe0jmCgoOptZqpsdatyhwY6OS1L4XbcvtyVltbw5NPPs6lqhqq+kzGFh4Nin5vW7FZmDhxIqmpqUycOBHF5vxydpnDtx3mPpOwBXdhw4b/IS8vR7csDWUwGJg2bRaZmU9j8wrmibwAPj/ddLsJlJhVnvi6DV9870VKykwyM/9LzkL8QcnJKQQFB4MZqAXtksa8uXfJpttOcPmPiKAgKQnOEBgYRK3dTI3V/ONxoM6JhLtw+zFnH36YTVlZCdW9J6L56D9YUzN6kp2dDUB2djaa0aRvINVIbdfRGGou8uq6lxkwYFCLmM3Zp09fVj2zmuefX8kr+Xs5fsnI7T3NGBvRu/PLjfy9wB/V05clS9KIi3PPDeabm5eXF4kTJvLGG6+hOTTahYTImUcnWbQonaqqSlkI1UnatGlDrdVMrc2Mh4cH3t46f54Lt+H2Z87y8/ei+QTjCHCRLTUMnlRXV7Nx40aqq6vB4AJ7SKoGLCG9KC05w9mzZ/VO02Bt2rRhyZJMpkxJ4dPTXqzc60+V9dqK5b+KvfjLXn9C2ndi5V/+JsWskeLjhwGg2BXiBw+TsXpO4uHhIZfWnCggoA0WWy3Vlkr8/QJaxB+2omVw+3IWFBSEWluBYi7XO4rrctgwXizGaPTAz69lLRFhMBi4/fb5PPjgQr696EXm7jacq2n4B6SmwRvfmVj/rQ/9bxjI8hXPEBYW3oyJW4eIiA71t3v1auQiy0K4KH//uslkFTXn628L0RTcvpzNnPkn/Hx98DnwHh5FOSg1Fbrm0VTjbx47lcOG8exhfAs2Y7hQzJw58/HxaZmXQ0aPHkdGxpOcs3mTuTuQMvPv/2o7NFh7yIfsk95MmHATaY8urZukIRrtyjMIsqyDcFeX/5itqr2Af0DL2oVFuDa3L2ehoaH89fk1DB86HK/SA/j
sewvvQ9kYzxSg1Fxyeh57UMffPG7+AFYM507gefQL/Pa+idex7bQP8iUj40kmTrzZuVmaWN++/ViW+V/UqL4s39OGH6p//ddb0+DVQh8+/96LqVNncPfd98mlt2Yil9mEu7q8LZ7FXiMLU4sm5fYTAgDatAlkwYI05syZzyefbGXHzi85XbQLinaBqQ1W//bYAyKwB7QHD+9mzWIL7Y3HmQKwW7B26I8ttFezvh4OB2rVWQyXvsdw8XsMVWXgsGPy8WVAfDzjxt1I37793GasRNeu3Xn8iRU8npHOX/ZpZPS/iK+H9rPHbTrmzWen64rZbbfNcZv374pa6tlYIX7Plb/brr6Yt2hZWkU5u6xt23bMnDmbmTNnU1Jyhq+/zmXfvj0UHNiPpawQAM23LTb/9tgDrqsrawaPpg2hKGiePoAPtubYJFfTUM3nUC+dxnDpDB4VJWh2KygKnTp1IXZMMjfcMIDevaPddu+9qKhuPJqeQWbmEv77gC8LYyuvun93mQfvHjcxZsx4KWZO4K6/Z0L4+Pj+4m0hGqvVfmqGh7dn0qRkJk1KxmazcfTod+zfv4/9+/dRWHgIW0kBKCp2/zDsbTpgC+yIZgrEKXsG/VG2WgwXijFcPIXnpe/RLHVr7rS/LoLY+LozY3379mtVA1b79u3HvHl3sXbtP/j0inXQLloUXi70IyoqinvueUCKmRPI91i4qyvPnMkZYtGUWm05u5LRaKRnz9707NmbadNmYbFYOHToAPv27eGbPbspLsrDszgPvAOwBnbEFtQJh3+YvovZ1lZgOH8S4/mTGCpKQNPw9fMnbtBArr/+Bvr1u75+Y97WKilpMrm5X/F2YT4RPjYMisbbR0xU2w089FAaHh5NfFZUCNGqXLmumUwmEk1Jytkv8PT0JDY2jtjYOObMmU95+Q/s3p1HXl4O+fl7sZUUoHiYsARGYg/qhL1NBDT3rEtNQ6k+j/HcCTwuFKFU/QBAhw4dGTR+OgMHDqZbtx6oqtvP8WgwRVGYP/9eFiy4jwAPBz2DbLx5xIekmybRoUOk3vGEEC2ct/f/jVH28mre8cqidZFy1gBt27bjxhuTuPHGJKqrq9m7dze7du3k669zqTl7GMXggbVNB2zBXbAHRjbdODVNQzWXYyg/jueFE1B9ERSF7t17Ej/4ZgYPHkL79tc1zWu5qY4dO9G//0COFuRxnZ8dDYXk5Kl6x2oV/P0DqKhw/oxoIZzlyj+GpZyJpiTl7A8ymUwMGTKcIUOGY7VaKSjIJydnJ1/t2knlkeMoBiOWoM7Y2nXHEXDdNY1RU2orMP5wBM/yI1B9EUVVienbj/j4YQwaFE9QUHAzvDP3NWrUWHbvzmPLCRMxMTGt/nKvs6SnLyU/f6/eMYRwCm9v2TtWNB0pZ43g4eFBXFx/4uL6c/fd93HwYAE7dmzny39vp+aHI2BqQ21YNLaQHg267KlWlOBxZj/GC0WgafSJjmHUyDEMHjykVQ3mb2r9+l1ffzs29gYdk7QuvXr1kd0BhNtLTV1EcXERMTHX//6DhWggKWdNxGAwEBMTS0xMLPPn30NOzk7e27KZY0d34nUmn9qIG7C16/6LZ9KU6gt4FeVguFCMj68fibdMJyEhidDQUB3eifu5sth27hylYxIhhLsZNWqs3hGEG5Jy1gw8PT0ZMWI0w4ePoqAgn9dee5WjR7djLD9KTddR//dATcNYsh+vU7sxeXuT8qd53HTTZBm70Ixk30whhBCuTtE07efLp7cQVqudCxfMesf4XQ6Hg48//pB1617BqnpiN5pANeDwDsDj7GEGDRrCPffcL9vcNKOUlIkAbNjwtqzkLYQQQhchIQ3bg1XWXXACVVVJTJzIihWr8DGCofocSm0lHmcPM23aLNLSFksxa2Zdu3YHrp76LoQQQrgiOXPmZN988zXLlz8OwMBB8TyatkRWUHeCsrJSiotP0r//IL2jCCGEaKUaeuZMxpw5WVxc//rbkyYmSzF
zktDQMEJDw/SOIYQQQvwuuazpZFeWsc6du+iYRAghhBCuSMqZjnx9/fSOIIQQQggXI+VMB5fX3ZJLmkIIIYT4KRlzpoNHH13C/v379I4hhBBCCBckszWFEEIIIZxA1jkTQgghhGiBXK6cbd++nQkTJpCQkMBLL72kdxwhhBBCCKdyqXJmt9vJzMzklVdeITs7m/fff58jR47oHUsIIYQQwmlcqpzl5+fTqVMnIiMj8fT0ZOLEiXz66ad6xxJCCCGEcBqXmq1ZWlpKeHh4/XFYWBj5+fm/+niDQSEw0McZ0YQQQgghnMKlytkfZbdrMltTCCGEEC1Ci5ytGRYWRklJSf1xaWkpYWGyH6IQQgghWg+XKmcxMTGcOHGC4uJiLBYL2dnZjB07Vu9YQgghhBBO41KXNY1GIxkZGdx1113Y7XZSUlLo3r273rGEEEIIIZxGdggQQgghhHCCho45a9HlTAghhBDC3bjUmDMhhBBCiNZOypkQQgghhAuRciaEEEII4UKknAkhhBBCuBApZ0IIIYQQLkTKmRBCCCGEC5FyJoQQQgjhQlxqhwB3d+bMGdLS0igvL0dRFGbMmMHcuXP1juXWamtrmT17NhaLBbvdzoQJE0hNTdU7VqtweZePsLAw/vGPf+gdx+2NHTsWX19fVFXFYDCwadMmvSO5vUuXLrFkyRIOHz6MoiisWLGCuLg4vWO5rWPHjrFgwYL64+LiYlJTU5k3b55+oZqJlDMnMhgMpKenEx0dTWVlJSkpKQwbNoxu3brpHc1teXp6sn79enx9fbFardx2222MHDmS66+/Xu9obm/Dhg107dqVyspKvaO0GuvXryc4OFjvGK3G8uXLGTFiBH/729+wWCzU1NToHcmtRUVFkZWVBdT98Tdy5EgSEhJ0TtU85LKmE4WGhhIdHQ2An58fUVFRlJaW6pzKvSmKgq+vLwA2mw2bzYaiKDqncn8lJSV8/vnnTJs2Te8oQjSLiooK8vLy6n/HPT09CQgI0DlV6/HVV18RGRlJRESE3lGahZQznZw6dYpDhw4RGxurdxS3Z7fbSU5OZujQoQwdOlS+506wYsUKHnnkEVRVPmKc6c4772Tq1Km89dZbekdxe6dOnSI4OJg///nPTJkyhcWLF2M2y17PzpKdnc2kSZP0jtFs5JNTB1VVVaSmpvLYY4/h5+endxy3ZzAYyMrK4osvviA/P5/Dhw/rHcmtffbZZwQHB9O3b1+9o7Qqb7zxBu+++y4vv/wyr7/+Onl5eXpHcms2m42DBw9y6623snnzZkwmEy+99JLesVoFi8XCtm3bSExM1DtKs5Fy5mRWq5XU1FQmT57MjTfeqHecViUgIIDBgwfz5Zdf6h3FrX3zzTds27aNsWPHsnDhQnbt2sXDDz+sdyy3FxYWBkDbtm1JSEggPz9f50TuLTw8nPDw8Poz8YmJiRw8eFDnVK3D9u3biY6Opl27dnpHaTZSzpxI0zQWL15MVFQUd9xxh95xWoVz585x6dIlAGpqati5cydRUVE6p3JvixYtYvv27Wzbto1nn32W+Ph4Vq1apXcst2Y2m+snXpjNZnbs2EH37t11TuXeQkJCCA8P59ixY0DdGKiuXbvqnKp1yM7OZuLEiXrHaFYyW9OJdu/eTVZWFj169CA5ORmAhQsXMmrUKJ2Tua+ysjLS09Ox2+1omkZiYiJjxozRO5YQTaq8vJz7778fqBtjOWnSJEaOHKlzKve3dOlSHn74YaxWK5GRkTz11FN6R3J7ZrOZnTt3kpmZqXeUZqVomqbpHUIIIYQQQtSRy5pCCCGEEC5EypkQQgghhAuRciaEEEII4UKknAkhhBBCuBApZ0IIIYQQLkSW0hBC6Kpnz57ccccdpKenA7B27VrMZjMPPvhgo587PT2d0aNHN2ol8ZKSEpYtW8bRo0dxOByMHj2atLQ0PD09gbrlcL777jtSUlIoLCwkNzcXf39/VFUlIyODuLi4Br9WTk4OHh4e3HD
DDdecVwjR8smZMyGErjw9Pfn44485d+6c3lGuYrPZ0DSNBx54gPHjx/Pxxx+zdetWzGYzzz33HABnz55l//79bNmyhXnz5gGQlpZGVlYWixYtIiMj4w+9Xm5uLnv27GmOtyOEaEHkzJkQQldGo5GZM2eyfv16FixYcNV9Pz3zFRcXx549e8jJyeGFF17A39+fw4cPk5SURI8ePdiwYQO1tbWsXr2ajh07ArBz505eeuklqqqqSE9PZ8yYMdjtdlatWkVubi4Wi4XZs2cza9YscnJy+Otf/0pAQADHjx/niSeewMvLi5SUFKBun9bHHnuMcePGkZqayvz58yktLSU5OZmlS5delX3gwIEUFRUB8Pbbb/PWW29htVrp1KkTK1euxGQykZ6ejqenJ4cOHSIsLIw9e/agqirvvfceS5cu5ezZs6xevRpVVfH39+f1119v7h+HEMIFSDkTQuhu9uzZ3Hzzzdx1110N/prCwkI++OADAgMDGTduHNOnT2fjxo2sX7+e1157jcWLFwNw+vRpNm7cSFFREXPmzGHo0KFs3rwZf39/3nnnHSwWC7NmzWLYsGEAHDx4kC1bthAZGcmGDRuIjo6+6nX9/Pxo3749J0+e5MUXX+Tee+8lKysLgI0bN9Y/btu2bfTo0QOAhIQEZsyYAcBzzz3Hxo0buf322wEoLS3lzTffxGAw8MILL+Dj48Odd94JwOTJk1m7di1hYWH125AJIdyflDMhhO78/PxITk5mw4YNeHt7N+hrYmJiCA0NBaBjx4715apHjx7k5OTUPy4pKQlVVencuTORkZEcO3aMHTt28O2337J161YAKioqOHnyJB4eHsTExBAZGXnN72XlypW8+OKLBAcHs3z5cgC+++47nn/+eSoqKqiqqmL48OH1j09MTMRgMPzic8XFxZGenk5SUhIJCQnXnEkI0bJIORNCuIS5c+cydepUpk6dWv9vBoMBh8MBgMPhwGq11t93eUA+gKqq9ceqqmK32+vvUxTlqtdRFAVN01iyZAkjRoy46r6cnBx8fHzqj7t161Zf4C6rrKzkzJkzdOrUifLy8p+9j7S0tJ9NQEhPT2fNmjX06tWLTZs2kZubW3+fyWT6le8IZGZmsm/fPj7//HNSUlJ45513CAoK+tXHCyHcg0wIEEK4hMDAQBITE6+6NBgREcGBAweAusuEV5azhvroo49wOBwUFRVRXFxMly5dGD58OG+88Ub98x0/fhyz2fyzrx0yZAjV1dVs3rwZqNtU/Omnn+aWW275zVL1U1VVVYSEhGC1WtmyZcuvPs7X15eqqqr646KiImJjY3nooYcICgqipKSkwa8phGi55MyZEMJlzJ8//6pB7zNmzOC+++7j5ptvZsSIEVed1Wqo9u3bM23aNKqqqli2bBleXl5Mnz6d06dPM3XqVDRNIygoiDVr1vzsaxVFYfXq1Sxbtow1a9bgcDgYNWoUCxcu/EMZHnroIaZPn05wcDCxsbFXFbArjRkzhtTUVD799FOWLl3KunXrOHnyJJqmER8fT69evf7w+xdCtDyKpmma3iGEEEIIIUQduawphBBCCOFCpJwJIYQQQrgQKWdCCCGEEC5EypkQQgghhAuRciaEEEII4UKknAkhhBBCuBApZ0IIIYQQLuR/AR4Aa/iH1ImqAAAAAElFTkSuQmCC\n", 489 | "text/plain": [ 490 | "
" 491 | ] 492 | }, 493 | "metadata": {}, 494 | "output_type": "display_data" 495 | } 496 | ], 497 | "source": [ 498 | "plt.figure(figsize=(10,5))\n", 499 | "seaborn.violinplot(data=df, x='NumberOfParts', y='Length')" 500 | ] 501 | }, 502 | { 503 | "cell_type": "code", 504 | "execution_count": 161, 505 | "metadata": {}, 506 | "outputs": [ 507 | { 508 | "data": { 509 | "text/plain": [ 510 | "" 511 | ] 512 | }, 513 | "execution_count": 161, 514 | "metadata": {}, 515 | "output_type": "execute_result" 516 | }, 517 | { 518 | "data": { 519 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAmAAAAE1CAYAAABAwFwJAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAGotJREFUeJzt3X9s1IX9x/HXcaVQpOUotncQKyrDrWFSF5VC2kVXdi1YGK20mX9s+QJzbGiCFcQoBsUJbEa2ka3ZhOhkGJfIr0K0Rn6UQemgOGWCYeCGs0m70SsU+oNSOO76+f7B1/vqRAtH+75eeT7+kms/93m/c9I+ufv06nIcxxEAAADMDIj1AAAAANcbAgwAAMAYAQYAAGCMAAMAADBGgAEAABgjwAAAAIwRYAAAAMYIMAAAAGMEGAAAgLGEWA/wVbq6uhQO271Rv9vtMj2fNfaLb/15v/68m8R+8Y794pf1bgMHuq/4c/t0gIXDjlpazpmdz+MZYno+a+wX3/rzfv15N4n94h37xS/r3dLSkq/4c3kJEgAAwBgBBgAAYIwAAwAAMEaAAQAAGCPAAAAAjBFgAAAAxggwAAAAYwQYAACAMQIMAADAGAEGAABgjAADAAAw1qd/F2S0hqYkKWlQdKtdze9x+lTnhZDOtnVGdT4AAHD96ZcBljQoQbc8WWl2vrpfFOqs2dkAAEC84yVIAAAAYwQYAACAMQIMAADAGAEGAABgjAADAAAwRoABAAAYI8AAAACMEWAAAADGCDAAAABjBBgAAIAxAgwAAMAYAQYAAGCMAAMAADBGgAEAABgjwAAAAIwRYAAAAMauKMDWrl2rwsJCTZs2TQsWLNCFCxdUX1+v0tJS+f1+lZWVKRgMSpKCwaDKysrk9/tVWlqqhoaGyP2sXr1afr9fBQUF2rt3b+9sBAAA0Md1G2CBQEDr1q3Tpk2b9NZbbykcDquyslIrV67UrFmztGPHDqWkpGjjxo2SpA0bNiglJUU7duzQrFmztHLlSknS8ePHVVlZqcrKSr388st67rnnFA6He3c7AACAPuiKngELh8M6f/68QqGQzp8/r7S0NNXW1qqgoECSVFxcrKqqKknSrl27VFxcLEkqKCjQ/v375TiOqqqqVFhYqMTERGVkZGj06NE6fPhwL60FAADQdyV09wler1dz5szRd77zHQ0aNEg5OTkaN26cUlJSlJBw6XCfz6dAICDp0jNmI0eOvHTnCQlKTk7WmTNnFAgElJWV9bn7/fSYL+N2u+TxDIl6OUvxMKfbPSAu5owW+8Wv/rybxH7xjv3iV1/erdsAa21tVVVVlaqqqpScnKxHH33U7PqtcNhRS8u5qz4uLS25F6b5atHMac3jGRIXc0aL/eJXf95NYr94x37xy3q3q+mPbl+C3Ldvn2666SalpqZq4MCBy
s/P18GDB9XW1qZQKCRJamxslNfrlXTpma0TJ05IkkKhkNrb2zV8+HB5vV41NjZG7jcQCESOAQAAuJ50G2CjRo3SoUOH1NnZKcdxtH//fn3ta19Tdna2tm3bJkmqqKhQXl6eJCkvL08VFRWSpG3btmnixIlyuVzKy8tTZWWlgsGg6uvrVVdXp/Hjx/fiagAAAH1Tty9BZmVlqaCgQMXFxUpISFBmZqa+//3v67777tNjjz2mVatWKTMzU6WlpZKkkpISLVq0SH6/X8OGDdOvf/1rSdLYsWM1depU3X///XK73XrmmWfkdrt7dzsAAIA+yOU4jhPrIb7MxYvhqK8Bu+XJyl6Y6PLqflGokyfbzc4Xrf78Or/EfvGsP+8msV+8Y7/4FdfXgAEAAKBnEWAAAADGCDAAAABjBBgAAIAxAgwAAMAYAQYAAGCMAAMAADBGgAEAABgjwAAAAIwRYAAAAMYIMAAAAGMEGAAAgDECDAAAwBgBBgAAYIwAAwAAMEaAAQAAGCPAAAAAjBFgAAAAxggwAAAAYwQYAACAMQIMAADAGAEGAABgjAADAAAwRoABAAAYI8AAAACMEWAAAADGCDAAAABjBBgAAIAxAgwAAMAYAQYAAGCMAAMAADBGgAEAABgjwAAAAIwRYAAAAMYIMAAAAGMEGAAAgDECDAAAwBgBBgAAYIwAAwAAMEaAAQAAGCPAAAAAjBFgAAAAxggwAAAAYwQYAACAMQIMAADAGAEGAABgjAADAAAwRoABAAAYI8AAAACMEWAAAADGrijA2traNH/+fE2ZMkVTp07V3/72N7W0tGj27NnKz8/X7Nmz1draKklyHEfLli2T3+/X9OnTdeTIkcj9VFRUKD8/X/n5+aqoqOidjQAAAPq4Kwqw5cuX69vf/rbeeecdbd26VWPGjNGaNWs0adIkbd++XZMmTdKaNWskSdXV1aqrq9P27dv1/PPPa+nSpZKklpYWlZeXa/369dqwYYPKy8sj0QYAAHA96TbA2tvb9de//lUlJSWSpMTERKWkpKiqqkpFRUWSpKKiIu3cuVOSIre7XC7deeedamtrU1NTk2pqapSTkyOPx6Nhw4YpJydHe/fu7cXVAAAA+qaE7j6hoaFBqampeuqpp3Ts2DGNGzdOTz/9tJqbm5Weni5JSktLU3NzsyQpEAjI5/NFjvf5fAoEAl+43ev1KhAI9PQ+AAAAfV63ARYKhfT3v/9dS5YsUVZWlpYtWxZ5ufFTLpdLLperx4dzu13yeIb0+P32hniY0+0eEBdzRov94ld/3k1iv3jHfvGrL+/WbYD5fD75fD5lZWVJkqZMmaI1a9ZoxIgRampqUnp6upqampSamirp0jNbjY2NkeMbGxvl9Xrl9Xr17rvvRm4PBAKaMGHCV547HHbU0nLuqpdKS0u+6mOuVTRzWvN4hsTFnNFiv/jVn3eT2C/esV/8st7tavqj22vA0tLS5PP59K9//UuStH//fo0ZM0Z5eXnasmWLJGnLli2aPHmyJEVudxxHH3zwgZKTk5Wenq7c3FzV1NSotbVVra2tqqmpUW5ubjT7AQAAxLVunwGTpCVLlujxxx/XxYsXlZGRoZ///Ofq6upSWVmZNm7cqFGjRmnVqlWSpHvvvVd79uyR3+9XUlKSVqxYIUnyeDx6+OGHIxfzP/LII/J4PL20FgAAQN/lchzHifUQX+bixXDUL0He8mRlL0x0eXW/KNTJk+1m54tWf36aWWK/eNafd5PYL96xX/yK65cgAQAA0LMIMAAAAGMEGAAAgDECDAAAwBgBBgAAYIwAAwAAMEaAAQAAGCPAAAAAjBFgAAAAxggwAAAAYwQYAACAMQIMAADAGAEGAABgjAADAAAwRoABAAAYI8AAAACMEWAAAADGCDAAAABjBBgAAIAxAgwAAMAYAQYAAGCMAAMAADBGgAEAABgjwAAAAIwRYAAAAMYIMAAAAGMEGAAAgDECDAAAwBgBBgAAYIwAAwAAMJYQ6
wFw9UYMS9CAxKSojk1LS77qY7qCnWpuDUV1PgAA8EUEWBwakJgkLR1md76lrZLazc4HAEB/x0uQAAAAxggwAAAAYwQYAACAMQIMAADAGAEGAABgjAADAAAwRoABAAAYI8AAAACMEWAAAADGCDAAAABjBBgAAIAxAgwAAMAYAQYAAGCMAAMAADBGgAEAABgjwAAAAIwRYAAAAMYIMAAAAGMEGAAAgLErDrBwOKyioiL95Cc/kSTV19ertLRUfr9fZWVlCgaDkqRgMKiysjL5/X6VlpaqoaEhch+rV6+W3+9XQUGB9u7d28OrAAAAxIcrDrB169ZpzJgxkT+vXLlSs2bN0o4dO5SSkqKNGzdKkjZs2KCUlBTt2LFDs2bN0sqVKyVJx48fV2VlpSorK/Xyyy/rueeeUzgc7uF1AAAA+r4rCrDGxkbt3r1bJSUlkiTHcVRbW6uCggJJUnFxsaqqqiRJu3btUnFxsSSpoKBA+/fvl+M4qqqqUmFhoRITE5WRkaHRo0fr8OHDvbETAABAn5ZwJZ+0YsUKLVq0SB0dHZKkM2fOKCUlRQkJlw73+XwKBAKSpEAgoJEjR16684QEJScn68yZMwoEAsrKyorcp9frjRzzZdxulzyeIVe/VQzEy5zRiof93O4BcTFntPrzfv15N4n94h37xa++vFu3AfbnP/9Zqamp+uY3v6kDBw5YzBQRDjtqaTl31celpSX3wjRfLZo5o9Xf94uWxzMkLuaMVn/erz/vJrFfvGO/+GW929V8f+42wA4ePKhdu3apurpaFy5c0NmzZ7V8+XK1tbUpFAopISFBjY2N8nq9ki49s3XixAn5fD6FQiG1t7dr+PDh8nq9amxsjNxvIBCIHAMAAHA96fYasIULF6q6ulq7du3Sr371K02cOFG//OUvlZ2drW3btkmSKioqlJeXJ0nKy8tTRUWFJGnbtm2aOHGiXC6X8vLyVFlZqWAwqPr6etXV1Wn8+PG9uBoAAEDfFPX7gC1atEivvvqq/H6/WlpaVFpaKkkqKSlRS0uL/H6/Xn31VT3++OOSpLFjx2rq1Km6//779dBDD+mZZ56R2+3umS0AAADiyBVdhP+p7OxsZWdnS5IyMjIibz3xWYMGDdJvfvObyx4/b948zZs3L4oxAQAA+g/eCR8AAMAYAQYAAGCMAAMAADBGgAEAABgjwAAAAIwRYAAAAMYIMAAAAGMEGAAAgDECDAAAwBgBBgAAYIwAAwAAMEaAAQAAGCPAAAAAjBFgAAAAxggwAAAAYwQYAACAMQIMAADAGAEGAABgjAADAAAwRoABAAAYI8AAAACMEWAAAADGCDAAAABjBBgAAIAxAgwAAMAYAQYAAGCMAAMAADBGgAEAABgjwAAAAIwRYAAAAMYIMAAAAGMEGAAAgDECDAAAwBgBBgAAYIwAAwAAMEaAAQAAGCPAAAAAjBFgAAAAxggwAAAAYwQYAACAMQIMAADAGAEGAABgjAADAAAwRoABAAAYI8AAAACMEWAAAADGCDAAAABjBBgAAIAxAgwAAMAYAQYAAGCs2wA7ceKEfvjDH+r+++9XYWGh/vjHP0qSWlpaNHv2bOXn52v27NlqbW2VJDmOo2XLlsnv92v69Ok6cuRI5L4qKiqUn5+v/Px8VVRU9NJKAAAAfVu3AeZ2u/Xkk0/q7bff1htvvKE//elPOn78uNasWaNJkyZp+/btmjRpktasWSNJqq6uVl1dnbZv367nn39eS5culXQp2MrLy7V+/Xpt2LBB5eXlkWgDAAC4nnQbYOnp6Ro3bpwkaejQobrtttsUCARUVVWloqIiSVJRUZF27twpSZHbXS6X7rzzTrW1tampqUk1NTXKycmRx+PRsGHDlJOTo7179/biagAAAH3TVV0D1tDQoKNHjyorK0vNzc1KT0+XJKWlpam5uVmSFAgE5PP5Isf4fD4FAoEv3O71ehUIBHpiBwAAgLiScKWf2NHRofnz52vx4sUaOnTo5z7mc
rnkcrl6fDi32yWPZ0iP329viJc5oxUP+7ndA+Jizmj15/36824S+8U79otffXm3Kwqwixcvav78+Zo+fbry8/MlSSNGjFBTU5PS09PV1NSk1NRUSZee2WpsbIwc29jYKK/XK6/Xq3fffTdyeyAQ0IQJE77yvOGwo5aWc1e9VFpa8lUfc62imTNa/X2/aHk8Q+Jizmj15/36824S+8U79otf1rtdzffnbl+CdBxHTz/9tG677TbNnj07cnteXp62bNkiSdqyZYsmT578udsdx9EHH3yg5ORkpaenKzc3VzU1NWptbVVra6tqamqUm5t7tbsBAADEvW6fAXv//fe1detW3X777ZoxY4YkacGCBZo7d67Kysq0ceNGjRo1SqtWrZIk3XvvvdqzZ4/8fr+SkpK0YsUKSZLH49HDDz+skpISSdIjjzwij8fTW3sBAAD0Wd0G2N13362PPvrosh/79D3BPsvlcunZZ5+97OeXlJREAgwAAOB6xTvhAwAAGCPAAAAAjBFgAAAAxggwAAAAYwQYAACAMQIMAADAGAEGAABgjAADAAAwRoABAAAYI8AAAACMEWAAAADGCDAAAABjBBgAAIAxAgwAAMAYAQYAAGCMAAMAADBGgAEAABgjwAAAAIwRYAAAAMYIMAAAAGMEGAAAgDECDAAAwBgBBgAAYIwAAwAAMEaAAQAAGCPAAAAAjBFgAAAAxggwAAAAYwQYAACAMQIMAADAGAEGAABgjAADAAAwRoABAAAYI8AAAACMEWAAAADGCDAAAABjBBgAAIAxAgwAAMAYAQYAAGCMAAMAADBGgAEAABgjwAAAAIwRYAAAAMYIMAAAAGMEGAAAgDECDAAAwBgBBgAAYIwAAwAAMEaAAQAAGEuI9QDAfxvqGaikgYOjOjYtLfmqj+m8eF5nWy5Gdb5oDB86UAlJdvuFOs/rzFm7/QAA3TMPsOrqai1fvlxdXV0qLS3V3LlzrUdAH5c0cLDu+OMdZuf78H8+1FnZBUpC0mAd/Uam2fkyjx2VCDAA6FNMX4IMh8P62c9+ppdfflmVlZV66623dPz4ccsRAAAAYs40wA4fPqzRo0crIyNDiYmJKiwsVFVVleUIAAAAMWf6EmQgEJDP54v82ev16vDhw5YjAOhFw1KSlDgoui8r0VzfFrwQUmtbZ1Tni8aw5EQlDh4U1bFR7Xf+glrbg1GdLxqpKUlyGz5+4QshnTZ8/IC+pE9fhD9woDuqv9SSVPeLwh6e5qtFO2fUlraans56vw//50PT81nvl3nsqOn5zP//NJI4KKHf7iZJiYMHKS3K4IsH7jh6/OJlzmj15/366m6mL0F6vV41NjZG/hwIBOT1ei1HAAAAiDnTALvjjjtUV1en+vp6BYNBVVZWKi8vz3IEAACAmDN9CTIhIUHPPPOMHnroIYXDYc2cOVNjx461HAEAACDmXI7jOLEeAgAA4HrCryICAAAwRoABAAAYI8AAAACMEWAAAADG3EuXLl0a6yFi5eOPP9ZHH32k1NRUJSYmRm6vrq7W6NGjYzhZzzh8+HDkvdaOHz+urVu3qq2tTbfcckusR+txTzzxhPx+f6zH6BGHDh3S0KFDlZiYqPPnz+t3v/udXn31VR09elTjx4/XoEHx/cac69atU3p6upKT++abI/a09957T9u2bVNHR4duvvnmWI9zzYLBoN58802dPn1aGRkZevPNN/XGG2+ooaFBmZmZcrvdsR4RX6G+vl4bN27U22+/rb/85S9qaGjQrbfe+rnvgbBx3f4U5Lp16/T6669rzJgxOnbsmBYvXqzvfve7kqTi4mJVVFTEeMJrU15erurqaoVCIeXk5OjQoUPKzs7Wvn37lJubq3nz5sV6xKj99Kc//cJtBw4cUHZ2tiTppZdesh6pRxUWFmrr1q1KSEjQkiVLNHjwYBUUFKi2tlbHjh1TeXl5rEe8JnfddZeSkpJ08803q7CwUFOnTlVqamqsx+oxJSUl2rhxoyRp/
fr1ev311+X3+1VTU6O8vDzNnTs3xhNem4ULFyocDuv8+fNKTk7WuXPn5Pf7VVtbK8dx9MILL8R6RHyJdevWaffu3br77rtVXV2tzMxMpaSkaMeOHXr22WcjX0NhxLlOTZs2zTl79qzjOI5TX1/vFBcXO2vXrnUcx3FmzJgRy9F6xLRp05xQKOScO3fO+da3vuW0t7c7juM4nZ2dzrRp02I83bUpKipyFi5c6NTW1joHDhxwamtrnZycHOfAgQPOgQMHYj3eNZsyZUrkv4uKij73se9973vW4/S4GTNmOOFw2Nm7d6/z1FNPOdnZ2c6cOXOczZs3R/4/jWef/frxwAMPOM3NzY7jOE5HR0fc/91zHCeyw8WLF51JkyY5oVDIcRzH6erq6hf7fZUf/ehHsR7hmnz6fcFxHOfcuXPOD37wA8dxHOff//53v/i+5ziO09bW5rz44otOQUGBc8899zgTJkxwpkyZ4rz44otOa2trrMf7nD79uyB7U1dXl2644QZJ0k033aTXXntN8+fP13/+8x85/eBJQbfbLbfbHXmmYejQoZKkwYMHa8CA+L70b9OmTVq3bp1eeuklPfHEE8rMzNSgQYM0YcKEWI/WI8aOHatNmzZp5syZ+sY3vqEPP/xQd9xxhz755BMlJMT/X1mXy6UBAwYoNzdXubm5unjxoqqrq1VZWakXXnhBtbW1sR7xmnR1dam1tVVdXV1yHCfy7N6QIUP6xctzjuMoGAyqs7NTnZ2dam9vl8fjUTAYVCgUivV41+zIkSOXvd1xHB07dsx4mp4XDofldrsVDAbV0dEhSRo1alS/eOwkqaysTNnZ2XrttdeUlpYmSTp58qQqKipUVlamP/zhDzGe8P/F/1fzKI0YMUJHjx5VZmamJOmGG27Q6tWrtXjxYv3jH/+I8XTXbuDAgers7FRSUpI2b94cub29vT3uA2zAgAGaNWuWpkyZohUrVujGG29UOByO9Vg9Zvny5Vq+fLl+//vfa/jw4XrwwQfl8/k0cuRILV++PNbjXbP//gfOwIEDNXnyZE2ePFmdnZ0xmqrnnD17Vg888IAcx5HL5VJTU5PS09PV0dHRL/5xV1JSoqlTp6qrq0uPPfaYHn30UWVkZOjQoUMqLCyM9XjXrKSkRPfcc89lH6u2trYYTNRzSkpKNHPmTGVlZem9997Tj3/8Y0nS6dOnNWzYsBhP1zMaGhr0yiuvfO62tLQ0zZ07V5s2bYrRVJd33V4D1tjYKLfbHSnkz3r//fd11113xWCqnhMMBi97UeXp06d18uRJff3rX4/BVL1j9+7dOnjwoBYsWBDrUXrU2bNn1dDQoFAoJJ/PpxtvvDHWI/WITz75RLfeemusxzDX2dmpU6dOKSMjI9ajXLNAICBJ8nq9amtr0759+zRq1CiNHz8+xpNdu2nTpqm8vPyyP6x07733as+ePfZD9aB//vOf+vjjjzV27FiNGTMm1uP0uDlz5mjSpEkqLi6OfM08deqUNm/erH379mnt2rWxHfAzrtsAAwDgv73zzju6/fbbddttt33hYzt37oz8sBb6ptbWVq1Zs0ZVVVU6ffq0pEuveH36AzB96Zk+AgwAgCvw6bWZiE997fGL74uBAAAw8tvf/jbWI+Aa9LXH77q9CB8AgP82ffr0L/3YqVOnDCdBNOLp8SPAAAD4P83NzXrllVeUkpLyudsdx9GDDz4Yo6lwpeLp8SPAAAD4P/fdd586Ojoib1H0WbxTfN8XT48fF+EDAAAY4yJ8AAAAYwQYAACAMQIMAADAGAEGAABgjAADAAAw9r+4zFev7koh3AAAAABJRU5ErkJggg==\n", 520 | "text/plain": [ 521 | "
" 522 | ] 523 | }, 524 | "metadata": {}, 525 | "output_type": "display_data" 526 | } 527 | ], 528 | "source": [ 529 | "plt.figure(figsize=(10,5))\n", 530 | "df['TLD'].value_counts().plot(kind='bar')" 531 | ] 532 | }, 533 | { 534 | "cell_type": "code", 535 | "execution_count": 162, 536 | "metadata": {}, 537 | "outputs": [ 538 | { 539 | "data": { 540 | "text/plain": [ 541 | "" 542 | ] 543 | }, 544 | "execution_count": 162, 545 | "metadata": {}, 546 | "output_type": "execute_result" 547 | }, 548 | { 549 | "data": { 550 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAloAAAE1CAYAAAA/EU74AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAGqdJREFUeJzt3X9QVOfd9/HPcgADBVl+E70hiiVTR42ZqTZDY7CDJVTBSFViMuNMRVPG6GiRzhhtp8bQGrXp7WjMM0bGNNWM06lVAlVsavBRsbXG2LRR09ifOgOduFQU/IVBlr3/yN19HstWlLOXZ3d5v/4Je+3Z63yv7+y4n5xz9qzL5/P5BAAAgKCLcroAAACASEXQAgAAMISgBQAAYAhBCwAAwBCCFgAAgCEELQAAAEMIWgAAAIYQtAAAAAwhaAEAABgS7XQBktTb2yuvNzRuUG9ZrpCpJZTQl8DoS1/0JDD6Ehh9CYy+9BVKPYmJse5625AIWl6vTx0dN5wuQ5LkdseHTC2hhL4ERl/6oieB0ZfA6Etg9KWvUOpJenriXW/LqUMAAABDCFoAAACGELQAAAAMIWgBAAAYQtACAAAwhKAFAABgCEELAADAEIIWAACAIQQtAAAAQwhaAAAAhhC0AAAADAmJ3zoMloShcYobYn9J9/IbRv9J16c9unaly/Y8AAAgfEVU0IobEq0RKxqdLkOSdH5dia45XQQAAHAUpw4BAAAMIWgBAAAYQtACAAAwhKAFAABgCEELAADAEIIWAACAIQQtAAAAQwhaAAAAhhC0AAAADCFoAQAAGNLvT/CsXLlShw8fVmpqqvbt2ydJqqqq0rlz5yRJV69eVWJiohoaGtTa2qpp06Zp5MiRkqTx48erpqbGYPkAAAChq9+gNXPmTM2dO1cvvPCCf2zjxo3+v9etW6eEhAT/45ycHDU0NAS5TAAAgPDT76nDiRMnKikpKeBzPp9Pv/zlL1VaWhr0wgAAAMKdrWu0Tp48qdTUVI0YMcI/1traqrKyMs2dO1cnT560Wx8AAEDY6vfU4Z3s27fvtqNZGRkZOnTokJKTk3XmzBktXrxYjY2Nt51aDMSyXHK74+2UEpIiaU2WFRVR6wkW+tIXPQmMvgRGXwKjL32Fa08GHLR6enr07rvvqq6uzj8WGxur2NhYSdLYsWOVk5Ojc+fOady4cXecy+v1qaPjxkBL8UtPT7Q9RzAFY02hwu2Oj6j1BAt96YueBEZfAqMvgdGXvkKpJ/eSNwZ86vDYsWPKzc1VVlaWf+zSpUvyer2SpJaWFp0/f17Z2dkD3QUAAEBY6/eIVnV1tU6cOKHLly+roKBAS5YsUXl5ufbv36+SkpLbtn3//ff16quvKjo6WlFRUXrppZfkdruNFQ8AABDK+g1aGzZsCDi+bt26PmPFxcUqLi62XxUAAEAE4M7wAAAAhh
C0AAAADCFoAQAAGELQAgAAMISgBQAAYAhBCwAAwBCCFgAAgCEELQAAAEMIWgAAAIYQtAAAAAwhaAEAABjS728dIvylJkUrKjbO9jzp6Ym2Xt/b3aX2zh7bdQAAEC4IWoNAVGyctDrJ6TIUtbpT0lWnywAA4L7h1CEAAIAhBC0AAABDCFoAAACGELQAAAAMIWgBAAAYQtACAAAwhKAFAABgCEELAADAEIIWAACAIQQtAAAAQwhaAAAAhhC0AAAADCFoAQAAGNJv0Fq5cqXy8/NVWlrqH9u8ebOeeOIJzZgxQzNmzNCRI0f8z23dulVFRUUqLi7W0aNHzVQNAAAQBqL722DmzJmaO3euXnjhhdvG582bpwULFtw29te//lWNjY1qbGyUx+NRRUWFfvWrX8myrOBWDQAAEAb6PaI1ceJEJSUl3dVkBw8eVElJiWJjY5Wdna2HHnpIp06dsl0kAABAOOr3iNZ/snPnTtXX12vs2LFasWKFkpKS5PF4NH78eP82mZmZ8ng8/c5lWS653fEDLSVkReKa7Iq0nlhWVMStyS56Ehh9CYy+BEZf+grXngwoaD377LNatGiRXC6XNm3apHXr1mnt2rUDLsLr9amj48aAX/8v6emJtucIpmCsKRhCqS+h0pNgcbvjI25NdtGTwOhLYPQlMPrSVyj15F4+Vwf0rcO0tDRZlqWoqCiVl5fr9OnTkj47gnXhwgX/dh6PR5mZmQPZBQAAQNgbUNBqa2vz/93U1KS8vDxJUmFhoRobG9Xd3a2WlhadP39ejzzySHAqBQAACDP9njqsrq7WiRMndPnyZRUUFGjJkiU6ceKEzp49K0kaPny4ampqJEl5eXmaOnWqpk2bJsuytGrVKr5xCAAABq1+g9aGDRv6jJWXl//H7Z9//nk9//zz9qoCAACIANwZHgAAwBCCFgAAgCEELQAAAEMIWgAAAIYQtAAAAAwhaAEAABhC0AIAADCEoAUAAGAIQQsAAMAQghYAAIAhBC0AAABDCFoAAACGELQAAAAMIWgBAAAYQtACAAAwhKAFAABgCEELAADAEIIWAACAIQQtAAAAQwhaAAAAhhC0AAAADCFoAQAAGELQAgAAMISgBQAAYAhBCwAAwBCCFgAAgCHR/W2wcuVKHT58WKmpqdq3b58kaf369Tp06JBiYmKUk5OjtWvXaujQoWptbdW0adM0cuRISdL48eNVU1NjdgUAAAAhqt8jWjNnztS2bdtuG3v88ce1b98+7d27VyNGjNDWrVv9z+Xk5KihoUENDQ2ELAAAMKj1G7QmTpyopKSk28YmTZqk6OjPDoY9+uijunDhgpnqAAAAwli/pw77s2fPHk2dOtX/uLW1VWVlZUpISFBVVZUmTJjQ7xyW5ZLbHW+3lJATiWuyK9J6YllREbcmu+hJYPQlMPoSGH3pK1x7YitobdmyRZZl6amnnpIkZWRk6NChQ0pOTtaZM2e0ePFiNTY2KiEh4Y7zeL0+dXTcsFOKJCk9PdH2HMEUjDUFQyj1JVR6Eixud3zErckuehIYfQmMvgRGX/oKpZ7cy+fqgL91WFdXp8OHD+tHP/qRXC6XJCk2NlbJycmSpLFjxyonJ0fnzp0b6C4AAADC2oCCVnNzs7Zt26YtW7YoLi7OP37p0iV5vV5JUktLi86fP6/s7OzgVAoAABBm+j11WF1drRMnTujy5csqKCjQkiVLVFtbq+7ublVUVEj6f7dxeP/99/Xqq68qOjpaUVFReumll+R2u40vAgAAIBT1G7Q2bNjQZ6y8vDzgtsXFxSouLrZfFQAAQATgzvAAAACGELQAAAAMIWgBAAAYQtACAAAwhKAFAABgCEELAADAEIIWAACAIQQtAAAAQwhaAAAAhhC0AAAADCFoAQAAGELQAgAAMISgBQAAYAhBCwAAwBCCFgAAgCEELQAAAEMIWgAAAIYQtAAAAAwhaAEAABhC0AIAADCEoAUAAGAIQQsAAMAQghYAAIAhBC0AAA
BDCFoAAACG3FXQWrlypfLz81VaWuof6+joUEVFhZ588klVVFSos7NTkuTz+fSDH/xARUVFmj59uj766CMzlQMAAIS4uwpaM2fO1LZt224bq62tVX5+vg4cOKD8/HzV1tZKkpqbm3X+/HkdOHBA3//+97V69eqgFw0AABAO7ipoTZw4UUlJSbeNHTx4UGVlZZKksrIyNTU13Tbucrn06KOP6sqVK2prawty2QAAAKFvwNdotbe3KyMjQ5KUnp6u9vZ2SZLH41FWVpZ/u6ysLHk8HptlAgAAhJ/oYEzicrnkcrkG/HrLcsntjg9GKSElEtdkV6T1xLKiIm5NdtGTwOhLYPQlMPrSV7j2ZMBBKzU1VW1tbcrIyFBbW5tSUlIkSZmZmbpw4YJ/uwsXLigzM/OOc3m9PnV03BhoKX7p6Ym25wimYKwpGEKpL6HSk2Bxu+Mjbk120ZPA6Etg9CUw+tJXKPXkXj5XB3zqsLCwUPX19ZKk+vp6TZky5bZxn8+nP/zhD0pMTPSfYgQAABhM7uqIVnV1tU6cOKHLly+roKBAS5YsUWVlpaqqqrR7924NGzZMGzdulCRNnjxZR44cUVFRkeLi4vTyyy8bXQAAAECouqugtWHDhoDj27dv7zPmcrn04osv2qsKAAAgAnBneAAAAEMIWgAAAIYQtAAAAAwhaAEAABhC0AIAADCEoAUAAGAIQQsAAMAQghYAAIAhBC0AAABDCFoAAACGELQAAAAMIWgBAAAYQtACAAAwhKAFAABgCEELAADAEIIWAACAIQQtAAAAQwhaAAAAhhC0AAAADCFoAQAAGELQAgAAMISgBQAAYAhBCwAAwBCCFgAAgCEELQAAAEMIWgAAAIZED/SFf//737Vs2TL/45aWFi1dulRXr17Vrl27lJKSIkmqrq7W5MmT7VcKAAAQZgYctHJzc9XQ0CBJ8nq9KigoUFFRkerq6jRv3jwtWLAgaEUCAACEo6CcOvztb3+r7OxsDR8+PBjTAQAARISgBK3GxkaVlpb6H+/cuVPTp0/XypUr1dnZGYxdAAAAhB2Xz+fz2Zmgu7tbTzzxhBobG5WWlqaLFy8qOTlZLpdLmzZtUltbm9auXXvHOXp7e+X12ipDkhQTY2nEikbb8wTD+XUlunXL63QZkj7ri1YnOV2GtLozZHoSLJYVJa+31+kyQgo9CYy+BEZfAqMvfYVST2JirLvedsDXaP1Lc3OzxowZo7S0NEny/1eSysvLtXDhwn7n8Hp96ui4YbcUpacn2p4jmIKxpmAIpb6ESk+Cxe2Oj7g12UVPAqMvgdGXwOhLX6HUk3v5XLV96rCxsVElJSX+x21tbf6/m5qalJeXZ3cXAAAAYcnWEa0bN27o2LFjqqmp8Y+98sorOnv2rCRp+PDhtz0HAAAwmNgKWvHx8XrvvfduG3vllVdsFQQAABApuDM8AACAIQQtAAAAQwhaAAAAhhC0AAAADCFoAQAAGELQAgAAMISgBQAAYAhBCwAAwBCCFgAAgCEELQAAAEMIWgAAAIYQtAAAAAwhaAEAABhC0AIAADCEoAUAAGAIQQsAAMAQghYAAIAhBC0AAABDCFoAAACGELQAAAAMIWgBAAAYQtACAAAwhKAFAABgCEELAADAEIIWAACAIQQtAAAAQ6LtTlBYWKjPfe5zioqKkmVZqqurU0dHh5YtW6Z//OMfGj58uDZu3KikpKRg1AsAABA2gnJEa/v27WpoaFBdXZ0kqba2Vvn5+Tpw4IDy8/NVW1sbjN0AAACEFSOnDg8ePKiysjJJUllZmZqamkzsBgAAIKTZPnUoSQsWLJDL5dKcOXM0Z84ctbe3KyMjQ5KUnp6u9vb2O77eslxyu+ODUUpIicQ12RVpPbGsqIhbk130JDD6Ehh9CYy+9BWuPbEdtH76058qMzNT7e3tqqioUG5u7m3Pu1wuuVyuO87h9frU0XHDbilKT0+0PUcwBWNNwRBKfQmVngSL2x0fcWuyi54ERl
8Coy+B0Ze+Qqkn9/K5avvUYWZmpiQpNTVVRUVFOnXqlFJTU9XW1iZJamtrU0pKit3dAAAAhB1bQevGjRu6du2a/+/f/OY3ysvLU2Fhoerr6yVJ9fX1mjJliv1KAQAAwoytU4ft7e1avHixJMnr9aq0tFQFBQUaN26cqqqqtHv3bg0bNkwbN24MSrEAAADhxFbQys7O1i9+8Ys+48nJydq+fbudqQEAAMIed4YHAAAwhKAFAABgCEELAADAEIIWAACAIQQtAAAAQwhaAAAAhhC0AAAADCFoAQAAGELQAgAAMISgBQAAYAhBCwAAwBBbv3UIhKsEd4ziYh4Iylzp6Ym2Xt9166auddwKSi12JSfEKDrOfl/s9kSSerpu6vK10OgLAAwUQQuDUlzMAxq3fZzTZUiSTn/jtK4pNAJFdNwD+vgLo50uQ5I0+uzHEkELQJjj1CEAAIAhBC0AAABDCFoAAACGELQAAAAMIWgBAAAYQtACAAAwhKAFAABgCEELAADAEIIWAACAIQQtAAAAQwhaAAAAhhC0AAAADCFoAQAAGBI90Bd+8sknWr58udrb2+VyufT000/rG9/4hjZv3qxdu3YpJSVFklRdXa3JkycHrWAAAIBwMeCgZVmWVqxYoTFjxujatWuaNWuWHn/8cUnSvHnztGDBgqAVCQAAEI4GHLQyMjKUkZEhSUpISFBubq48Hk/QCgMAAAh3Aw5a/7/W1lZ9/PHHGj9+vD744APt3LlT9fX1Gjt2rFasWKGkpKQ7vt6yXHK744NRSkiJxDXZRU8Coy+BRVJfLCsqotYTLPQlMPrSV7j2xHbQun79upYuXarvfOc7SkhI0LPPPqtFixbJ5XJp06ZNWrdundauXXvHObxenzo6btgtRenpibbnCKZgrCkYQqkv9CQw+hJYqPQlGNzu+IhaT7DQl8DoS1+h1JN7+bfS1rcOb926paVLl2r69Ol68sknJUlpaWmyLEtRUVEqLy/X6dOn7ewCAAAgbA04aPl8Pn33u99Vbm6uKioq/ONtbW3+v5uampSXl2evQgAAgDA14FOHv/vd79TQ0KCHH35YM2bMkPTZrRz27duns2fPSpKGDx+umpqa4FQKAAAQZgYctCZMmKA//elPfca5ZxYAAMBnuDM8AACAIQQtAAAAQwhaAAAAhgTlhqUAEMmShsYpdoj9fy6DcZ+y7k971Hmly/Y8AO4PghYA9CN2SLT+z8L/63QZkqTFrxc6XQKAe8CpQwAAAEMIWgAAAIYQtAAAAAwhaAEAABhC0AIAADCEoAUAAGAIQQsAAMAQghYAAIAhBC0AAABDCFoAAACGELQAAAAMIWgBAAAYQtACAAAwhKAFAABgCEELAADAEIIWAACAIQQtAAAAQwhaAAAAhkQ7XQAAIDwlJcYq9oEhtudJT0+09frum5+q82q37TqCIWVonKwhwflotdsX76c9unSlKyi1YOAIWgCAAYl9YIj+e06p02Xo2z/bJ4VI0LKGRKt1xVGny5Ak/de6J5wuATIYtJqbm7VmzRr19vaqvLxclZWVpnYFAABCWFLSEMXGxtqex+5RPknq7u5WZ+entue5W0aCltfrVU1Njd58801lZmZq9uzZKiws1Oc//3kTuwMAACEsNjZWq1evdroMSfrfOu5f0DJyMfypU6f00EMPKTs7W7GxsSopKdHBgwdN7AoAACBkGQlaHo9HWVlZ/seZmZnyeDwmdgUAABCyXD6fzxfsSd955x0dPXpUa9askSTV19fr1KlTWrVqVbB3BQAAELKMHNHKzMzUhQsX/I89Ho8yMzNN7AoAACBkGQla48aN0/nz59XS0qLu7m41NjaqsLDQxK4AAABClpFvHUZHR2vVqlV67rnn5PV6NWvWLOXl5ZnYFQAAQMgyco0WAAAA+K1DAAAAYwhaAAAAhhC0AAAADCFoAQAAGGLsR6XD1fLly/XDH/7Q6TIc9eGHH2rUqFFKSEjQzZs3VVtbqz/+8Y8aNW
qUFi5cqMRE+z/qGY527NihoqIiPfjgg06XErJOnjyp06dPKy8vT5MmTXK6HMd0d3dr//79ysjI0Je//GXt3btXv//97zVq1Cg9/fTTiomJcbpEhJCWlhYdOHBAn3zyiSzL0ogRIzR9+nQlJCQ4XRqCYFB/63DhwoV9xt577z099thjkqTXX3/9fpcUEkpKStTQ0KDo6Gh973vf0wMPPKDi4mIdP35cZ8+e1WuvveZ0iY744he/qLi4OOXk5KikpERTp05VSkqK02U5avbs2dq9e7ckadeuXdq5c6eKior061//WoWFhaqsrHS4Qmd8+9vfltfr1c2bN5WYmKgbN26oqKhIx48fl8/n0/r1650uESFix44dOnz4sCZMmKDm5maNHj1aQ4cO1bvvvqsXX3zR/3mE8DWoj2h5PB6NGjVK5eXlcrlc8vl8OnPmjObPn+90aY7q7e1VdPRnb40zZ87o7bffliRNmDBBM2bMcLI0R2VnZ6uurk7Hjh3T/v37tXnzZo0ZM0alpaUqKioalP/32dPT4//7Zz/7md58802lpKRo/vz5mjNnzqANWn/+85+1d+9e9fT0qKCgQEePHpVlWZoxY4aeeuopp8sLSc8995y2bdvmdBn33c9//nPV19fLsixVVFSosrJSb731lubMmaNFixapvr7e6RIdc/XqVW3dulVNTU26dOmSXC6XUlJSNGXKFFVWVmro0KFOl3hXBnXQ2rNnj3bs2KHXX39dy5cv1+jRozVkyBB96Utfcro0R+Xl5WnPnj2aNWuWvvCFL+j06dMaN26czp075w9gg5HL5VJUVJQmTZqkSZMm6datW2publZjY6PWr1+v48ePO13ifdfb26vOzk719vbK5/P5j/DFx8fLsiyHq3OOz+dTd3e3urq61NXVpatXr8rtdqu7u/u2cDrYfPTRRwHHfT6fzp49e5+rCR1er1eWZam7u1vXr1+XJA0bNmxQv1ckqaqqSo899pjeeustpaenS5L++c9/6u2331ZVVZV+/OMfO1zh3Rm8n5qSoqKiNG/ePH3ta1/Tyy+/rLS0NHm9XqfLctyaNWu0Zs0abdmyRcnJyXrmmWeUlZWlBx980P9D4YPRv59lj4mJ0ZQpUzRlyhR1dXU5VJWzrl27ppkzZ8rn88nlcqmtrU0ZGRm6fv16n34NJrNnz9bUqVPV29urZcuW6Vvf+pays7P14YcfqqSkxOnyHDN79mxNnDgx4HvjypUrDlTkvNmzZ2vWrFkaP368Tp48qW9+85uSpEuXLikpKcnh6pzV2tqqN95447ax9PR0VVZWas+ePQ5Vde8G9TVa/+7w4cP64IMPVF1d7XQpIeHatWtqbW1VT0+PsrKylJaW5nRJjjp37pxGjhzpdBlhoaurSxcvXlR2drbTpTjG4/FIkjIzM3XlyhUdO3ZMw4YN0yOPPOJwZc4pLS3Va6+9phEjRvR5bvLkyTpy5Mj9LyoE/OUvf9Hf/vY35eXladSoUU6XEzLmz5+v/Px8ff3rX/d//ly8eNF/CcdPfvITZwu8SwQtAMB98c477+jhhx9Wbm5un+eampr01a9+1YGqEKo6OztVW1urgwcP6tKlS5Kk1NRU/xdtwuWIH0ELAOC4f10XCtyNcHq/cMNSAIDjNm/e7HQJCCPh9H4Z1BfDAwDun+nTp//H5y5evHgfK0E4iJT3C0ELAHBftLe364033uhz/yOfz6dnnnnGoaoQqiLl/ULQAgDcF1/5yld0/fp1jR49us9z3AEd/y5S3i9cDA8AAGAIF8MDAAAYQtACAAAwhKAFAABgCEELAADAEIIWAACAIf8DkE+pTkvNc6oAAAAASUVORK5CYII=\n", 551 | "text/plain": [ 552 | "
" 553 | ] 554 | }, 555 | "metadata": {}, 556 | "output_type": "display_data" 557 | } 558 | ], 559 | "source": [ 560 | "plt.figure(figsize=(10,5))\n", 561 | "df.loc[df['TLD'] > 3]['TLD'].value_counts().plot(kind='bar')" 562 | ] 563 | }, 564 | { 565 | "cell_type": "code", 566 | "execution_count": 163, 567 | "metadata": {}, 568 | "outputs": [ 569 | { 570 | "data": { 571 | "text/html": [ 572 | "
\n", 573 | "\n", 586 | "\n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | " \n", 605 | " \n", 606 | " \n", 607 | "
NumberOfPartsLengthLongestPartTLDRandomness
emil.engineering21611110.0
\n", 608 | "
" 609 | ], 610 | "text/plain": [ 611 | " NumberOfParts Length LongestPart TLD Randomness\n", 612 | "emil.engineering 2 16 11 11 0.0" 613 | ] 614 | }, 615 | "execution_count": 163, 616 | "metadata": {}, 617 | "output_type": "execute_result" 618 | } 619 | ], 620 | "source": [ 621 | "df.loc[df['TLD'] == 11].sample()" 622 | ] 623 | }, 624 | { 625 | "cell_type": "code", 626 | "execution_count": null, 627 | "metadata": {}, 628 | "outputs": [], 629 | "source": [] 630 | } 631 | ], 632 | "metadata": { 633 | "kernelspec": { 634 | "display_name": "Python 3", 635 | "language": "python", 636 | "name": "python3" 637 | }, 638 | "language_info": { 639 | "codemirror_mode": { 640 | "name": "ipython", 641 | "version": 3 642 | }, 643 | "file_extension": ".py", 644 | "mimetype": "text/x-python", 645 | "name": "python", 646 | "nbconvert_exporter": "python", 647 | "pygments_lexer": "ipython3", 648 | "version": "3.6.7" 649 | } 650 | }, 651 | "nbformat": 4, 652 | "nbformat_minor": 2 653 | } 654 | --------------------------------------------------------------------------------