├── certstream_analytics
├── __init__.py
├── reporters
│ ├── __init__.py
│ └── base.py
├── transformers
│ ├── __init__.py
│ └── base.py
├── storages
│ ├── __init__.py
│ ├── base.py
│ └── elasticsearch_storage.py
├── analysers
│ ├── __init__.py
│ ├── base.py
│ ├── domain_matching.py
│ └── common_domain_analyser.py
└── stream.py
├── .coveragerc
├── tests
├── opendns-top-domains.txt
├── test_stream.py
├── test_elasticsearch.py
├── test_reporter.py
├── samples.json
└── test_domain_matching_analyser.py
├── setup.cfg
├── .gitmodules
├── LICENSE
├── scripts
├── sundry
│ ├── generate_features.py
│ ├── isolation_forest.py
│ ├── elliptic_envelope.py
│ ├── lof.py
│ └── certstream-domain-features.ipynb
└── replay.py
├── .travis.yml
├── setup.py
├── .gitignore
├── bin
└── domain_matching.py
├── README.md
└── pylintrc
/certstream_analytics/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/.coveragerc:
--------------------------------------------------------------------------------
[run]
source=certstream_analytics
3 |
--------------------------------------------------------------------------------
/certstream_analytics/reporters/__init__.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=missing-docstring
2 | from .base import Reporter, FileReporter
3 |
--------------------------------------------------------------------------------
/tests/opendns-top-domains.txt:
--------------------------------------------------------------------------------
1 | google.com
2 | facebook.com
3 | bankofamerica.com
4 | apple.com
5 | www.net.cn
6 | discover.com
7 |
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [bdist_wheel]
2 | universal = 1
3 |
4 | [tool:pytest]
5 | pep8maxlinelength = 120
6 |
7 | [pep8]
8 | max-line-length = 120
9 |
--------------------------------------------------------------------------------
/certstream_analytics/transformers/__init__.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=missing-docstring
2 | from .base import Transformer, PassthroughTransformer, CertstreamTransformer
3 |
--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "data/opendns/public-domain-lists"]
2 | path = data/opendns/public-domain-lists
3 | url = https://github.com/opendns/public-domain-lists.git
4 |
--------------------------------------------------------------------------------
/certstream_analytics/storages/__init__.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=missing-docstring
2 | from .base import Storage
3 | from .elasticsearch_storage import ElasticsearchStorage
4 |
--------------------------------------------------------------------------------
/certstream_analytics/storages/base.py:
--------------------------------------------------------------------------------
1 | """
2 | Save certstream data into various storages, streaming or not.
3 | """
4 | from abc import ABCMeta, abstractmethod
5 |
6 |
7 | # pylint: disable=no-init,too-few-public-methods
class Storage(metaclass=ABCMeta):
    """
    Define the template of all storage classes.

    The class is abstract: it cannot be instantiated directly, and every
    concrete storage has to implement ``save``.
    """

    @abstractmethod
    def save(self, record):
        """
        Save a single record into the storage.
        """
19 |
--------------------------------------------------------------------------------
/certstream_analytics/analysers/__init__.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=missing-docstring
2 | from .base import Analyser, Debugger
3 | from .domain_matching import AhoCorasickDomainMatching
4 | from .domain_matching import DomainMatchingOption, DomainMatching
5 | from .common_domain_analyser import WordSegmentation
6 | from .common_domain_analyser import BulkDomainMarker
7 | from .common_domain_analyser import FeaturesGenerator
8 | from .common_domain_analyser import IDNADecoder
9 | from .common_domain_analyser import HomoglyphsDecoder
10 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2018 Huy Do
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/certstream_analytics/reporters/base.py:
--------------------------------------------------------------------------------
1 | """
2 | Report the analysis result somewhere.
3 | """
4 | import json
5 | from abc import ABCMeta, abstractmethod
6 |
7 |
8 | # pylint: disable=no-init,too-few-public-methods
class Reporter(metaclass=ABCMeta):
    """
    Define the template of all reporter classes.

    The class is abstract: it cannot be instantiated directly, and every
    concrete reporter has to implement ``publish``.
    """

    @abstractmethod
    def publish(self, report):
        """
        Publish a single report.
        """


class FileReporter(Reporter):
    """
    Simply print the report to a file, one JSON document per line.
    """
    def __init__(self, path):
        """
        Open the file at ``path`` in append mode.

        Note that an exception will be raised if the path is not valid or writable.
        """
        self.fhandler = open(path, 'a')

    def __del__(self):
        # The attribute does not exist when open() failed in __init__, so do
        # not raise a second error from the finalizer in that case.
        fhandler = getattr(self, 'fhandler', None)

        if fhandler is not None:
            fhandler.close()

    def publish(self, report):
        """
        This is a very basic reporter that will only print out the record it receives
        to a plain text file. Empty reports are ignored.
        """
        if not report:
            return

        print(json.dumps(report), file=self.fhandler)
        # Flush right away so that the report can be read back from the file
        # while the reporter is still alive.
        self.fhandler.flush()
44 |
--------------------------------------------------------------------------------
/scripts/sundry/generate_features.py:
--------------------------------------------------------------------------------
1 | '''
2 | Generate features for outlier detection.
3 | '''
4 |
5 | import json
6 | import sys
7 |
8 | from certstream_analytics.analysers import WordSegmentation
9 | from certstream_analytics.analysers import IDNADecoder
10 | from certstream_analytics.analysers import FeaturesGenerator
11 |
def main(max_count=None):
    '''
    Generate the features of each record and print them as JSON, one record
    per line.

    The records are assumed to be stored in a file passed in as the first
    parameter of the script, one JSON document per line. Lines which are not
    valid JSON are skipped.

    When max_count is set, at most that many records are printed. A falsy
    value (None or 0) means no limit.
    '''
    segmenter = WordSegmentation()
    decoder = IDNADecoder()
    generator = FeaturesGenerator()

    with open(sys.argv[1]) as fhandle:
        count = 0

        for line in fhandle:
            try:
                record = json.loads(line.strip())
            except json.decoder.JSONDecodeError:
                continue

            # The order matters, each analyser works on the output of the
            # previous one
            record = decoder.run(record)
            record = segmenter.run(record)
            record = generator.run(record)

            print(json.dumps(record))
            count += 1

            # Stop as soon as the limit is reached, not one record after it
            if max_count and count >= max_count:
                break
39 |
40 |
41 | if __name__ == '__main__':
42 | main()
43 |
--------------------------------------------------------------------------------
/tests/test_stream.py:
--------------------------------------------------------------------------------
1 | '''
2 | Test consuming the data from the great certstream.
3 | '''
4 | import time
5 | import unittest
6 |
7 | from certstream_analytics.analysers import Debugger
8 | from certstream_analytics.transformers import CertstreamTransformer
9 | from certstream_analytics.stream import CertstreamAnalytics
10 |
11 |
class CertstreamTest(unittest.TestCase):
    '''
    Test the way we consume data from certstream.
    '''
    # How long to wait for records before checking the result, in seconds
    DEFAULT_DELAY = 30

    def setUp(self):
        '''
        Setup the client to consume from certstream.
        '''
        self.debugger = Debugger()
        self.transformer = CertstreamTransformer()

        self.engine = CertstreamAnalytics(transformer=self.transformer,
                                          analysers=self.debugger)

    def test_consume(self):
        '''
        Start to consume some data from certstream.
        '''
        self.engine.start()

        try:
            # Wait a bit
            time.sleep(CertstreamTest.DEFAULT_DELAY)
        finally:
            # Always stop the engine, even when the wait is interrupted
            self.engine.stop()

        # We should see some data coming already. The message is only shown
        # when the assertion fails, so it describes the failure.
        self.assertTrue(self.debugger.count, 'No data was consumed from certstream')
40 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: python
2 | os:
3 | - linux
4 | python:
5 | - '3.7'
6 | before_install:
7 | - sudo apt-get install -y libenchant-dev
8 | - sudo apt-get install -y apt-transport-https
9 | - wget -qO - https://artifacts.elastic.co/GPG-KEY-elasticsearch | sudo apt-key add -
10 | - echo "deb https://artifacts.elastic.co/packages/6.x/apt stable main" | sudo tee -a /etc/apt/sources.list.d/elastic-6.x.list
11 | - sudo apt-get update && sudo apt-get remove -y elasticsearch
12 | - sudo apt-get install -y elasticsearch
13 | - sudo chown -R elasticsearch:elasticsearch /etc/default/elasticsearch
14 | - sudo service elasticsearch start
15 | install:
16 | - pip install --upgrade pytest
17 | - pip install pytest-pep8 pytest-cov
18 | - pip install codecov
19 | - pip install elasticsearch_dsl certstream pyahocorasick tldextract wordsegment pyenchant idna confusable-homoglyphs
20 | - pip install git+https://github.com/casics/nostril.git
21 | - pip install -e .[tests]
22 | before_script:
23 | - sleep 10
24 | - sudo systemctl -l status elasticsearch
25 | - curl 'http://localhost:9200'
26 | script:
27 | - pytest --pep8 -m pep8 certstream_analytics/
28 | - PYTHONPATH=$PWD:$PYTHONPATH pytest --cov=./ tests/
29 | after_script:
30 | - curl 'http://localhost:9200/_cat/indices?v'
31 | after_success:
32 | - codecov
33 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | '''
2 | Standard Python setup script.
3 | '''
4 |
5 | from setuptools import setup, find_packages
6 |
# Read the README explicitly as UTF-8 so that the build does not depend on the
# locale of the machine
with open('README.md', 'r', encoding='utf-8') as fh:
    long_description = fh.read()

# Shared between tests_require and the 'tests' extra
tests_requirements = [
    'coverage',
    'nose',
    'pytest-pep8',
    'pytest-cov',
    'codecov',
]

setup(
    name='certstream-analytics',
    version='0.1.7',
    description='certstream + analytics',
    url='https://github.com/huydhn/certstream-analytics',
    author='Huy Do',
    author_email='huydhn@gmail.com',
    license='MIT',
    long_description=long_description,
    long_description_content_type='text/markdown',
    install_requires=[
        'elasticsearch_dsl',
        'certstream',
        'pyahocorasick',
        'tldextract',
        'wordsegment',
        'pyenchant',
        'idna',
        'confusable_homoglyphs'
    ],
    tests_require=tests_requirements,
    # The CI installs the package with 'pip install -e .[tests]', which needs
    # the extra to be declared
    extras_require={
        'tests': tests_requirements,
    },
    dependency_links=[
        'https://github.com/casics/nostril/tarball/master'
    ],
    packages=find_packages(),
    scripts=['bin/domain_matching.py'],
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
    ],
)
48 |
--------------------------------------------------------------------------------
/scripts/sundry/isolation_forest.py:
--------------------------------------------------------------------------------
1 | '''
2 | Apply the isolation forest method to separate our outliers.
3 | '''
4 | import json
5 | import sys
6 | import numpy as np
7 |
8 | from sklearn.ensemble import IsolationForest
9 | from sklearn.preprocessing import scale
10 |
11 |
def main():
    '''
    Print the domains which are marked as outliers by the isolation forest.

    The records are read from the file passed in as the first parameter of
    the script, one JSON document per line. The procedure contains two simple
    steps:
    - Scale the data to the standard distribution with mean 0 and unit variance.
    This might be too simplistic.
    - Apply the isolation forest. The contamination level is set manually.

    The script quits with a non-zero status when the number of feature entries
    and the number of domains seen so far differ.
    '''
    domains = []
    raw = []

    with open(sys.argv[1]) as fhandle:
        for line in fhandle:
            line = line.strip()

            # Blank lines are not valid JSON documents
            if not line:
                continue

            record = json.loads(line)

            for analyser in record['analysers']:
                if analyser['analyser'] == 'FeaturesGenerator':
                    raw.extend(analyser['output'])

                if analyser['analyser'] == 'WordSegmentation':
                    domains.extend(analyser['output'].keys())

            if len(raw) != len(domains):
                # The standard output only carries the outliers, so the
                # offending record goes to the standard error
                print(record, file=sys.stderr)
                sys.exit(1)

    x_samples = scale(np.array(raw))

    # The 'behaviour' parameter has been removed from scikit-learn, passing it
    # raises a TypeError on current versions
    engine = IsolationForest(contamination=0.015)
    y_samples = engine.fit_predict(x_samples)

    for index, y_sample in enumerate(y_samples):
        if y_sample == -1:
            print(domains[index])
45 |
46 |
47 | if __name__ == '__main__':
48 | main()
49 |
--------------------------------------------------------------------------------
/scripts/sundry/elliptic_envelope.py:
--------------------------------------------------------------------------------
1 | '''
2 | Apply the elliptic envelope method to separate our outliers.
3 | '''
4 | import json
5 | import sys
6 | import numpy as np
7 |
8 | from sklearn.covariance import EllipticEnvelope
9 | from sklearn.preprocessing import scale
10 |
11 |
def main():
    '''
    Print the domains which are marked as outliers by the elliptic envelope.

    The records are read from the file passed in as the first parameter of
    the script, one JSON document per line. The procedure contains two simple
    steps:
    - Scale the data to the standard distribution with mean 0 and unit variance.
    This might be too simplistic.
    - Apply the elliptic envelope. The contamination level is set manually.

    The script quits with a non-zero status when the number of feature entries
    and the number of domains seen so far differ.
    '''
    domains = []
    raw = []

    with open(sys.argv[1]) as fhandle:
        for line in fhandle:
            line = line.strip()

            # Blank lines are not valid JSON documents
            if not line:
                continue

            record = json.loads(line)

            for analyser in record['analysers']:
                if analyser['analyser'] == 'FeaturesGenerator':
                    raw.extend(analyser['output'])

                if analyser['analyser'] == 'WordSegmentation':
                    domains.extend(analyser['output'].keys())

            if len(raw) != len(domains):
                # The standard output only carries the outliers, so the
                # offending record goes to the standard error
                print(record, file=sys.stderr)
                sys.exit(1)

    x_samples = scale(np.array(raw))

    engine = EllipticEnvelope(contamination=0.015, support_fraction=1.0)
    y_samples = engine.fit_predict(x_samples)

    for index, y_sample in enumerate(y_samples):
        if y_sample == -1:
            print(domains[index])
45 |
46 |
47 | if __name__ == '__main__':
48 | main()
49 |
--------------------------------------------------------------------------------
/scripts/sundry/lof.py:
--------------------------------------------------------------------------------
1 | '''
2 | Apply the local outlier factor method to separate our outliers.
3 | '''
4 | import json
5 | import sys
6 | import numpy as np
7 |
8 | from sklearn.neighbors import LocalOutlierFactor
9 | from sklearn.preprocessing import scale
10 |
11 |
def main():
    '''
    Print the domains which are marked as outliers by the local outlier factor.

    The records are read from the file passed in as the first parameter of
    the script, one JSON document per line. The procedure contains two simple
    steps:
    - Scale the data to the standard distribution with mean 0 and unit variance.
    This might be too simplistic.
    - Apply the local outlier factor. The contamination level is set manually.

    This method does not seem to work in our case because I suspect it treats
    groups of several outliers as clusters.

    The script quits with a non-zero status when the number of feature entries
    and the number of domains seen so far differ.
    '''
    domains = []
    raw = []

    with open(sys.argv[1]) as fhandle:
        for line in fhandle:
            line = line.strip()

            # Blank lines are not valid JSON documents
            if not line:
                continue

            record = json.loads(line)

            for analyser in record['analysers']:
                if analyser['analyser'] == 'FeaturesGenerator':
                    raw.extend(analyser['output'])

                if analyser['analyser'] == 'WordSegmentation':
                    domains.extend(analyser['output'].keys())

            if len(raw) != len(domains):
                # The standard output only carries the outliers, so the
                # offending record goes to the standard error
                print(record, file=sys.stderr)
                sys.exit(1)

    x_samples = scale(np.array(raw))

    # Need to check the appropriate value for n_neighbors
    engine = LocalOutlierFactor(contamination=0.015)
    y_samples = engine.fit_predict(x_samples)

    for index, y_sample in enumerate(y_samples):
        if y_sample == -1:
            print(domains[index])
49 |
50 |
51 | if __name__ == '__main__':
52 | main()
53 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | nohup.*
2 |
3 | # Byte-compiled / optimized / DLL files
4 | __pycache__/
5 | *.py[cod]
6 | *$py.class
7 |
8 | # C extensions
9 | *.so
10 |
11 | # Distribution / packaging
12 | .Python
13 | build/
14 | develop-eggs/
15 | dist/
16 | downloads/
17 | eggs/
18 | .eggs/
19 | lib/
20 | lib64/
21 | parts/
22 | sdist/
23 | var/
24 | wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .coverage
44 | .coverage.*
45 | .cache
46 | nosetests.xml
47 | coverage.xml
48 | *.cover
49 | .hypothesis/
50 | .pytest_cache/
51 |
52 | # Translations
53 | *.mo
54 | *.pot
55 |
56 | # Django stuff:
57 | *.log
58 | local_settings.py
59 | db.sqlite3
60 |
61 | # Flask stuff:
62 | instance/
63 | .webassets-cache
64 |
65 | # Scrapy stuff:
66 | .scrapy
67 |
68 | # Sphinx documentation
69 | docs/_build/
70 |
71 | # PyBuilder
72 | target/
73 |
74 | # Jupyter Notebook
75 | .ipynb_checkpoints
76 |
77 | # pyenv
78 | .python-version
79 |
80 | # celery beat schedule file
81 | celerybeat-schedule
82 |
83 | # SageMath parsed files
84 | *.sage.py
85 |
86 | # Environments
87 | .env
88 | .venv
89 | env/
90 | venv/
91 | ENV/
92 | env.bak/
93 | venv.bak/
94 |
95 | # Spyder project settings
96 | .spyderproject
97 | .spyproject
98 |
99 | # Rope project settings
100 | .ropeproject
101 |
102 | # mkdocs documentation
103 | /site
104 |
105 | # mypy
106 | .mypy_cache/
107 | *.txt
108 | .idea
109 |
--------------------------------------------------------------------------------
/tests/test_elasticsearch.py:
--------------------------------------------------------------------------------
1 | '''
2 | Save some dummy records into Elasticsearch.
3 | '''
4 | import os
5 | import json
6 | import time
7 | import unittest
8 |
9 | from elasticsearch import Elasticsearch
10 | from elasticsearch_dsl import Search, Q
11 |
12 | from certstream_analytics.transformers import CertstreamTransformer
13 | from certstream_analytics.storages import ElasticsearchStorage
14 |
15 |
class ElasticsearchTest(unittest.TestCase):
    '''
    Check that the transformed certstream records can be found in
    Elasticsearch once they have been saved.
    '''
    def setUp(self):
        '''
        Prepare the transformer, the storage, and a search client. The host
        comes from ELASTICSEARCH_HOST and falls back to localhost:9200.
        '''
        host = os.getenv('ELASTICSEARCH_HOST', 'localhost:9200')

        self.transformer = CertstreamTransformer()
        self.storage = ElasticsearchStorage(hosts=[host])
        self.search = Search(using=Elasticsearch(host), index='certstream-*')

    def test_save(self):
        '''
        Save all the samples, then look each of them up by its first domain.
        '''
        tests_dir = os.path.dirname(os.path.realpath(__file__))
        samples_path = os.path.join(tests_dir, 'samples.json')

        with open(samples_path) as fhandle:
            samples = json.load(fhandle)

        for sample in samples:
            self.storage.save(self.transformer.apply(sample))

        # Give Elasticsearch a few seconds to index the data before querying
        time.sleep(5)

        for sample in samples:
            all_domains = sample['data']['leaf_cert']['all_domains']

            # Look for the record in Elasticsearch using its first domain
            query = Q('multi_match', query=all_domains[0], fields=['domain', 'san'])
            response = self.search.query(query).execute()

            self.assertGreaterEqual(response.hits.total, 1,
                                    'The record has been indexed in Elasticsearch')
            self.assertIn(response.hits[0].domain, all_domains,
                          'The correct record is returned')
58 |
--------------------------------------------------------------------------------
/certstream_analytics/analysers/base.py:
--------------------------------------------------------------------------------
1 | """
2 | Analyse the certificate data from certstream.
3 | """
4 | import json
5 | import logging
6 | from abc import ABCMeta, abstractmethod
7 |
8 |
9 | # pylint: disable=no-init,too-few-public-methods
class Analyser(metaclass=ABCMeta):
    """
    Define the template of all analyser classes.

    The class is abstract: it cannot be instantiated directly, and every
    concrete analyser has to implement ``run``.
    """

    @abstractmethod
    def run(self, record):
        """
        In normal cases, an analyser will process the record, save the result
        into the record, and then return the updated record so that the next
        analyser can choose what to do next. Therefore, the structure of the
        record comes from CertstreamTransformer class as follows:

        {
            # These fields are extracted from certstream
            cert_index: INTEGER,
            seen: TIMESTAMP,
            chain: [
                ORGANIZATION
            ],
            not_before: TIMESTAMP,
            not_after: TIMESTAMP,
            all_domains: [
                SAN
            ],

            # This is a placeholder field which is used later by the
            # analysers. Each analyser will append its result here.
            analysers: [
                {
                    analyser: ANALYSER NAME,
                    output: ANYTHING GOES HERE,
                },
            ],
        }
        """


class Debugger(Analyser):
    """
    A dummy analyser for debugging.
    """
    def __init__(self):
        """
        Keep track of the number of records so far for debugging purpose.
        """
        self.count = 0

    def run(self, record):
        """
        This is a dummy analyser that only logs the record it processes and
        appends the number of records seen so far as its output.
        """
        logging.info(json.dumps(record))

        # Update the number of records so far
        self.count += 1

        if 'analysers' not in record:
            record['analysers'] = []

        record['analysers'].append({
            'analyser': type(self).__name__,
            'output': self.count,
        })

        return record
77 |
--------------------------------------------------------------------------------
/certstream_analytics/transformers/base.py:
--------------------------------------------------------------------------------
1 | """
2 | Transform the certificate data from certstream before passing it to the
3 | processing pipeline.
4 | """
5 | from abc import ABCMeta, abstractmethod
6 |
7 |
8 | # pylint: disable=no-init,too-few-public-methods
class Transformer(metaclass=ABCMeta):
    """
    Define the template of all transformer classes.

    The class is abstract: it cannot be instantiated directly, and every
    concrete transformer has to implement ``apply``.
    """

    @abstractmethod
    def apply(self, raw):
        """
        Transform a raw message and return the result.
        """


class PassthroughTransformer(Transformer):
    """
    A dummy transformer that doesn't do anything.
    """
    def apply(self, raw):
        """
        Return the raw message untouched.
        """
        return raw


class CertstreamTransformer(Transformer):
    """
    Transform data from certstream into something readily consumable by the
    processing pipeline.
    """
    def apply(self, raw):
        """
        The format of the message from certstream can be found at their github
        documentation.

        So far, we are only interested in the domain names, the timestamps, and
        probably the content of the subject. So the returned structure is as
        follows:

        {
            # These fields are extracted from certstream
            cert_index: INTEGER,
            seen: TIMESTAMP,
            chain: [
                ORGANIZATION
            ],
            not_before: TIMESTAMP,
            not_after: TIMESTAMP,
            all_domains: [
                SAN
            ],

            # This is a placeholder field which is used later by the
            # analysers. Each analyser will append its result here.
            analysers: [
                {
                    analyser: ANALYSER NAME,
                    output: ANYTHING GOES HERE,
                },
            ],
        }

        None is returned when the certificate has no domain at all.
        """
        filtered = {
            'cert_index': raw['data']['cert_index'],
            'seen': raw['data']['seen'],
            # NOTE: the chain is not extracted yet, it is always empty
            'chain': [],

            # The analyser result will be stored here later on
            'analysers': [],
        }

        interested_fields = ['not_before', 'not_after', 'all_domains']

        if raw['data']['leaf_cert']['all_domains']:
            filtered.update({k: raw['data']['leaf_cert'][k] for k in interested_fields})
            return filtered

        return None
86 |
--------------------------------------------------------------------------------
/certstream_analytics/storages/elasticsearch_storage.py:
--------------------------------------------------------------------------------
1 | """
2 | Save certstream data into Elasticsearch so that it can be queried by Kibana
3 | later on.
4 | """
5 | from datetime import datetime
6 | from elasticsearch_dsl import connections, analyzer
7 | from elasticsearch_dsl import Document, Date, Text, Keyword
8 |
9 | from .base import Storage
10 |
11 | ANALYZER = analyzer('standard_analyzer',
12 | tokenizer='standard_tokenizer',
13 | filter=['lowercase'])
14 |
15 |
16 | # pylint: disable=too-few-public-methods
class ElasticsearchStorage(Storage):
    """
    An experimental Elasticsearch storage to keep and index the received records.
    """
    class Record(Document):
        """
        An Elasticsearch record as it is.
        """
        # Set when the record is saved, see save() below
        timestamp = Date(default_timezone='UTC')

        # As reported by certstream
        seen = Date(default_timezone='UTC')

        # The not_before and not_after timestamps of the record
        not_before = Date(default_timezone='UTC')
        not_after = Date(default_timezone='UTC')

        # The domain and its alternative names
        domain = Text(analyzer=ANALYZER, fields={'raw': Keyword()})
        san = Text(analyzer=ANALYZER, fields={'raw': Keyword()})

        # The issuer
        chain = Text(analyzer=ANALYZER, fields={'raw': Keyword()})

        class Index:
            """
            Use daily indices.
            """
            name = 'certstream-*'

        # pylint: disable=arguments-differ
        def save(self, **kwargs):
            """
            Stamp the record with the current time and save it into the daily
            index of that time.
            """
            self.timestamp = datetime.now()
            # Override the index to go to the proper timeslot
            kwargs['index'] = self.timestamp.strftime('certstream-%Y.%m.%d')

            return super().save(**kwargs)

    def __init__(self, hosts, timeout=10):
        """
        Create the connection to the given Elasticsearch hosts. The hosts
        are mandatory, only the timeout has a default value.
        """
        connections.create_connection(hosts=hosts, timeout=timeout)

    def save(self, record):
        """
        Save the certstream record in Elasticsearch.

        Empty records are ignored. CertstreamTransformer returns None for a
        certificate without any domain, and there is nothing to index then.
        """
        if not record:
            return

        elasticsearch_record = ElasticsearchStorage.Record(meta={'id': record['cert_index']})

        # In milliseconds
        elasticsearch_record.seen = int(record['seen'] * 1000)
        elasticsearch_record.not_before = int(record['not_before'] * 1000)
        elasticsearch_record.not_after = int(record['not_after'] * 1000)

        # Elasticsearch will parse and index the domain and all its alternative names
        elasticsearch_record.domain = record['all_domains'][0]
        elasticsearch_record.san = record['all_domains'][1:]

        elasticsearch_record.save()
80 |
--------------------------------------------------------------------------------
/tests/test_reporter.py:
--------------------------------------------------------------------------------
1 | '''
2 | Various tests for the reporter module.
3 | '''
4 | import json
5 | import tempfile
6 | import unittest
7 |
8 | from certstream_analytics.reporters import FileReporter
9 |
10 |
class FileReporterTest(unittest.TestCase):
    '''
    Test the file-based reporter.
    '''
    def setUp(self):
        '''
        Create a temporary file so that the test can write its reports into it.
        '''
        self.tmp = tempfile.NamedTemporaryFile()
        self.reporter = FileReporter(path=self.tmp.name)

    def test_report(self):
        '''
        Dump all the test reports to our temporary file, then read them back.
        '''
        cases = [
            {
                'report': {
                    'all_domains': ['store.google.com', 'google.com'],
                    'analysers': [
                        {
                            'analyser': 'AhoCorasickDomainMatching',
                            'domain': 'store.google.com',
                            'match': 'google',
                        },
                    ],
                },
                'description': 'Report an exact match domain',
            },

            {
                'report': {
                    'all_domains': ['www.facebook.com.msg40.site'],
                    'analysers': [
                        {
                            'analyser': 'AhoCorasickDomainMatching',
                            'domain': 'www.facebook.com.msg40.site',
                            'match': 'facebook',
                        },
                    ],
                },
                'description': 'Report a phishing domain with a sub-domain match',
            },

            {
                'report': {
                    'all_domains': ['login-appleid.apple.com.managesuppport.co'],
                    'analysers': [
                        {
                            'analyser': 'AhoCorasickDomainMatching',
                            'domain': 'login-appleid.apple.com.managesuppport.co',
                            'match': 'apple',
                        },
                    ],
                },
                'description': 'Report a phishing domain with a partial string match',
            },

            {
                'report': {},
                'description': 'Report nothing and thus will be ignored',
            },
        ]

        for case in cases:
            self.reporter.publish(case['report'])

        # The reporter keeps its file open, so make sure that nothing is left
        # in its buffer before reading the file back. Otherwise the file looks
        # empty and the checks below never run.
        self.reporter.fhandler.flush()

        # The empty reports are ignored by the reporter
        expected = [case for case in cases if case['report']]

        with open(self.tmp.name) as fhandler:
            lines = fhandler.readlines()

        self.assertEqual(len(lines), len(expected),
                         'The number of lines differs from the number of non-empty reports')

        for line, case in zip(lines, expected):
            got = json.loads(line)
            self.assertDictEqual(got, case['report'], case['description'])
84 |
--------------------------------------------------------------------------------
/bin/domain_matching.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """
3 | A simple utility to query certstream and match its records with a list
4 | of known domains (from OpenDNS). This script can also save the data into
5 | downstream storage for further processing, for example, Elasticsearch.
6 | """
7 | import argparse
8 | import logging
9 | import signal
10 | import sys
11 | import time
12 |
13 | from certstream_analytics.analysers import AhoCorasickDomainMatching
14 | from certstream_analytics.analysers import WordSegmentation
15 | from certstream_analytics.analysers import DomainMatching, DomainMatchingOption
16 | from certstream_analytics.analysers import BulkDomainMarker
17 | from certstream_analytics.analysers import IDNADecoder
18 | from certstream_analytics.analysers import HomoglyphsDecoder
19 | from certstream_analytics.analysers import FeaturesGenerator
20 | from certstream_analytics.transformers import CertstreamTransformer
21 | from certstream_analytics.reporters import FileReporter
22 | from certstream_analytics.storages import ElasticsearchStorage
23 | from certstream_analytics.stream import CertstreamAnalytics
24 |
25 | DONE = False
26 |
27 |
28 | # pylint: disable=unused-argument
def exit_gracefully(signum, stack):
    """
    Just to be nice: raise the module-level DONE flag and do nothing else.

    The function has the (signum, stack) signature of a signal handler, and
    both arguments are ignored.
    """
    # pylint: disable=global-statement
    global DONE
    DONE = True
36 |
37 |
def init_analysers(domains_file, include_tld, matching_option):
    """
    Initialize all the analysers for matching domains. The list includes:

    - IDNA
    - Homoglyphs
    - AhoCorasick
    - Word segmentation
    - Bulk domains
    - Meta domain matching
    - Features generator

    The domains to match with are read from domains_file, one domain per
    line. Blank lines are skipped. The include_tld and matching_option
    parameters are passed on to the DomainMatching analyser.
    """
    with open(domains_file) as fhandle:
        stripped = (line.rstrip() for line in fhandle)
        # A blank line would otherwise end up as an empty domain to match with
        domains = [domain for domain in stripped if domain]

    # Initialize all analysers. Note that their order is important because
    # they will be executed in that order
    return [
        IDNADecoder(),
        HomoglyphsDecoder(greedy=False),
        AhoCorasickDomainMatching(domains=domains),
        WordSegmentation(),
        BulkDomainMarker(),
        DomainMatching(include_tld=include_tld, option=matching_option),
        FeaturesGenerator(),
    ]
63 |
64 |
def run():
    """
    A simple utility to query certstream and match its records to a list of
    known domains from OpenDNS.

    The command line is parsed, the analysers are built from the --domains
    file, an optional file reporter (--dump-location) and an optional
    Elasticsearch storage (--elasticsearch-host) are created, and the engine
    is started. The function then polls the DONE flag once per second and
    stops the engine once the flag is raised.
    """
    epilog = '''
examples:
\033[1;33m/usr/bin/domain_matching.py --elasticsearch-host elasticsearch:9200\033[0m

\033[1;33m/usr/bin/domain_matching.py --dump-location certstream.txt\033[0m

\033[1;33m/usr/bin/domain_matching.py --domains opendns-top-domains.txt\033[0m

Consume data from Certstream and does its magic.
'''
    parser = argparse.ArgumentParser(description=__doc__, epilog=epilog,
                                     formatter_class=argparse.RawDescriptionHelpFormatter)

    # The list of domains is opened unconditionally below, so let argparse
    # report a missing value instead of failing later inside open()
    parser.add_argument('--domains', required=True,
                        help='the list of domains to match with (e.g. opendns-top-domains.txt)')

    parser.add_argument('--elasticsearch-host',
                        help='set the Elasticsearch host to store the records from Certstream')

    parser.add_argument('--dump-location',
                        help='where to dump the records from Certstream')

    try:
        args = parser.parse_args()
    # pylint: disable=broad-except
    except Exception as error:
        logging.error(error)
        # some errors occur when parsing the arguments, show the usage
        parser.print_help()
        # then quit
        sys.exit(1)

    transformer = CertstreamTransformer()
    analysers = init_analysers(domains_file=args.domains,
                               include_tld=True,
                               matching_option=DomainMatchingOption.ORDER_MATCH)

    # Both sinks are optional and only created when requested
    reporter = FileReporter(path=args.dump_location) if args.dump_location else None
    storage = ElasticsearchStorage(hosts=[args.elasticsearch_host]) if args.elasticsearch_host else None

    engine = CertstreamAnalytics(transformer=transformer,
                                 storages=storage,
                                 analysers=analysers,
                                 reporters=reporter)
    engine.start()

    # The engine runs in its own thread, so wait here until DONE is raised
    while not DONE:
        time.sleep(1)

    engine.stop()
119 |
120 |
if __name__ == '__main__':
    # Route both the interrupt and the termination signals to the same
    # handler so that the script can exit gracefully
    for signum in (signal.SIGINT, signal.SIGTERM):
        signal.signal(signum, exit_gracefully)

    run()
127 |
--------------------------------------------------------------------------------
/certstream_analytics/stream.py:
--------------------------------------------------------------------------------
1 | """
2 | All hail [certstream](https://github.com/CaliDog/certstream-python)!!
3 |
4 | This module consumes the feed of certificates from certstream and does
5 | the heavy lifting.
6 | """
7 | import sys
8 | import threading
9 | import certstream
10 |
11 | from certstream_analytics.analysers import Analyser
12 | from certstream_analytics.reporters import Reporter
13 | from certstream_analytics.storages import Storage
14 |
15 |
class CertstreamAnalytics():
    """
    Consume the feed of certificates from certstream, transform the data, and
    save it into various storages.
    """

    def __init__(self, transformer=None, storages=None, analysers=None, reporters=None):
        """
        This is the entry point of the whole module. It consumes data from
        certstream, transforms it using a transformer, saves it into the
        configured storages, runs the user-defined analysers, and publishes
        the outcome through the reporters.

        The transformer can be None or an object exposing apply(message). It
        transforms the raw data from certstream; without it the raw message
        is used as is.

        The storages can be None, a single Storage instance, or a list/tuple
        of them. Each one receives the transformed message via save().

        The analysers can be None, a single Analyser instance, or a list/tuple
        of them. It's entirely up to the user to decide what to do here with
        the transformed data from certstream.

        The reporters can be None, a single Reporter instance, or a list/tuple
        of them. As the name implies, they collect and publish the result of
        the analysers somewhere.

        A TypeError is raised when an entry is not an instance of the
        expected base class.
        """
        self.transformer = transformer

        self.analysers = []
        self.reporters = []
        self.storages = []

        def _init_member(member, value, kind):
            """
            Store a single instance or a list/tuple of instances under the
            given attribute, then check the type of every stored entry.
            """
            if value:
                if isinstance(value, (list, tuple)):
                    setattr(self, member, value)
                else:
                    getattr(self, member).append(value)

            for type_check in getattr(self, member):
                if not isinstance(type_check, kind):
                    raise TypeError('Invalid {} type: {}'.format(member, type(type_check).__name__))

        _init_member('analysers', analysers, Analyser)
        _init_member('reporters', reporters, Reporter)
        _init_member('storages', storages, Storage)

        self.stopped = True
        self.thread = None

    def start(self):
        """
        Start consuming data from certstream in a background thread.
        """
        # Mark the engine as running before the thread is launched. When the
        # flag is cleared by the worker itself, a stop() issued right after
        # start() can still see the flag set and return without stopping
        # anything.
        self.stopped = False

        # Run the stream in a separate thread
        self.thread = threading.Thread(target=self._consume)
        # So that it will be killed when the main thread stop
        self.thread.daemon = True
        self.thread.start()

    def stop(self):
        """
        Stop consuming data from certstream and wait for the worker thread.
        Calling it on an engine that is not running does nothing.
        """
        if self.stopped:
            return

        # The callback checks this flag on the next message and exits
        self.stopped = True
        self.thread.join()

    def _consume(self):
        """
        Listen for certstream events and hand each of them to the callback.
        This is the target of the worker thread created by start().
        """
        # pylint: disable=unnecessary-lambda
        certstream.listen_for_events(lambda m, c: self._callback(m, c),
                                     url='wss://certstream.calidog.io')

    # pylint: disable=unused-argument
    def _callback(self, message, context):
        """
        Handle one message from certstream: transform it, save it, run the
        analysers in order, and publish the final result.
        """
        if self.stopped:
            sys.exit()

        # Heartbeats, unknown types, and messages without a type are ignored
        if message.get('message_type') != 'certificate_update':
            return

        if self.transformer:
            # Apply the user-defined transformation. The structure of the raw
            # message is at See https://github.com/CaliDog/certstream-python/
            transformed_message = self.transformer.apply(message)
        else:
            transformed_message = message

        if self.storages and transformed_message:
            # Save the message into a more permanent storage. May be we should
            # support multiple storages in parallel here
            for storage in self.storages:
                storage.save(transformed_message)

        # Note that the order of analysers is extremely important cause the
        # output of an analyser will be come the input of the next analyser
        for analyser in self.analysers:
            # An analyser returning nothing ends the chain for this message
            if not transformed_message:
                break

            transformed_message = analyser.run(transformed_message)

        if self.reporters and transformed_message:
            # and report the final result
            for reporter in self.reporters:
                reporter.publish(transformed_message)
138 |
--------------------------------------------------------------------------------
/scripts/replay.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | '''
3 | Replay a stream of records from certstream to test the processing pipeline.
4 | '''
5 | import argparse
6 | import json
7 | import logging
8 | import sys
9 |
10 | from certstream_analytics.analysers import AhoCorasickDomainMatching
11 | from certstream_analytics.analysers import WordSegmentation
12 | from certstream_analytics.analysers import DomainMatching, DomainMatchingOption
13 | from certstream_analytics.analysers import BulkDomainMarker
14 | from certstream_analytics.analysers import IDNADecoder
15 | from certstream_analytics.analysers import HomoglyphsDecoder
16 | from certstream_analytics.analysers import FeaturesGenerator
17 | from certstream_analytics.reporters import FileReporter
18 | from certstream_analytics.storages import ElasticsearchStorage
19 |
20 |
def _file_reporter(location):
    '''
    Build a file reporter writing to the given location.
    '''
    return FileReporter(path=location)


def _elasticsearch_storage(host):
    '''
    Build an Elasticsearch storage talking to the given host.
    '''
    return ElasticsearchStorage(hosts=[host])


# Reporter type (as given on the command line) to its factory
SUPPORTED_REPORTERS = {
    'file': _file_reporter,
}

# Storage type (as given on the command line) to its factory
SUPPORTED_STORAGES = {
    'elasticsearch': _elasticsearch_storage,
}
28 |
29 |
def init_analysers(domains_file, include_tld, matching_option):
    '''
    Initialize all the analysers for matching domains. The list includes:

    - IDNA
    - Homoglyphs
    - AhoCorasick
    - Word segmentation
    - Bulk domains
    - Meta domain matching
    - Features generator

    :param domains_file: path to a text file holding one domain per line
    :param include_tld: forwarded as is to DomainMatching
    :param matching_option: forwarded as is to DomainMatching (its option)
    :return: the list of analysers, in the order they have to be executed
    '''
    # Read the list as UTF-8 regardless of the locale, and drop blank lines
    # and surrounding whitespace so that an empty or padded string is never
    # handed over to the matcher as a domain
    with open(domains_file, encoding='utf-8') as fhandle:
        stripped = (line.strip() for line in fhandle)
        domains = [domain for domain in stripped if domain]

    # Initialize all analysers. Note that their order is important cause they
    # will be executed in that order
    return [
        IDNADecoder(),
        HomoglyphsDecoder(greedy=False),
        AhoCorasickDomainMatching(domains=domains),
        WordSegmentation(),
        BulkDomainMarker(),
        DomainMatching(include_tld=include_tld, option=matching_option),
        FeaturesGenerator(),
    ]
55 |
56 |
def run():
    '''
    A simple utility to replay certstream and match the records to a list of
    known domains from OpenDNS. It also generates several features for each
    domain such as the domain length.

    Every line of the --replay file is parsed as JSON (lines that fail to
    parse are skipped), saved into the selected storage if any, run through
    all the analysers in order, and finally handed to the reporter.
    '''
    epilog = '''
examples:
\033[1;33m/usr/bin/replay.py --replay certstream.txt\033[0m

\033[1;33m/usr/bin/replay.py --storage-host elasticsearch:9200 --storage elasticsearch\033[0m

\033[1;33m/usr/bin/replay.py --report-location report.txt --report file\033[0m

\033[1;33m/usr/bin/replay.py --domains opendns-top-domains.txt\033[0m

Replay data from certstream.
'''
    parser = argparse.ArgumentParser(description=__doc__, epilog=epilog,
                                     formatter_class=argparse.RawDescriptionHelpFormatter)

    # Both files are opened unconditionally below, so let argparse report a
    # missing value instead of failing later inside open()
    parser.add_argument('--replay', required=True,
                        help='the list of records from certstream (one per line)')
    parser.add_argument('--domains', required=True,
                        help='the list of domains to match with (opendns-top-domains.txt)')

    parser.add_argument('--storage-host', default='localhost:9200',
                        help='set the storage host')
    parser.add_argument('-s', '--storage',
                        help='choose the storage type (elasticsearch)')

    parser.add_argument('--report-location',
                        help='where to save the report to?')
    parser.add_argument('-r', '--report', default='file',
                        help='choose the reporter type')

    try:
        args = parser.parse_args()
    # pylint: disable=broad-except
    except Exception as error:
        logging.error(error)
        # some errors occur when parsing the arguments, show the usage
        parser.print_help()
        # then quit
        sys.exit(1)

    if args.report and args.report not in SUPPORTED_REPORTERS:
        error = 'Report type \033[1;31m{}\033[0m is not supported. The list of supported reporters includes: {}' \
            .format(args.report, list(SUPPORTED_REPORTERS.keys()))

        logging.error(error)
        # Encounter an unsupported reporter type
        sys.exit(1)

    if args.storage and args.storage not in SUPPORTED_STORAGES:
        error = 'Storage type \033[1;31m{}\033[0m is not supported. The list of supported storages includes: {}' \
            .format(args.storage, list(SUPPORTED_STORAGES.keys()))

        logging.error(error)
        # Encounter an unsupported storage type
        sys.exit(1)

    analysers = init_analysers(domains_file=args.domains,
                               include_tld=True,
                               matching_option=DomainMatchingOption.ORDER_MATCH)

    # Always bind both names so that the loop below never touches an
    # undefined variable when a sink has been disabled
    reporter = SUPPORTED_REPORTERS[args.report](args.report_location) if args.report else None
    storage = SUPPORTED_STORAGES[args.storage](args.storage_host) if args.storage else None

    with open(args.replay, encoding='utf-8') as fhandler:
        for raw in fhandler:
            try:
                record = json.loads(raw)
            except json.decoder.JSONDecodeError:
                # Not a JSON record, ignore the line
                continue

            # Same rules as the live engine: an empty record is neither
            # saved, analysed, nor reported
            if storage is not None and record:
                storage.save(record)

            for analyser in analysers:
                if not record:
                    break

                # Run something here
                record = analyser.run(record)

            if reporter is not None and record:
                reporter.publish(record)


if __name__ == '__main__':
    run()
147 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Certstream + Analytics
2 |
3 | [](https://travis-ci.org/huydhn/certstream-analytics)
4 | [](http://codecov.io/gh/huydhn/certstream-analytics?branch=master)
5 |
6 |
7 | # Installation
8 |
9 | The package can be installed from
10 | [PyPI](https://pypi.org/project/certstream-analytics)
11 |
12 | ```
13 | pip install certstream-analytics
14 | ```
15 |
16 | # Quick usage
17 |
18 | ```bash
19 | bin/domain_matching.py --domains domains.txt --dump-location certstream.txt
20 |
21 | # The file domains.txt contains the list of domains that we want to monitor
22 | # for matches (domains with similar names). For example, a file with only
23 | # two entries:
24 | #
25 | # gmail.com
26 | # facebook.com
27 | #
28 | # will match any domain that contains the gmail or facebook keyword.
29 | #
30 | # All the records consumed from certstream will be kept in certstream.txt
31 | ```
32 |
33 | # API
34 |
35 | ```python
36 | import time
37 |
38 | from certstream_analytics.analysers import WordSegmentation
39 | from certstream_analytics.analysers import IDNADecoder
40 | from certstream_analytics.analysers import HomoglyphsDecoder
41 |
42 | from certstream_analytics.transformers import CertstreamTransformer
43 | from certstream_analytics.storages import ElasticsearchStorage
44 | from certstream_analytics.stream import CertstreamAnalytics
45 |
46 | done = False
47 |
48 | # These analysers will be run in the same order
49 | analyser = [
50 | IDNADecoder(),
51 | HomoglyphsDecoder(),
52 | WordSegmentation(),
53 | ]
54 |
55 | # The following fields are filtered out and indexed:
56 | # - String: domain
57 | # - List: SAN
58 | # - List: Trust chain
59 | # - Timestamp: Not before
60 | # - Timestamp: Not after
61 | # - Timestamp: Seen
62 | transformer = CertstreamTransformer()
63 |
64 | # Indexed the data in Elasticsearch
65 | storage = ElasticsearchStorage(hosts=['localhost:9200'])
66 |
67 | consumer = CertstreamAnalytics(transformer=transformer,
68 | storages=storage,
69 | analysers=analyser)
70 | # The consumer is run in another thread so this function is non-blocking
71 | consumer.start()
72 |
73 | while not done:
74 | time.sleep(1)
75 |
76 | consumer.stop()
77 | ```
78 |
79 | ## IDNA decoder
80 | This analyser decodes IDNA domain names into Unicode for further processing
81 | downstream. Normally, it will be the very first analyser to be run. If
82 | the analyser encounters a malformed IDNA domain string, it will keep the
83 | domain as it is.
84 |
85 | ```python
86 | from certstream_analytics.analysers import IDNADecoder
87 |
88 | decoder = IDNADecoder()
89 |
90 | # Just an example dummy record
91 | record = {
92 | 'all_domains': [
93 | 'xn--f1ahbgpekke1h.xn--p1ai',
94 | ]
95 | }
96 |
97 | # The domain name will now become 'укрэмпужск.рф'
98 | print(decoder.run(record))
99 | ```
100 |
101 | ## Homoglyphs decoder
102 | There are lots of phishing websites that utilize [homoglyphs](https://en.wikipedia.org/wiki/Homoglyph)
103 | to lure the victims. Some common examples include 'l' and 'i' or the
104 | Unicode character RHO '𝞀' and 'p'. The homoglyphs decoder uses the excellent
105 | [confusable_homoglyphs](https://github.com/vhf/confusable_homoglyphs) to
106 | generate all potential alternative domain names in ASCII.
107 |
108 | ```python
109 | from certstream_analytics.analysers import HomoglyphsDecoder
110 |
111 | # If the greedy flag is set, all alternative domains will be returned
112 | decoder = HomoglyphsDecoder(greedy=False)
113 |
114 | # Just an example dummy record
115 | record = {
116 | 'all_domains': [
117 | # MATHEMATICAL MONOSPACE SMALL P
118 | '*.𝗉aypal.com',
119 |
120 | # MATHEMATICAL SANS-SERIF BOLD SMALL RHO
121 | '*.𝗉ay𝞀al.com',
122 | ]
123 | }
124 |
125 | # The domain name will now be converted to '*.paypal.com' with the ASCII
126 | # character p
127 | print(decoder.run(record))
128 | ```
129 |
130 | ## Aho-Corasick
131 | A domain and its SAN from Certstream will be compared against a list of
132 | the most popular [domains](https://github.com/opendns/public-domain-lists)
133 | (from OpenDNS) using the Aho-Corasick algorithm. This is a simple check to
134 | remove some of the most obvious phishing domains. For example, *www.facebook.com.msg40.site*
135 | will match *facebook* because *facebook* is in the above list of most
136 | popular domains (I wonder how long it is going to last).
137 |
138 | ```python
139 | from certstream_analytics.analysers import AhoCorasickDomainMatching
140 | from certstream_analytics.reporters import FileReporter
141 |
142 | # Print the list of matching domains
143 | reporter = FileReporter('matching-results.txt')
144 |
145 | with open('opendns-top-domains.txt') as fhandle:
146 | domains = [line.rstrip() for line in fhandle]
147 |
148 | # The list of domains to match against
149 | domain_matching_analyser = AhoCorasickDomainMatching(domains)
150 |
151 | consumer = CertstreamAnalytics(transformer=transformer,
152 | analysers=domain_matching_analyser,
153 | reporters=reporter)
154 |
155 | # Need to think about what to do with the matching result
156 | consumer.start()
157 |
158 | while not done:
159 | time.sleep(1)
160 |
161 | consumer.stop()
162 | ```
163 |
164 | ## Word segmentation
165 | In order to improve the accuracy of the matching algorithm, we segment
166 | the domains into English words using
167 | [wordsegment](https://github.com/grantjenks/python-wordsegment).
168 |
169 | ```python
170 | from certstream_analytics.analysers import WordSegmentation
171 |
172 | wordsegmentation = WordSegmentation()
173 |
174 | # Just an example dummy record
175 | record = {
176 | 'all_domains': [
177 | 'login-appleid.apple.com.managesupport.co',
178 | ]
179 | }
180 |
181 | # The returned output is as follows:
182 | #
183 | # {
184 | # 'analyser': 'WordSegmentation',
185 | # 'output': {
186 | # 'login-appleid.apple.com.managesupport.co': [
187 | # 'login',
188 | # 'apple',
189 | # 'id',
190 | # 'apple',
191 | # 'com',
192 | # 'manage',
193 | # 'support',
194 | # 'co'
195 | # ],
196 | # },
197 | #
198 | print(wordsegmentation.run(record))
199 | ```
200 |
201 | ## Features generator
202 | A list of features for each domain will also be generated so that they
203 | can be used for classification jobs further downstream. The list
204 | includes:
205 |
206 | - The number of dot-separated fields in the domain, for example, www.google.com has 3.
207 | - The overall length of the domain in characters.
208 | - The length of the longest dot-separated field.
209 | - The length of the TLD, e.g. .online (6) or .download (8) is longer than .com (3).
210 | - The randomness level of the domain. The [Nostril](https://github.com/casics/nostril)
211 | package is used to check how many of the words returned by the WordSegmentation
212 | analyser are nonsense.
213 |
--------------------------------------------------------------------------------
/certstream_analytics/analysers/domain_matching.py:
--------------------------------------------------------------------------------
1 | """
2 | Verify the domain against the list of most popular domains from OpenDNS
3 | (https://github.com/opendns/public-domain-lists). Let's see how useful
4 | it is to prevent phishing domains.
5 | """
6 | from enum import Enum
7 |
8 | import json
9 | import logging
10 | import re
11 | import tldextract
12 | import ahocorasick
13 | import wordsegment
14 |
15 | from .base import Analyser
16 | from .common_domain_analyser import BulkDomainMarker
17 | from .common_domain_analyser import WordSegmentation
18 |
19 |
# pylint: disable=too-few-public-methods
class AhoCorasickDomainMatching(Analyser):
    """
    The domain and its SAN will be compared against the list of domains, for
    example, the most popular domains from OpenDNS.
    """
    # Get this number from the histogram of the length of all top domains
    MIN_MATCHING_LENGTH = 3

    # Some domains that don't work too well with tldextract and generate too
    # many FPs
    EXCLUDED_DOMAINS = {
        'www': 1,
        'web': 1,
    }

    # Some common domain parts that cause too many FP
    IGNORED_PARTS = r'^(autodiscover\.|cpanel\.)'

    def __init__(self, domains):
        """
        Use Aho-Corasick to find the matching domain so we construct its Trie
        here.

        :param domains: an iterable of domain names to match against. Only
            the registered domain part of each entry is indexed.
        """
        self.automaton = ahocorasick.Automaton()
        # Maps the indexed domain part back to the entry it was taken from.
        # When several entries share the same part, the last one wins.
        self.domains = {}

        for index, domain in enumerate(domains):
            # Processing only the domain part. All sub-domains or TLDs will
            # be ignored, for example:
            # - www.google.com becomes google
            # - www.google.co.uk becomes google
            # - del.icio.us becomes icio
            ext = tldextract.extract(domain)

            # An entry without a domain part (a blank line for example)
            # leaves nothing to match on
            if not ext.domain:
                continue

            if ext.domain in AhoCorasickDomainMatching.EXCLUDED_DOMAINS:
                continue

            self.automaton.add_word(ext.domain, (index, ext.domain))
            self.domains[ext.domain] = domain

        self.automaton.make_automaton()

    def run(self, record):
        """
        Use Aho-Corasick to find the matching domain. Check the time complexity
        of this function later.

        Tricky situation #1: When the string (domain) in the Trie is too short,
        it could match many domains, for example, g.co or t.co. So they need
        to be ignored somehow. Looking at the histogram of the length of all
        domains in the list, there are only less than 100 domains with the
        length of 2 or less. So we choose to ignore those. Also, we will
        prefer longer match than a shorter one for now.

        :param record: a dict; its 'all_domains' list is scanned and the
            result is appended to its 'analysers' list (created if missing)
        :return: the same record. The output maps the first matching domain
            to a one-element list holding the matched known domain.
        """
        outputs = record.setdefault('analysers', [])

        results = {}
        # Nothing to search for when no usable domain has been loaded. A
        # record without any domain is passed through untouched as well.
        candidates = record.get('all_domains') or () if self.domains else ()

        # Check the domain and all its SAN
        for domain in candidates:
            # Remove wildcard
            domain = re.sub(r'^\*\.', '', domain)

            # Remove some FP-prone parts
            domain = re.sub(AhoCorasickDomainMatching.IGNORED_PARTS, '', domain)

            # Similar to all domains in the list, the TLD will be stripped off.
            # Named fields are used instead of slicing so that the code does
            # not depend on the positional layout of the extraction result.
            ext = tldextract.extract(domain)
            haystack = '.'.join((ext.subdomain, ext.domain))

            # The match will be a tuple in the following format: (5, (0, 'google'))
            matches = [m[1][1] for m in self.automaton.iter(haystack)
                       if len(m[1][1]) >= AhoCorasickDomainMatching.MIN_MATCHING_LENGTH]

            if not matches:
                continue

            # The sort is stable, so among the longest candidates the one
            # found last is kept
            match = sorted(matches, key=len)[-1]

            # We only keep the the longest match of the first matching domain
            # for now. The value is always a list because the consumers
            # iterate over it.
            results[domain] = [self.domains.get(match, match)]
            break

        if results:
            outputs.append({
                'analyser': type(self).__name__,
                'output': results,
            })

        return record
109 |
110 |
class DomainMatchingOption(Enum):
    """
    How strictly the words of a known domain have to show up in a candidate
    domain.
    """
    # The order of the words is ignored. For example applefake.it will match
    # with apple.com because ['apple'] is a subset of ['apple', 'fake']
    SUBSET_MATCH = 0

    # Same idea, but checked with in rather than issubset so that the order
    # of the words is preserved
    ORDER_MATCH = 1
121 |
122 |
class DomainMatching(Analyser):
    """
    This is the first example of the new group of meta analysers which are used
    to combine the result of other analysers.
    """
    def __init__(self, include_tld=True, option=DomainMatchingOption.ORDER_MATCH):
        """
        Load the wordsegment data and remember how to match.

        :param include_tld: when set, the TLD of a known domain has to show
            up in the candidate as well
        :param option: a DomainMatchingOption; a KeyError is raised for any
            other value
        """
        wordsegment.load()

        # Save the matching option here so we can refer to it later
        self.include_tld = include_tld

        # The container type decides how the words are compared later: a set
        # ignores their order while a list preserves it
        self.option = {
            DomainMatchingOption.SUBSET_MATCH: set,
            DomainMatchingOption.ORDER_MATCH: list,
        }[option]

    def run(self, record):
        """
        Note that a meta-analyser will need to run after other analysers have
        finished so that their outputs are available.

        :param record: a dict carrying the outputs of the previous analysers
            under its 'analysers' key
        :return: the same record, with the result of this analyser appended
            when a possible phishing domain is found
        """
        if 'analysers' not in record:
            return record

        analysers = {
            AhoCorasickDomainMatching.__name__: {},
            WordSegmentation.__name__: {},
            BulkDomainMarker.__name__: {},
        }

        for analyser in record['analysers']:
            name = analyser['analyser']

            if name not in analysers:
                continue

            if name == BulkDomainMarker.__name__ and analyser['output']:
                # Skip bulk record and deal with it later, with such large
                # number of SAN name, it's bound to be a match
                continue

            analysers[name] = analyser['output']

        # Check that all outputs are there before continuing
        if not analysers[AhoCorasickDomainMatching.__name__] or not analysers[WordSegmentation.__name__]:
            return record

        results = self._match(analysers[AhoCorasickDomainMatching.__name__],
                              analysers[WordSegmentation.__name__])

        if results:
            record['analysers'].append({
                'analyser': type(self).__name__,
                'output': results,
            })

            # DEBUG
            logging.info(json.dumps(record))

        return record

    def _match(self, ahocorasick_output, segmentation_output):
        """
        Use internally by the run function to combine AhoCorasick and WordSegmentation
        results.

        :param ahocorasick_output: maps a candidate domain to the list of
            known domains it matched
        :param segmentation_output: maps a candidate domain to its words
        :return: maps a candidate domain to the known domains it imitates
        """
        results = {}
        # Check all the matching domains reported by AhoCorasick analyser
        for match, domains in ahocorasick_output.items():
            # The result of AhoCorasick matcher is a list of matching domains, for example,
            #
            #   {
            #       'analyser': 'AhoCorasickDomainMatching',
            #       'output': {
            #           'login-appleid.apple.com.managesupport.co': ['apple.com', 'support.com'],
            #       },
            #   },
            #
            if match not in segmentation_output:
                continue

            phish = self.option(segmentation_output[match])
            match_ext = tldextract.extract(match)

            for domain in domains:
                ext = tldextract.extract(domain)

                # This record is from a legitimate source, for example, agrosupport.zendesk.com
                # will match with zendesk.com. In our case, we don't really care about this so
                # it will be ignored and not reported as a match.
                if (ext.domain, ext.suffix) == (match_ext.domain, match_ext.suffix):
                    continue

                legit = self.option(self._tokenize(ext))

                if self._is_match(legit, phish):
                    # Found a possible phishing domain
                    results.setdefault(match, []).append(domain)

        return results

    def _tokenize(self, ext):
        """
        Split a known domain (as extracted by tldextract) into words.
        """
        # Intuitively, it will be more accurate if we choose to include the TLD here.
        # For example, if both 'apple' and 'com' appear in the matching domain, it's
        # very likely that something phishing is going on here. On the other hand,
        # if only 'apple' occurs, we are not so sure and it's better left for more
        # advance analysers to have their says in that
        #
        # Named fields are used instead of slicing so that the code does not
        # depend on the positional layout of the extraction result.
        parts = [ext.subdomain, ext.domain]
        if self.include_tld:
            parts.append(ext.suffix)

        words = []
        for part in parts:
            for token in part.split('.'):
                words.extend(wordsegment.segment(token))

        return words

    @staticmethod
    def _is_match(legit, phish):
        """
        Tell whether the words of a known domain show up in the candidate.
        """
        # Without any word there is nothing to look for
        if not legit:
            return False

        if isinstance(phish, set):
            return legit.issubset(phish)

        # Order-preserving match: the words must appear next to each other and
        # in the same order. Both sides are wrapped in dots so that only whole
        # words are compared and a candidate starting (or ending) with the
        # known words is matched too.
        needle = '.{}.'.format('.'.join(legit))
        haystack = '.{}.'.format('.'.join(phish))

        return needle in haystack
240 |
--------------------------------------------------------------------------------
/pylintrc:
--------------------------------------------------------------------------------
1 | [MASTER]
2 |
3 | # Specify a configuration file.
4 | #rcfile=
5 |
6 | # Python code to execute, usually for sys.path manipulation such as
7 | # pygtk.require().
8 | #init-hook=
9 |
10 | # Add files or directories to the blacklist. They should be base names, not
11 | # paths.
12 | ignore=CVS
13 |
14 | # Pickle collected data for later comparisons.
15 | persistent=yes
16 |
17 | # List of plugins (as comma separated values of python modules names) to load,
18 | # usually to register additional checkers.
19 | load-plugins=
20 |
21 | # Use multiple processes to speed up Pylint.
22 | jobs=4
23 |
24 | # Allow loading of arbitrary C extensions. Extensions are imported into the
25 | # active Python interpreter and may run arbitrary code.
26 | unsafe-load-any-extension=no
27 |
28 | # A comma-separated list of package or module names from where C extensions may
29 | # be loaded. Extensions are loading into the active Python interpreter and may
30 | # run arbitrary code
31 | extension-pkg-whitelist=
32 |
33 |
34 | [MESSAGES CONTROL]
35 |
36 | # Only show warnings with the listed confidence levels. Leave empty to show
37 | # all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED
38 | confidence=
39 |
40 | # Enable the message, report, category or checker with the given id(s). You can
41 | # either give multiple identifier separated by comma (,) or put this option
42 | # multiple time. See also the "--disable" option for examples.
43 | #enable=
44 |
45 | # Disable the message, report, category or checker with the given id(s). You
46 | # can either give multiple identifiers separated by comma (,) or put this
47 | # option multiple times (only on the command line, not in the configuration
48 | # file where it should appear only once). You can also use "--disable=all" to
49 | # disable everything first and then reenable specific checks. For example, if
50 | # you want to run only the similarities checker, you can use "--disable=all
51 | # --enable=similarities". If you want to run only the classes checker, but have
52 | # no Warning level messages displayed, use "--disable=all --enable=classes
53 | # --disable=W"
54 |
55 | disable=fixme,locally-disabled
56 |
57 | [REPORTS]
58 |
59 | # Set the output format. Available formats are text, parseable, colorized, msvs
60 | # (visual studio) and html. You can also give a reporter class, eg
61 | # mypackage.mymodule.MyReporterClass.
62 | output-format=parseable
63 |
64 | # Put messages in a separate file for each module / package specified on the
65 | # command line instead of printing them on stdout. Reports (if any) will be
66 | # written in a file name "pylint_global.[txt|html]".
67 | files-output=no
68 |
69 | # Tells whether to display a full report or only the messages
70 | reports=yes
71 |
72 | # Python expression which should return a note less than 10 (10 is the highest
73 | # note). You have access to the variables errors warning, statement which
74 | # respectively contain the number of errors / warnings messages and the total
75 | # number of statements analyzed. This is used by the global evaluation report
76 | # (RP0004).
77 | evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)
78 |
79 | # Template used to display messages. This is a python new-style format string
80 | # used to format the message information. See doc for all details
81 | #msg-template=
82 |
83 |
84 | [LOGGING]
85 |
86 | # Logging modules to check that the string format arguments are in logging
87 | # function parameter format
88 | logging-modules=logging
89 |
90 |
91 | [MISCELLANEOUS]
92 |
93 | # List of note tags to take in consideration, separated by a comma.
94 | notes=FIXME,XXX,TODO
95 |
96 |
97 | [SIMILARITIES]
98 |
99 | # Minimum lines number of a similarity.
100 | min-similarity-lines=4
101 |
102 | # Ignore comments when computing similarities.
103 | ignore-comments=yes
104 |
105 | # Ignore docstrings when computing similarities.
106 | ignore-docstrings=yes
107 |
108 | # Ignore imports when computing similarities.
109 | ignore-imports=no
110 |
111 |
112 | [VARIABLES]
113 |
114 | # Tells whether we should check for unused import in __init__ files.
115 | init-import=no
116 |
117 | # A regular expression matching the name of dummy variables (i.e. expectedly
118 | # not used).
119 | dummy-variables-rgx=_$|dummy
120 |
121 | # List of additional names supposed to be defined in builtins. Remember that
122 | # you should avoid to define new builtins when possible.
123 | additional-builtins=
124 |
125 | # List of strings which can identify a callback function by name. A callback
126 | # name must start or end with one of those strings.
127 | callbacks=cb_,_cb
128 |
129 |
130 | [FORMAT]
131 |
132 | # Maximum number of characters on a single line.
133 | max-line-length=120
134 |
135 | # Regexp for a line that is allowed to be longer than the limit.
136 | ignore-long-lines=^\s*(# )?<?https?://\S+>?$
137 |
138 | # Allow the body of an if to be on the same line as the test if there is no
139 | # else.
140 | single-line-if-stmt=no
141 |
142 | # List of optional constructs for which whitespace checking is disabled
143 | no-space-check=trailing-comma,dict-separator
144 |
145 | # Maximum number of lines in a module
146 | max-module-lines=1000
147 |
148 | # String used as indentation unit. This is usually "    " (4 spaces) or "\t" (1
149 | # tab).
150 | indent-string='    '
151 |
152 | # Number of spaces of indent required inside a hanging or continued line.
153 | indent-after-paren=4
154 |
155 | # Expected format of line ending, e.g. empty (any line ending), LF or CRLF.
156 | expected-line-ending-format=
157 |
158 |
159 | [BASIC]
160 |
161 | # List of builtins function names that should not be used, separated by a comma
162 | bad-functions=map,filter,input
163 |
164 | # Good variable names which should always be accepted, separated by a comma
165 | good-names=i,j,k,ex,Run,_
166 |
167 | # Bad variable names which should always be refused, separated by a comma
168 | bad-names=foo,bar,baz,toto,tutu,tata
169 |
170 | # Colon-delimited sets of names that determine each other's naming style when
171 | # the name regexes allow several styles.
172 | name-group=
173 |
174 | # Include a hint for the correct naming format with invalid-name
175 | include-naming-hint=no
176 |
177 | # Regular expression matching correct function names
178 | function-rgx=[a-z_][a-z0-9_]{2,30}$
179 |
180 | # Naming hint for function names
181 | function-name-hint=[a-z_][a-z0-9_]{2,30}$
182 |
183 | # Regular expression matching correct variable names
184 | variable-rgx=[a-z_][a-z0-9_]{2,30}$
185 |
186 | # Naming hint for variable names
187 | variable-name-hint=[a-z_][a-z0-9_]{2,30}$
188 |
189 | # Regular expression matching correct constant names
190 | const-rgx=(([A-Z_][A-Z0-9_]*)|(__.*__))$
191 |
192 | # Naming hint for constant names
193 | const-name-hint=(([A-Z_][A-Z0-9_]*)|(__.*__))$
194 |
195 | # Regular expression matching correct attribute names
196 | attr-rgx=[a-z_][a-z0-9_]{2,30}$
197 |
198 | # Naming hint for attribute names
199 | attr-name-hint=[a-z_][a-z0-9_]{2,30}$
200 |
201 | # Regular expression matching correct argument names
202 | argument-rgx=[a-z_][a-z0-9_]{2,30}$
203 |
204 | # Naming hint for argument names
205 | argument-name-hint=[a-z_][a-z0-9_]{2,30}$
206 |
207 | # Regular expression matching correct class attribute names
208 | class-attribute-rgx=([A-Za-z_][A-Za-z0-9_]{2,30}|(__.*__))$
209 |
210 | # Naming hint for class attribute names
211 | class-attribute-name-hint=([A-Za-z_][A-Za-z0-9_]{2,30}|(__.*__))$
212 |
213 | # Regular expression matching correct inline iteration names
214 | inlinevar-rgx=[A-Za-z_][A-Za-z0-9_]*$
215 |
216 | # Naming hint for inline iteration names
217 | inlinevar-name-hint=[A-Za-z_][A-Za-z0-9_]*$
218 |
219 | # Regular expression matching correct class names
220 | class-rgx=[A-Z_][a-zA-Z0-9]+$
221 |
222 | # Naming hint for class names
223 | class-name-hint=[A-Z_][a-zA-Z0-9]+$
224 |
225 | # Regular expression matching correct module names
226 | module-rgx=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$
227 |
228 | # Naming hint for module names
229 | module-name-hint=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$
230 |
231 | # Regular expression matching correct method names
232 | method-rgx=[a-z_][a-z0-9_]{2,30}$
233 |
234 | # Naming hint for method names
235 | method-name-hint=[a-z_][a-z0-9_]{2,30}$
236 |
237 | # Regular expression which should only match function or class names that do
238 | # not require a docstring.
239 | no-docstring-rgx=__.*__
240 |
241 | # Minimum line length for functions/classes that require docstrings, shorter
242 | # ones are exempt.
243 | docstring-min-length=-1
244 |
245 | # List of decorators that define properties, such as abc.abstractproperty.
246 | property-classes=abc.abstractproperty
247 |
248 |
249 | [TYPECHECK]
250 |
251 | # Tells whether missing members accessed in mixin class should be ignored. A
252 | # mixin class is detected if its name ends with "mixin" (case insensitive).
253 | ignore-mixin-members=yes
254 |
255 | # List of module names for which member attributes should not be checked
256 | # (useful for modules/projects where namespaces are manipulated during runtime
257 | # and thus existing member attributes cannot be deduced by static analysis
258 | ignored-modules=
259 |
260 | # List of classes names for which member attributes should not be checked
261 | # (useful for classes with attributes dynamically set).
262 | ignored-classes=SQLObject, optparse.Values, thread._local, _thread._local
263 |
264 | # List of members which are set dynamically and missed by pylint inference
265 | # system, and so shouldn't trigger E1101 when accessed. Python regular
266 | # expressions are accepted.
267 | generated-members=REQUEST,acl_users,aq_parent
268 |
269 | # List of decorators that create context managers from functions, such as
270 | # contextlib.contextmanager.
271 | contextmanager-decorators=contextlib.contextmanager
272 |
273 |
274 | [SPELLING]
275 |
276 | # Spelling dictionary name. Available dictionaries: none. To make it working
277 | # install python-enchant package.
278 | spelling-dict=
279 |
280 | # List of comma separated words that should not be checked.
281 | spelling-ignore-words=
282 |
283 | # A path to a file that contains private dictionary; one word per line.
284 | spelling-private-dict-file=
285 |
286 | # Tells whether to store unknown words to indicated private dictionary in
287 | # --spelling-private-dict-file option instead of raising a message.
288 | spelling-store-unknown-words=no
289 |
290 |
291 | [DESIGN]
292 |
293 | # Maximum number of arguments for function / method
294 | max-args=5
295 |
296 | # Argument names that match this expression will be ignored. Default to name
297 | # with leading underscore
298 | ignored-argument-names=_.*
299 |
300 | # Maximum number of locals for function / method body
301 | max-locals=15
302 |
303 | # Maximum number of return / yield for function / method body
304 | max-returns=6
305 |
306 | # Maximum number of branch for function / method body
307 | max-branches=12
308 |
309 | # Maximum number of statements in function / method body
310 | max-statements=50
311 |
312 | # Maximum number of parents for a class (see R0901).
313 | max-parents=7
314 |
315 | # Maximum number of attributes for a class (see R0902).
316 | max-attributes=7
317 |
318 | # Minimum number of public methods for a class (see R0903).
319 | min-public-methods=2
320 |
321 | # Maximum number of public methods for a class (see R0904).
322 | max-public-methods=20
323 |
324 |
325 | [CLASSES]
326 |
327 | # List of method names used to declare (i.e. assign) instance attributes.
328 | defining-attr-methods=__init__,__new__,setUp
329 |
330 | # List of valid names for the first argument in a class method.
331 | valid-classmethod-first-arg=cls
332 |
333 | # List of valid names for the first argument in a metaclass class method.
334 | valid-metaclass-classmethod-first-arg=mcs
335 |
336 | # List of member names, which should be excluded from the protected access
337 | # warning.
338 | exclude-protected=_asdict,_fields,_replace,_source,_make
339 |
340 |
341 | [IMPORTS]
342 |
343 | # Deprecated modules which should not be used, separated by a comma
344 | deprecated-modules=regsub,TERMIOS,Bastion,rexec
345 |
346 | # Create a graph of every (i.e. internal and external) dependencies in the
347 | # given file (report RP0402 must not be disabled)
348 | import-graph=
349 |
350 | # Create a graph of external dependencies in the given file (report RP0402 must
351 | # not be disabled)
352 | ext-import-graph=
353 |
354 | # Create a graph of internal dependencies in the given file (report RP0402 must
355 | # not be disabled)
356 | int-import-graph=
357 |
358 |
359 | [EXCEPTIONS]
360 |
361 | # Exceptions that will emit a warning when being caught. Defaults to
362 | # "Exception"
363 | overgeneral-exceptions=Exception
364 |
--------------------------------------------------------------------------------
/certstream_analytics/analysers/common_domain_analyser.py:
--------------------------------------------------------------------------------
1 | """
2 | The list of basic analysers includes:
3 | - WordSegmentation
4 | - IDNADecoder
5 | - HomoglyphsDecoder
6 | - FeaturesGenerator (generate various features for further downstream processing)
7 | - BulkDomainMarker
8 | """
9 | import re
10 | import tldextract
11 | import wordsegment
12 | from nostril import nonsense
13 | import idna
14 | from confusable_homoglyphs import confusables
15 |
16 | from .base import Analyser
17 |
18 |
19 | # pylint: disable=too-few-public-methods
class WordSegmentation(Analyser):
    """
    Perform word segmentation of all the SAN domains as an attempt to make sense
    of their names. For example, both arch.mappleonline.com and
    apple-verifyupdate.serveftp.com domains have 'apple' inside but only the
    second one is an actual Apple phishing page. Intuitively, a good word
    segmentation algorithm will return:

    - arch + mapple + online + com
    - apple + verify + update + serve + ftp + com

    Thus, it's much easier to spot the second phishing domain.

    Implementation-wise, there are several existing packages around to do this,
    for example:

    - https://github.com/grantjenks/python-wordsegment
    - https://github.com/keredson/wordninja

    This analyser uses the wordsegment package.
    """
    # Some common stop words that are in the list of most popular domains.
    # Only the keys matter: segmented words found here are dropped.
    STOPWORDS = {
        'app': 1,
        'inc': 1,
        'box': 1,
        'health': 1,
        'home': 1,
        'space': 1,
        'cars': 1,
        'nature': 1,
    }

    def __init__(self):
        """
        Load the wordsegment data so that wordsegment.segment() can be used
        by run().
        """
        wordsegment.load()

    def run(self, record):
        """
        Segment every domain in record['all_domains'] into words.

        The leading wildcard ('*.') is removed from each domain first. When at
        least one domain was processed, an entry of the form
        {'analyser': <class name>, 'output': {domain: [words]}} is appended to
        record['analysers'] (the list is created when missing).

        Returns the same record object.
        """
        if 'analysers' not in record:
            record['analysers'] = []

        results = {}
        # Check the domain and all its SAN
        for domain in record['all_domains']:
            # Remove wild card
            domain = re.sub(r'^\*\.', '', domain)

            ext = tldextract.extract(domain)

            # Read the three name fields explicitly instead of slicing the
            # result: slicing only works while the result is a plain tuple of
            # exactly these string fields, which is not guaranteed across
            # tldextract releases.
            name_parts = (ext.subdomain, ext.domain, ext.suffix)

            words = []
            # The suffix (TLD) is segmented as well, for example, .co.uk
            # becomes ['co', 'uk'].
            for part in name_parts:
                for token in part.split('.'):
                    segmented = [w for w in wordsegment.segment(token) if w not in WordSegmentation.STOPWORDS]

                    if segmented:
                        words.extend(segmented)
                    elif token:
                        # For some IDNA domain like xn--wgbfq3d.xn--ngbc5azd, the segmentation
                        # won't work and an empty array is returned. So we choose to just keep
                        # the original token. The same fallback applies when every
                        # segmented word was a stop word.
                        words.append(token)

            results[domain] = words

        if results:
            record['analysers'].append({
                'analyser': type(self).__name__,
                'output': results,
            })

        return record
99 |
100 |
class BulkDomainMarker(Analyser):
    """
    Flag certificates that carry a large number of SAN domains. Such domains
    are usually unrelated to each other and probably come out of some bulk
    registration process. Benign or not, they are still suspicious and likely
    spam. The similarity among these domains could be verified as well: a
    lower similarity score means the domains are totally unrelated.
    """
    # Take a histogram here and find out the suitable value for this
    THRESHOLD = 15

    def __init__(self, threshold=THRESHOLD):
        """
        Remember the number of domains from which a record counts as bulk.
        """
        self.threshold = threshold

    def run(self, record):
        """
        Append a boolean verdict to record['analysers'] (created when missing)
        and return the record. The verdict is True when the record has at
        least `threshold` entries in record['all_domains'].
        """
        record.setdefault('analysers', [])

        verdict = {
            'analyser': type(self).__name__,
            'output': len(record['all_domains']) >= self.threshold,
        }
        record['analysers'].append(verdict)

        return record
135 |
136 |
class IDNADecoder(Analyser):
    """
    Convert every IDNA-encoded domain of a record back to Unicode.
    """
    def run(self, record):
        """
        Replace record['all_domains'] with a list in which each domain has
        been passed through idna.decode(). A domain that cannot be decoded is
        kept unchanged. A leading wildcard ('*.') is detached before decoding
        and re-attached afterwards. Returns the same record object.
        """
        converted = []

        for name in record['all_domains']:
            has_wildcard = re.match(r'^\*\.', name) is not None

            # The wildcard interferes with the IDNA module, so decode the
            # name without it
            bare = re.sub(r'^\*\.', '', name) if has_wildcard else name

            try:
                bare = idna.decode(bare)
            except (idna.core.InvalidCodepoint, UnicodeError):
                # Fail to decode the domain, just keep it as it is for now
                pass

            converted.append('*.{}'.format(bare) if has_wildcard else bare)

        record['all_domains'] = converted
        return record
173 |
174 |
class HomoglyphsDecoder(Analyser):
    """
    Smartly convert domains whose names include some suspicious homoglyphs to
    ASCII. This will probably need to be done right after IDNA conversion and
    before other analysers so that they can benefit from it.
    """
    def __init__(self, greedy=False):
        """
        We rely on the confusable-homoglyphs at https://github.com/vhf/confusable_homoglyphs
        to do its magic.

        If the greedy flag is set, all alternative domains will be returned. Otherwise, only
        the first one will be available.
        """
        self.greedy = greedy

    @staticmethod
    def is_latin(alt):
        """
        Return True when every character of `alt` is an ASCII letter (a-z or
        A-Z). An empty string yields True.
        """
        lower_s = range(ord('a'), ord('z') + 1)
        upper_s = range(ord('A'), ord('Z') + 1)

        # The whole string is checked, not a single character, because the
        # confusable_homoglyphs library returns multi-character matches as
        # well, for example, 'rn' has an alternative of 'm'
        return all(ord(alt_c) in lower_s or ord(alt_c) in upper_s for alt_c in alt)

    def run(self, record):
        """
        Replace record['all_domains'] with the ASCII alternatives of each
        domain, built character by character from the homoglyphs reported by
        confusable-homoglyphs. It's a bit of a brute force though.

        In greedy mode every alternative of every domain is emitted, so the
        resulting list may be longer than the input; otherwise only the first
        alternative of each domain is kept. A leading wildcard ('*.') is
        detached first and re-attached to every alternative.

        Returns the same record object.
        """
        decoded = []

        for domain in record['all_domains']:
            wildcard = False

            if re.match(r'^\*\.', domain):
                wildcard = True
                # Remove wild card to simplify the domain name a bit and we'll put it back later
                domain = re.sub(r'^\*\.', '', domain)

            # is_confusable() returns False, not an empty list, when no
            # character of the domain is confusable; fall back to an empty
            # list so that such a domain passes through unchanged instead of
            # raising TypeError.
            confusable_chars = confusables.is_confusable(domain, greedy=True) or []
            hg_map = {hg['character']: hg for hg in confusable_chars}
            decoded_domain_c = []

            for domain_c in domain:
                # Confusable homoglyphs could not find any homoglyphs for this character
                # so we decide to keep the original character as it is
                if domain_c not in hg_map:
                    decoded_domain_c.append([domain_c])
                    continue

                found = []
                hglyph = hg_map[domain_c]

                if hglyph['alias'] == 'LATIN':
                    # The character is already Latin, so the character itself
                    # is the first candidate
                    found.append(hglyph['character'])

                for alt in hglyph['homoglyphs']:
                    if HomoglyphsDecoder.is_latin(alt['c']):
                        found.append(alt['c'].lower())

                # If nothing is found, we keep the original character
                if not found:
                    found.append(hglyph['character'])

                decoded_domain_c.append(found)

            for alt in self._generate_alternatives(decoded_domain_c):
                if wildcard:
                    alt = '*.{}'.format(alt)

                decoded.append(alt)

                # The generator is lazy, so stopping here avoids building the
                # remaining combinations
                if not self.greedy:
                    break

        record['all_domains'] = decoded
        return record

    def _generate_alternatives(self, alt_characters, index=0, current=''):
        """
        Yield every string that can be formed by picking one candidate from
        each position of `alt_characters` (a list of candidate lists), in
        order, with the last position varying fastest.

        `index` and `current` carry the recursion state; callers leave them
        at their defaults.
        """
        if index == len(alt_characters):
            yield current

        else:
            for alt_c in alt_characters[index]:
                yield from self._generate_alternatives(alt_characters,
                                                       index + 1,
                                                       current + alt_c)
276 |
277 |
class FeaturesGenerator(Analyser):
    """
    Generate features to detect outliers in the stream. In our case, the outliers
    are the 'suspicious' phishing domains.
    """
    # Words shorter than this are never passed to nostril's nonsense()
    NOSTRIL_LENGTH_LIMIT = 6

    def run(self, record):
        """
        Build one feature vector per domain from the output of the first
        'WordSegmentation' entry found in record['analysers'].

        Each vector holds, in this order:
            - The number of domain parts, for example, www.google.com is 3.
            - The overall length in characters.
            - The length of the longest domain part.
            - The length of the last domain part, e.g. .online or .download
              is longer than .com.
            - The randomness level of the domain: the share of its segmented
              words that nostril considers nonsense.

        An entry {'analyser': <class name>, 'output': [vectors]} is always
        appended to record['analysers'] (created when missing); the output is
        an empty list when no WordSegmentation entry exists.

        Returns the same record object.
        """
        if 'analysers' not in record:
            record['analysers'] = []

        samples = []

        for analyser in record['analysers']:
            if analyser['analyser'] != 'WordSegmentation':
                continue

            samples = [self._extract_features(domain, segments)
                       for domain, segments in analyser['output'].items()]

            # Only the first WordSegmentation output is used
            break

        record['analysers'].append({
            'analyser': type(self).__name__,
            'output': samples,
        })

        return record

    @staticmethod
    def _extract_features(domain, segments):
        """
        Compute the feature vector of a single domain from its name and its
        list of segmented words.
        """
        # Remove wildcard domain
        domain = re.sub(r'^\*\.', '', domain)

        # str.split always returns at least one element, so max() and
        # parts[-1] below are safe even for an empty domain
        parts = domain.split('.')
        longest = max(len(part) for part in parts)

        randomness_count = 0
        # The nostril package which we are using to detect non-sense words
        # in the domain only returns a boolean verdict, so the verdicts are
        # counted and turned into a ratio
        for word in segments:
            try:
                if len(word) >= FeaturesGenerator.NOSTRIL_LENGTH_LIMIT and nonsense(word):
                    randomness_count += 1
            except ValueError:
                # A word that nonsense() rejects with ValueError is not counted
                continue

        # A domain without any segment (e.g. an empty name) has no randomness
        # to measure; avoid dividing by zero
        randomness = randomness_count / len(segments) if segments else 0.0

        return [len(parts), len(domain), longest, len(parts[-1]), randomness]
353 |
--------------------------------------------------------------------------------
/tests/samples.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "data": {
4 | "cert_index": 447858050,
5 | "cert_link": "http://ct.googleapis.com/rocketeer/ct/v1/get-entries?start=447858050&end=447858050",
6 | "chain": [
7 | {
8 | "as_der": "REDACT",
9 | "extensions": {
10 | "authorityInfoAccess": "OCSP - URI:http://ocsp.comodoca.com\nCA Issuers - URI:http://crt.comodoca.com/COMODORSAAddTrustCA.crt\n",
11 | "authorityKeyIdentifier": "keyid:BB:AF:7E:02:3D:FA:A6:F1:3C:84:8E:AD:EE:38:98:EC:D9:32:32:D4\n",
12 | "basicConstraints": "CA:TRUE",
13 | "certificatePolicies": "Policy: 2.23.140.1.2.1\nPolicy: 1.3.6.1.4.1.6449.1.2.2.52",
14 | "crlDistributionPoints": "Full Name:\n URI:http://crl.comodoca.com/COMODORSACertificationAuthority.crl",
15 | "extendedKeyUsage": "TLS Web server authentication, TLS Web client authentication",
16 | "keyUsage": "Digital Signature, Key Cert Sign, C R L Sign",
17 | "subjectKeyIdentifier": "7E:03:5A:65:41:6B:A7:7E:0A:E1:B8:9D:08:EA:1D:8E:1D:6A:C7:65"
18 | },
19 | "fingerprint": "76:4D:2F:A5:9E:D1:23:F9:C9:55:70:C4:03:C9:2F:EF:33:8E:A7:45",
20 | "not_after": 1747526399,
21 | "not_before": 1431907200,
22 | "serial_number": "F01D4BEE7B7CA37B3C0566AC05972458",
23 | "subject": {
24 | "C": "US",
25 | "CN": "cPanel, Inc. Certification Authority",
26 | "L": "Houston",
27 | "O": "cPanel, Inc.",
28 | "OU": null,
29 | "ST": "TX",
30 | "aggregated": "/C=US/CN=cPanel, Inc. Certification Authority/L=Houston/O=cPanel, Inc./ST=TX"
31 | }
32 | },
33 | {
34 | "as_der": "REDACT",
35 | "extensions": {
36 | "basicConstraints": "CA:TRUE",
37 | "keyUsage": "Key Cert Sign, C R L Sign",
38 | "subjectKeyIdentifier": "BB:AF:7E:02:3D:FA:A6:F1:3C:84:8E:AD:EE:38:98:EC:D9:32:32:D4"
39 | },
40 | "fingerprint": "AF:E5:D2:44:A8:D1:19:42:30:FF:47:9F:E2:F8:97:BB:CD:7A:8C:B4",
41 | "not_after": 2147471999,
42 | "not_before": 1263859200,
43 | "serial_number": "4CAAF9CADB636FE01FF74ED85B03869D",
44 | "subject": {
45 | "C": "GB",
46 | "CN": "COMODO RSA Certification Authority",
47 | "L": "Salford",
48 | "O": "COMODO CA Limited",
49 | "OU": null,
50 | "ST": "Greater Manchester",
51 | "aggregated": "/C=GB/CN=COMODO RSA Certification Authority/L=Salford/O=COMODO CA Limited/ST=Greater Manchester"
52 | }
53 | }
54 | ],
55 | "leaf_cert": {
56 | "all_domains": [
57 | "firmyfarmaceutyczneeu.redirect.rejestracjadomen.pl",
58 | "www.firmyfarmaceutyczneeu.redirect.rejestracjadomen.pl"
59 | ],
60 | "as_der": "REDACT",
61 | "extensions": {
62 | "authorityInfoAccess": "OCSP - URI:http://ocsp.comodoca.com\nCA Issuers - URI:http://crt.comodoca.com/cPanelIncCertificationAuthority.crt\n",
63 | "authorityKeyIdentifier": "keyid:7E:03:5A:65:41:6B:A7:7E:0A:E1:B8:9D:08:EA:1D:8E:1D:6A:C7:65\n",
64 | "basicConstraints": "CA:FALSE",
65 | "certificatePolicies": "Policy: 2.23.140.1.2.1\nPolicy: 1.3.6.1.4.1.6449.1.2.2.52\n CPS: https://secure.comodo.com/CPS",
66 | "crlDistributionPoints": "Full Name:\n URI:http://crl.comodoca.com/cPanelIncCertificationAuthority.crl",
67 | "ctlPoisonByte": true,
68 | "extendedKeyUsage": "TLS Web server authentication, TLS Web client authentication",
69 | "keyUsage": "Digital Signature, Key Encipherment",
70 | "subjectAltName": "DNS:www.firmyfarmaceutyczneeu.redirect.rejestracjadomen.pl, DNS:firmyfarmaceutyczneeu.redirect.rejestracjadomen.pl",
71 | "subjectKeyIdentifier": "A6:F3:1B:BD:CB:A6:E0:95:E4:EA:86:C5:9D:FE:BC:9E:B1:C4:0B:FD"
72 | },
73 | "fingerprint": "32:8B:E0:CA:41:25:E0:EB:CD:92:29:7F:F3:17:3C:06:2C:3C:1F:D0",
74 | "not_after": 1546473599,
75 | "not_before": 1538611200,
76 | "serial_number": "DA28422511646C0552500F3DEE0AC20",
77 | "subject": {
78 | "C": null,
79 | "CN": "firmyfarmaceutyczneeu.redirect.rejestracjadomen.pl",
80 | "L": null,
81 | "O": null,
82 | "OU": null,
83 | "ST": null,
84 | "aggregated": "/CN=firmyfarmaceutyczneeu.redirect.rejestracjadomen.pl"
85 | }
86 | },
87 | "seen": 1538635262.355275,
88 | "source": {
89 | "name": "Google 'Rocketeer' log",
90 | "url": "ct.googleapis.com/rocketeer/"
91 | },
92 | "update_type": "PrecertLogEntry"
93 | },
94 | "message_type": "certificate_update"
95 | },
96 | {
97 | "data": {
98 | "cert_index": 447858049,
99 | "cert_link": "http://ct.googleapis.com/rocketeer/ct/v1/get-entries?start=447858049&end=447858049",
100 | "chain": [
101 | {
102 | "as_der": "REDACT",
103 | "extensions": {
104 | "authorityInfoAccess": "CA Issuers - URI:http://apps.identrust.com/roots/dstrootcax3.p7c\nOCSP - URI:http://isrg.trustid.ocsp.identrust.com\n",
105 | "authorityKeyIdentifier": "keyid:C4:A7:B1:A4:7B:2C:71:FA:DB:E1:4B:90:75:FF:C4:15:60:85:89:10\n",
106 | "basicConstraints": "CA:TRUE",
107 | "certificatePolicies": "Policy: 1.3.6.1.4.1.44947.1.1.1\n CPS: http://cps.root-x1.letsencrypt.org",
108 | "crlDistributionPoints": "Full Name:\n URI:http://crl.identrust.com/DSTROOTCAX3CRL.crl",
109 | "keyUsage": "Digital Signature, Key Cert Sign, C R L Sign",
110 | "subjectKeyIdentifier": "A8:4A:6A:63:04:7D:DD:BA:E6:D1:39:B7:A6:45:65:EF:F3:A8:EC:A1"
111 | },
112 | "fingerprint": "E6:A3:B4:5B:06:2D:50:9B:33:82:28:2D:19:6E:FE:97:D5:95:6C:CB",
113 | "not_after": 1615999246,
114 | "not_before": 1458232846,
115 | "serial_number": "A0141420000015385736A0B85ECA708",
116 | "subject": {
117 | "C": "US",
118 | "CN": "Let's Encrypt Authority X3",
119 | "L": null,
120 | "O": "Let's Encrypt",
121 | "OU": null,
122 | "ST": null,
123 | "aggregated": "/C=US/CN=Let's Encrypt Authority X3/O=Let's Encrypt"
124 | }
125 | },
126 | {
127 | "as_der": "REDACT",
128 | "extensions": {
129 | "basicConstraints": "CA:TRUE",
130 | "keyUsage": "Key Cert Sign, C R L Sign",
131 | "subjectKeyIdentifier": "C4:A7:B1:A4:7B:2C:71:FA:DB:E1:4B:90:75:FF:C4:15:60:85:89:10"
132 | },
133 | "fingerprint": "DA:C9:02:4F:54:D8:F6:DF:94:93:5F:B1:73:26:38:CA:6A:D7:7C:13",
134 | "not_after": 1633010475,
135 | "not_before": 970348339,
136 | "serial_number": "44AFB080D6A327BA893039862EF8406B",
137 | "subject": {
138 | "C": null,
139 | "CN": "DST Root CA X3",
140 | "L": null,
141 | "O": "Digital Signature Trust Co.",
142 | "OU": null,
143 | "ST": null,
144 | "aggregated": "/CN=DST Root CA X3/O=Digital Signature Trust Co."
145 | }
146 | }
147 | ],
148 | "leaf_cert": {
149 | "all_domains": [
150 | "rundschleifmaschinen-service.de",
151 | "www.rundschleifmaschinen-service.de"
152 | ],
153 | "as_der": "REDACT",
154 | "extensions": {
155 | "authorityInfoAccess": "CA Issuers - URI:http://cert.int-x3.letsencrypt.org/\nOCSP - URI:http://ocsp.int-x3.letsencrypt.org\n",
156 | "authorityKeyIdentifier": "keyid:A8:4A:6A:63:04:7D:DD:BA:E6:D1:39:B7:A6:45:65:EF:F3:A8:EC:A1\n",
157 | "basicConstraints": "CA:FALSE",
158 | "certificatePolicies": "Policy: 1.3.6.1.4.1.44947.1.1.1\n CPS: http://cps.letsencrypt.org\n User Notice: is Certificate may only be relied upon by Relying Parties and only in accordance with the Certificate Policy found at https://letsencrypt.org/repository/",
159 | "ctlSignedCertificateTimestamp": "BIHyAPAAdQBvU3asMfAxGdiZAKRRFf93FRwR2QLBACkGjbIImjfZEwAAAWYyuWCoAAAEAwBGMEQCIHAfF-WDz1YkPCONYN0aXohfUPFrhiKG61tXfDilc3dUAiB0oHYT0e5eCKi5k9mEzRpqC-NdvhEtr8qKBlxEoiQsGwB3ACk8UZZUyDlluqpQ_FgH1Ldvv1h6KXLcpMMM9OVFR_R4AAABZjK5YkwAAAQDAEgwRgIhAKlrVU0Na8GF1AT7lCpeUJMchwfHnFsjswnpultsgKQhAiEAuPvplxBQsMHbioLdPsNRQSr-xUHV2g7yZkUnKqZHbnQ=",
160 | "extendedKeyUsage": "TLS Web server authentication, TLS Web client authentication",
161 | "keyUsage": "Digital Signature, Key Encipherment",
162 | "subjectAltName": "DNS:www.rundschleifmaschinen-service.de, DNS:rundschleifmaschinen-service.de",
163 | "subjectKeyIdentifier": "E3:45:2E:7F:5C:8D:B4:17:CC:B8:73:09:E3:DA:F7:F3:F9:ED:F2:15"
164 | },
165 | "fingerprint": "9A:3A:AF:F8:DC:A4:18:4B:B6:46:61:F7:29:46:FA:42:9B:CA:9C:71",
166 | "not_after": 1546221701,
167 | "not_before": 1538445701,
168 | "serial_number": "3428B7C70A67819D5B9E7A13D2B9B8C778F",
169 | "subject": {
170 | "C": null,
171 | "CN": "rundschleifmaschinen-service.de",
172 | "L": null,
173 | "O": null,
174 | "OU": null,
175 | "ST": null,
176 | "aggregated": "/CN=rundschleifmaschinen-service.de"
177 | }
178 | },
179 | "seen": 1538635262.353125,
180 | "source": {
181 | "name": "Google 'Rocketeer' log",
182 | "url": "ct.googleapis.com/rocketeer/"
183 | },
184 | "update_type": "X509LogEntry"
185 | },
186 | "message_type": "certificate_update"
187 | },
188 | {
189 | "data": {
190 | "cert_index": 447857993,
191 | "cert_link": "http://ct.googleapis.com/rocketeer/ct/v1/get-entries?start=447857993&end=447857993",
192 | "chain": [
193 | {
194 | "as_der": "REDACT",
195 | "extensions": {
196 | "authorityInfoAccess": "CA Issuers - URI:http://apps.identrust.com/roots/dstrootcax3.p7c\nOCSP - URI:http://isrg.trustid.ocsp.identrust.com\n",
197 | "authorityKeyIdentifier": "keyid:C4:A7:B1:A4:7B:2C:71:FA:DB:E1:4B:90:75:FF:C4:15:60:85:89:10\n",
198 | "basicConstraints": "CA:TRUE",
199 | "certificatePolicies": "Policy: 1.3.6.1.4.1.44947.1.1.1\n CPS: http://cps.root-x1.letsencrypt.org",
200 | "crlDistributionPoints": "Full Name:\n URI:http://crl.identrust.com/DSTROOTCAX3CRL.crl",
201 | "keyUsage": "Digital Signature, Key Cert Sign, C R L Sign",
202 | "subjectKeyIdentifier": "A8:4A:6A:63:04:7D:DD:BA:E6:D1:39:B7:A6:45:65:EF:F3:A8:EC:A1"
203 | },
204 | "fingerprint": "E6:A3:B4:5B:06:2D:50:9B:33:82:28:2D:19:6E:FE:97:D5:95:6C:CB",
205 | "not_after": 1615999246,
206 | "not_before": 1458232846,
207 | "serial_number": "A0141420000015385736A0B85ECA708",
208 | "subject": {
209 | "C": "US",
210 | "CN": "Let's Encrypt Authority X3",
211 | "L": null,
212 | "O": "Let's Encrypt",
213 | "OU": null,
214 | "ST": null,
215 | "aggregated": "/C=US/CN=Let's Encrypt Authority X3/O=Let's Encrypt"
216 | }
217 | },
218 | {
219 | "as_der": "REDACT",
220 | "extensions": {
221 | "basicConstraints": "CA:TRUE",
222 | "keyUsage": "Key Cert Sign, C R L Sign",
223 | "subjectKeyIdentifier": "C4:A7:B1:A4:7B:2C:71:FA:DB:E1:4B:90:75:FF:C4:15:60:85:89:10"
224 | },
225 | "fingerprint": "DA:C9:02:4F:54:D8:F6:DF:94:93:5F:B1:73:26:38:CA:6A:D7:7C:13",
226 | "not_after": 1633010475,
227 | "not_before": 970348339,
228 | "serial_number": "44AFB080D6A327BA893039862EF8406B",
229 | "subject": {
230 | "C": null,
231 | "CN": "DST Root CA X3",
232 | "L": null,
233 | "O": "Digital Signature Trust Co.",
234 | "OU": null,
235 | "ST": null,
236 | "aggregated": "/CN=DST Root CA X3/O=Digital Signature Trust Co."
237 | }
238 | }
239 | ],
240 | "leaf_cert": {
241 | "all_domains": [
242 | "www.runaflohmarkt.de"
243 | ],
244 | "as_der": "REDACT",
245 | "extensions": {
246 | "authorityInfoAccess": "CA Issuers - URI:http://cert.int-x3.letsencrypt.org/\nOCSP - URI:http://ocsp.int-x3.letsencrypt.org\n",
247 | "authorityKeyIdentifier": "keyid:A8:4A:6A:63:04:7D:DD:BA:E6:D1:39:B7:A6:45:65:EF:F3:A8:EC:A1\n",
248 | "basicConstraints": "CA:FALSE",
249 | "certificatePolicies": "Policy: 1.3.6.1.4.1.44947.1.1.1\n CPS: http://cps.letsencrypt.org\n User Notice: is Certificate may only be relied upon by Relying Parties and only in accordance with the Certificate Policy found at https://letsencrypt.org/repository/",
250 | "ctlSignedCertificateTimestamp": "BIHxAO8AdQBvU3asMfAxGdiZAKRRFf93FRwR2QLBACkGjbIImjfZEwAAAWX9AeXwAAAEAwBGMEQCICIkjDXPcRgbcoKuh8Ciu_1sIVVKj_oGb-bzc8zPyhF2AiAhCQMKgrBcxZpZpGgOEgyBxIX6WqJFDOGamrWW-I55IAB2ACk8UZZUyDlluqpQ_FgH1Ldvv1h6KXLcpMMM9OVFR_R4AAABZf0B56wAAAQDAEcwRQIhAPNKe7X7XqNZF7H4NOWW-DtSvx1jVWxqsZVnknCjrkjrAiBTIKM-qsi4QMFHbTRfxz4tiRvI14vCXDAbyoLgbp6BKw==",
251 | "extendedKeyUsage": "TLS Web server authentication, TLS Web client authentication",
252 | "keyUsage": "Digital Signature, Key Encipherment",
253 | "subjectAltName": "DNS:www.runaflohmarkt.de",
254 | "subjectKeyIdentifier": "7C:82:16:CB:31:94:C6:C5:5C:72:A1:37:CA:AE:B9:9B:3D:73:3E:9B"
255 | },
256 | "fingerprint": "AD:5E:3D:91:50:46:7E:C6:D9:30:FD:65:11:8B:CE:81:FF:29:49:B9",
257 | "not_after": 1545320484,
258 | "not_before": 1537544484,
259 | "serial_number": "36434086EFE2BB58A2068BBA9F2E96B7898",
260 | "subject": {
261 | "C": null,
262 | "CN": "www.runaflohmarkt.de",
263 | "L": null,
264 | "O": null,
265 | "OU": null,
266 | "ST": null,
267 | "aggregated": "/CN=www.runaflohmarkt.de"
268 | }
269 | },
270 | "seen": 1538635262.249552,
271 | "source": {
272 | "name": "Google 'Rocketeer' log",
273 | "url": "ct.googleapis.com/rocketeer/"
274 | },
275 | "update_type": "X509LogEntry"
276 | },
277 | "message_type": "certificate_update"
278 | }
279 | ]
280 |
--------------------------------------------------------------------------------
/tests/test_domain_matching_analyser.py:
--------------------------------------------------------------------------------
1 | '''
2 | Common domain matching analyser.
3 | '''
4 | import copy
5 | import os
6 | import unittest
7 |
8 | from certstream_analytics.analysers import AhoCorasickDomainMatching
9 | from certstream_analytics.analysers import WordSegmentation
10 | from certstream_analytics.analysers import DomainMatching, DomainMatchingOption
11 | from certstream_analytics.analysers import BulkDomainMarker
12 | from certstream_analytics.analysers import IDNADecoder
13 | from certstream_analytics.analysers import HomoglyphsDecoder
14 |
15 |
class DomainMatchingTest(unittest.TestCase):
    '''
    Test all the common domain matching analysers.

    Every test is table-driven: a case is a dict with the input record
    (``data``), the value the analyser is expected to produce (``expected``)
    and a human readable ``description`` that is used as the assertion
    message.  Each case runs inside its own ``subTest`` so one failing case
    does not hide the result of the cases that follow it.
    '''

    def _assert_cases(self, analyser, cases, field='analysers'):
        '''
        Run ``analyser`` on the ``data`` of every case and compare the
        ``field`` entry of the returned record with the case's ``expected``
        list.
        '''
        for case in cases:
            with self.subTest(description=case['description']):
                got = analyser.run(case['data'])
                self.assertListEqual(got[field], case['expected'], case['description'])

    def test_ahocorasick(self):
        '''
        Compare some mock domains against the list of most popular domains
        using Aho-Corasick algorithm.
        '''
        # Load the mock list of common domains for testing.
        current_dir = os.path.dirname(os.path.realpath(__file__))

        # Be explicit about the encoding so the fixture is read the same way
        # regardless of the locale of the machine running the tests.
        with open(os.path.join(current_dir, 'opendns-top-domains.txt'), encoding='utf-8') as fhandle:
            domains = [line.rstrip() for line in fhandle]

        ahocorasick_analyser = AhoCorasickDomainMatching(domains)

        cases = [
            {
                'data': {
                    'all_domains': [
                        'store.google.com',
                        'google.com',
                    ],
                },
                'expected': [
                    {
                        'analyser': 'AhoCorasickDomainMatching',
                        'output': {
                            'store.google.com': ['google.com'],
                        },
                    },
                ],
                'description': 'An exact match domain',
            },

            {
                'data': {
                    'all_domains': [
                        'www.facebook.com.msg40.site',
                    ],
                },
                'expected': [
                    {
                        'analyser': 'AhoCorasickDomainMatching',
                        'output': {
                            'www.facebook.com.msg40.site': ['facebook.com'],
                        },
                    },
                ],
                'description': 'A sample phishing domain with a sub-domain match',
            },

            {
                'data': {
                    'all_domains': [
                        'login-appleid.apple.com.managesuppport.co',
                    ],
                },
                'expected': [
                    {
                        'analyser': 'AhoCorasickDomainMatching',
                        'output': {
                            'login-appleid.apple.com.managesuppport.co': ['apple.com'],
                        },
                    },
                ],
                'description': 'A sample phishing domain with a partial string match',
            },

            {
                'data': {
                    'all_domains': [
                        'socket.io',
                    ],
                },
                'expected': [],
                'description': 'A non-matching domain (not in the list of most popular domains)',
            },

            {
                'data': {
                    'all_domains': [
                        'www.foobar2000.com',
                    ],
                },
                'expected': [],
                'description': 'A non-matching domain (excluded pattern)',
            },

            {
                'data': {
                    'all_domains': [
                        'autodiscover.blablabla.com',
                    ],
                },
                'expected': [],
                'description': 'Match a ignored pattern',
            },
        ]

        self._assert_cases(ahocorasick_analyser, cases)

    def test_wordsegmentation(self):
        '''
        Try to segment some domains and check the result.
        '''
        wordsegmentation = WordSegmentation()

        cases = [
            {
                'data': {
                    'all_domains': [
                        'store.google.com',
                        'google.com',
                    ],
                },
                'expected': [
                    {
                        'analyser': 'WordSegmentation',
                        'output': {
                            'store.google.com': ['store', 'google', 'com'],
                            'google.com': ['google', 'com'],
                        },
                    },
                ],
                'description': 'A legit domain',
            },

            {
                'data': {
                    'all_domains': [
                        'www.facebook.com.msg40.site',
                    ],
                },
                'expected': [
                    {
                        'analyser': 'WordSegmentation',
                        'output': {
                            'www.facebook.com.msg40.site': ['www', 'facebook', 'com', 'msg40', 'site'],
                        },
                    },
                ],
                'description': 'Word segmentation using the domain separator (dot)',
            },

            {
                'data': {
                    'all_domains': [
                        'login-appleid.apple.com.managesuppport.co',
                    ],
                },
                'expected': [
                    {
                        'analyser': 'WordSegmentation',
                        'output': {
                            'login-appleid.apple.com.managesuppport.co': [
                                'login',
                                'apple',
                                'id',
                                'apple',
                                'com',
                                'manage',
                                'suppport',
                                'co'
                            ],
                        },
                    },
                ],
                'description': 'Word segmentation using dictionary',
            },

            {
                'data': {
                    'all_domains': [
                        'arch.mappleonline.com',
                    ],
                },
                'expected': [
                    {
                        'analyser': 'WordSegmentation',
                        'output': {
                            'arch.mappleonline.com': ['arch', 'm', 'apple', 'online', 'com'],
                        },
                    },
                ],
                'description': 'Failed to segment the word correctly',
            },

            {
                'data': {
                    'all_domains': [
                        'www.freybrothersinc.com',
                    ],
                },
                'expected': [
                    {
                        'analyser': 'WordSegmentation',
                        'output': {
                            'www.freybrothersinc.com': ['www', 'frey', 'brothers', 'com'],
                        },
                    },
                ],
                'description': 'Ignore certain stop words (inc) when doing segmentation',
            },
        ]

        self._assert_cases(wordsegmentation, cases)

    def test_domain_matching(self):
        '''
        Combine the result of all domain matching analysers into one.

        Every case already carries the output of the Aho-Corasick and word
        segmentation analysers; the test checks what each DomainMatching
        configuration appends to that list.
        '''
        # The first option decides if the TLD is included in the match
        options = [
            (True, DomainMatchingOption.SUBSET_MATCH),
            (False, DomainMatchingOption.SUBSET_MATCH),
            (True, DomainMatchingOption.ORDER_MATCH),
            (False, DomainMatchingOption.ORDER_MATCH),
        ]

        analysers = {o: DomainMatching(include_tld=o[0], option=o[1]) for o in options}

        cases = [
            {
                'data': {
                    'all_domains': [
                        'store.google.com',
                        'google.com',
                    ],

                    'analysers': [
                        {
                            'analyser': 'AhoCorasickDomainMatching',
                            'output': {
                                'store.google.com': ['google.com'],
                            },
                        },

                        {
                            'analyser': 'WordSegmentation',
                            'output': {
                                'store.google.com': ['store', 'google', 'com'],
                                'google.com': ['google', 'com'],
                            },
                        },
                    ],
                },
                'expected': {
                    (True, DomainMatchingOption.SUBSET_MATCH): [],
                    (False, DomainMatchingOption.SUBSET_MATCH): [],
                    (True, DomainMatchingOption.ORDER_MATCH): [],
                    (False, DomainMatchingOption.ORDER_MATCH): [],
                },
                'description': 'A legit domain so it will be skipped (no match reported)',
            },

            {
                'data': {
                    'all_domains': [
                        'login-appleid.managesuppport.com',
                    ],

                    'analysers': [
                        {
                            'analyser': 'AhoCorasickDomainMatching',
                            'output': {
                                'login-appleid.managesuppport.com': ['apple.com'],
                            },
                        },

                        {
                            'analyser': 'WordSegmentation',
                            'output': {
                                'login-appleid.managesuppport.com': [
                                    'login',
                                    'apple',
                                    'id',
                                    'manage',
                                    'suppport'
                                ],
                            },
                        },
                    ],
                },
                'expected': {
                    (True, DomainMatchingOption.SUBSET_MATCH): [],
                    (False, DomainMatchingOption.SUBSET_MATCH): [
                        {
                            'analyser': 'DomainMatching',
                            'output': {
                                'login-appleid.managesuppport.com': ['apple.com']
                            },
                        },
                    ],
                    (True, DomainMatchingOption.ORDER_MATCH): [],
                    (False, DomainMatchingOption.ORDER_MATCH): [
                        {
                            'analyser': 'DomainMatching',
                            'output': {
                                'login-appleid.managesuppport.com': ['apple.com']
                            },
                        },
                    ],
                },
                'description': 'Find a matching phishing domain',
            },

            {
                'data': {
                    'all_domains': [
                        'djunprotected.com',
                        'www.djunprotected.com'
                    ],

                    'analysers': [
                        {
                            'analyser': 'AhoCorasickDomainMatching',
                            'output': {
                                'djunprotected.com': ['ted.com']
                            }
                        },

                        {
                            'analyser': 'WordSegmentation',
                            'output': {
                                'djunprotected.com': ['dj', 'unprotected', 'com'],
                                'www.djunprotected.com': ['www', 'dj', 'unprotected', 'com']
                            }
                        },
                    ],
                },
                'expected': {
                    (True, DomainMatchingOption.SUBSET_MATCH): [],
                    (False, DomainMatchingOption.SUBSET_MATCH): [],
                    (True, DomainMatchingOption.ORDER_MATCH): [],
                    (False, DomainMatchingOption.ORDER_MATCH): [],
                },
                # The substring hit (ted.com) is not one of the segmented
                # words, so no configuration is expected to report it.
                'description': 'Skip a substring match that is not among the segmented words',
            },
        ]

        for case in cases:
            for option, analyser in analysers.items():
                with self.subTest(description=case['description'], option=option):
                    # The same case is fed to four analysers.  Hand each of
                    # them a private copy so that, should run() modify the
                    # record in place, one configuration cannot leak its
                    # output into the input (and the expectation) of the next.
                    record = copy.deepcopy(case['data'])

                    expected = copy.deepcopy(case['data']['analysers'])
                    expected.extend(case['expected'][option])

                    got = analyser.run(record)
                    self.assertListEqual(got['analysers'], expected,
                                         '{} ({})'.format(case['description'], option))

    def test_bulk_domain_marker(self):
        '''
        Test the bulk domain analyser.
        '''
        bulky = BulkDomainMarker()

        cases = [
            {
                'data': {
                    'all_domains': [
                        'store.google.com',
                        'google.com',
                    ],
                },
                'expected': [
                    {'analyser': 'BulkDomainMarker', 'output': False}
                ],
                'description': 'Not a bulk record',
            },
            {
                'data': {
                    'all_domains': [
                        'a.com',
                        'b.com',
                        'c.com',
                        'd.com',
                        'e.com',
                        'f.com',
                        'g.com',
                        'h.com',
                        'i.com',
                        'j.com',
                        'k.com',
                        'l.com',
                        'm.com',
                        'n.com',
                        'o.com',
                    ],
                },
                'expected': [
                    {'analyser': 'BulkDomainMarker', 'output': True}
                ],
                'description': 'Mark a bulk record',
            },
        ]

        self._assert_cases(bulky, cases)

    def test_idn_decoder(self):
        '''
        Test the IDNA decoder.

        Unlike the analysers above, the decoder is checked through the
        ``all_domains`` list of the returned record.
        '''
        decoder = IDNADecoder()

        cases = [
            {
                'data': {
                    'all_domains': [
                        'store.google.com',
                        'google.com',
                    ],
                },
                'expected': [
                    'store.google.com',
                    'google.com',
                ],
                'description': 'There is no domain in IDNA format',
            },
            {
                'data': {
                    'all_domains': [
                        'xn--f1ahbgpekke1h.xn--p1ai',
                        'tigrobaldai.lt'
                    ],
                },
                'expected': [
                    'укрэмпужск.рф',
                    'tigrobaldai.lt'
                ],
                'description': 'Convert some domains in IDNA format',
            },
            {
                'data': {
                    'all_domains': [
                        'xn--foobar.xn--me',
                    ],
                },
                'expected': [
                    'xn--foobar.xn--me',
                ],
                'description': 'Handle an invalid IDNA string',
            },
            {
                'data': {
                    'all_domains': [
                        '*.xn---35-5cd3cln6a9bzb.xn--p1ai',
                        '*.nl-dating-vidkid.com',
                    ],
                },
                'expected': [
                    '*.отмычка-35.рф',
                    '*.nl-dating-vidkid.com',
                ],
                'description': 'Handle an invalid code point',
            },
        ]

        self._assert_cases(decoder, cases, field='all_domains')

    def test_homoglyphs_decoder(self):
        '''
        Test the homoglyphs decoder.

        A fresh decoder is built for every case because the ``greedy`` flag
        is part of the case itself.
        '''
        cases = [
            {
                'data': {
                    'all_domains': [
                        'store.google.com',
                        '*.google.com',
                    ],
                },
                'greedy': False,
                'expected': [
                    'store.google.com',
                    '*.google.com',
                ],
                'description': 'Normal domains in ASCII',
            },
            {
                'data': {
                    'all_domains': [
                        'store.google.com',
                        '*.google.com',
                    ],
                },
                'greedy': True,
                'expected': [
                    'store.google.com',
                    'store.google.corn',
                    'store.googie.com',
                    'store.googie.corn',
                    '*.google.com',
                    '*.google.corn',
                    '*.googie.com',
                    '*.googie.corn'
                ],
                'description': 'Normal domains in ASCII with a greedy decoder',
            },
            {
                'data': {
                    'all_domains': [
                        'укрэмпужск.рф',
                        'tigrobaldai.lt',
                    ],
                },
                'greedy': False,
                'expected': [
                    'yкpэмпyжcк.pф',
                    'tigrobaldai.lt',
                ],
                'description': 'Normal domains in Unicode',
            },
            {
                'data': {
                    'all_domains': [
                        'укрэмпужск.рф',
                        'tigrobaldai.lt',
                    ],
                },
                'greedy': True,
                'expected': [
                    'yкpэмпyжcк.pф',
                    'tigrobaldai.lt',
                    'tigrobaldai.it',
                    'tigrobaidai.lt',
                    'tigrobaidai.it',
                ],
                'description': 'Normal domains in Unicode with a greedy decoder',
            },
            {
                'data': {
                    'all_domains': [
                        # MATHEMATICAL MONOSPACE SMALL P 1D699
                        '*.𝗉aypal.com',

                        # MATHEMATICAL SANS-SERIF BOLD SMALL RHO
                        'phishing.𝗉ay𝞀al.com',
                    ],
                },
                'greedy': False,
                'expected': [
                    '*.paypal.com',
                    'phishing.paypal.com',
                ],
                'description': 'Phishing example in confusable homoglyphs'
            },
            {
                'data': {
                    'all_domains': [
                        # MATHEMATICAL MONOSPACE SMALL P 1D699
                        '*.𝗉aypal.com',

                        # MATHEMATICAL SANS-SERIF BOLD SMALL RHO
                        'phishing.𝗉ay𝞀al.com',
                    ],
                },
                'greedy': True,
                'expected': [
                    '*.paypal.com',
                    '*.paypal.corn',
                    '*.paypai.com',
                    '*.paypai.corn',
                    'phishing.paypal.com',
                    'phishing.paypal.corn',
                    'phishing.paypai.com',
                    'phishing.paypai.corn',
                ],
                'description': 'Phishing example in confusable homoglyphs with a greedy decoder'
            },
        ]

        for case in cases:
            with self.subTest(description=case['description'], greedy=case['greedy']):
                decoder = HomoglyphsDecoder(greedy=case['greedy'])

                got = decoder.run(case['data'])
                self.assertListEqual(got['all_domains'], case['expected'], case['description'])
601 |
--------------------------------------------------------------------------------
/scripts/sundry/certstream-domain-features.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 145,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import json\n",
10 | "import numpy as np\n",
11 | "\n",
12 | "domains = []\n",
13 | "features = []\n",
14 | "\n",
15 | "with open('domain-matching.20181014.decoded') as f:\n",
16 | " for line in f:\n",
17 | " record = json.loads(line.strip()) \n",
18 | " \n",
19 | " domains.extend(list(record['analysers'][0]['output'].keys()))\n",
20 | " features.extend(record['analysers'][-1]['output'])"
21 | ]
22 | },
23 | {
24 | "cell_type": "code",
25 | "execution_count": 146,
26 | "metadata": {},
27 | "outputs": [
28 | {
29 | "data": {
30 | "text/plain": [
31 | "14004"
32 | ]
33 | },
34 | "execution_count": 146,
35 | "metadata": {},
36 | "output_type": "execute_result"
37 | }
38 | ],
39 | "source": [
40 | "len(domains)"
41 | ]
42 | },
43 | {
44 | "cell_type": "code",
45 | "execution_count": 147,
46 | "metadata": {},
47 | "outputs": [
48 | {
49 | "data": {
50 | "text/plain": [
51 | "14004"
52 | ]
53 | },
54 | "execution_count": 147,
55 | "metadata": {},
56 | "output_type": "execute_result"
57 | }
58 | ],
59 | "source": [
60 | "len(features)"
61 | ]
62 | },
63 | {
64 | "cell_type": "code",
65 | "execution_count": 148,
66 | "metadata": {},
67 | "outputs": [],
68 | "source": [
69 | "columns = ['NumberOfParts', 'Length', 'LongestPart', 'TLD', 'Randomness']"
70 | ]
71 | },
72 | {
73 | "cell_type": "code",
74 | "execution_count": 149,
75 | "metadata": {},
76 | "outputs": [],
77 | "source": [
78 | "import pandas as pd\n",
79 | "\n",
80 | "df = pd.DataFrame(data=features, columns=columns, index=domains)"
81 | ]
82 | },
83 | {
84 | "cell_type": "code",
85 | "execution_count": 150,
86 | "metadata": {},
87 | "outputs": [
88 | {
89 | "data": {
90 | "text/plain": [
91 | "(14004, 5)"
92 | ]
93 | },
94 | "execution_count": 150,
95 | "metadata": {},
96 | "output_type": "execute_result"
97 | }
98 | ],
99 | "source": [
100 | "df.shape"
101 | ]
102 | },
103 | {
104 | "cell_type": "code",
105 | "execution_count": 151,
106 | "metadata": {},
107 | "outputs": [
108 | {
109 | "data": {
110 | "text/html": [
111 | "
\n",
112 | "\n",
125 | "
\n",
126 | " \n",
127 | " \n",
128 | " | \n",
129 | " NumberOfParts | \n",
130 | " Length | \n",
131 | " LongestPart | \n",
132 | " TLD | \n",
133 | " Randomness | \n",
134 | "
\n",
135 | " \n",
136 | " \n",
137 | " \n",
138 | " | www.sawyerrshousegivebackafrica.co.uk | \n",
139 | " 4 | \n",
140 | " 37 | \n",
141 | " 27 | \n",
142 | " 2 | \n",
143 | " 0.0 | \n",
144 | "
\n",
145 | " \n",
146 | "
\n",
147 | "
"
148 | ],
149 | "text/plain": [
150 | " NumberOfParts Length LongestPart \\\n",
151 | "www.sawyerrshousegivebackafrica.co.uk 4 37 27 \n",
152 | "\n",
153 | " TLD Randomness \n",
154 | "www.sawyerrshousegivebackafrica.co.uk 2 0.0 "
155 | ]
156 | },
157 | "execution_count": 151,
158 | "metadata": {},
159 | "output_type": "execute_result"
160 | }
161 | ],
162 | "source": [
163 | "df.sample()"
164 | ]
165 | },
166 | {
167 | "cell_type": "code",
168 | "execution_count": 152,
169 | "metadata": {},
170 | "outputs": [
171 | {
172 | "data": {
173 | "text/plain": [
174 | "count 6.000000\n",
175 | "mean 2334.000000\n",
176 | "std 2431.878862\n",
177 | "min 5.000000\n",
178 | "25% 109.000000\n",
179 | "50% 2355.000000\n",
180 | "75% 4507.250000\n",
181 | "max 4704.000000\n",
182 | "Name: NumberOfParts, dtype: float64"
183 | ]
184 | },
185 | "execution_count": 152,
186 | "metadata": {},
187 | "output_type": "execute_result"
188 | }
189 | ],
190 | "source": [
191 | "df['NumberOfParts'].value_counts().describe()"
192 | ]
193 | },
194 | {
195 | "cell_type": "code",
196 | "execution_count": 153,
197 | "metadata": {},
198 | "outputs": [
199 | {
200 | "data": {
201 | "text/plain": [
202 | ""
203 | ]
204 | },
205 | "execution_count": 153,
206 | "metadata": {},
207 | "output_type": "execute_result"
208 | },
209 | {
210 | "data": {
211 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAmAAAAEvCAYAAADijX30AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAFqtJREFUeJzt3X+s1nX9//HHxTmAID+OKJyji/yWWrp+bpWIsFzHzwEVTbDDam0tMGeSy5FlM1v+SKHPFpUzvksZG9NWm4IebB03NCgPTM2VU8ppjYoNGueiEA5g6PEcru8ffjvLj/Xhh16vwzncbn/Bm+t6X8/3c+xwP9d1uK5KrVarBQCAYkYM9gAAAMcbAQYAUJgAAwAoTIABABQmwAAAChNgAACFCTAAgMIEGABAYQIMAKAwAQYAUFjjYA/wvzl48GD6+4fmJyU1NFSG7OxDlZ2XZ+fl2Xl5dl7eUN35yJENh33bYzrA+vtr2bPnH4M9xlFpaho7ZGcfquy8PDsvz87Ls/PyhurOJ08ef9i39RIkAEBhAgwAoDABBgBQmAADAChMgAEAFCbAAAAKE2AAAIUJMACAwgQYAEBhAgwAoDABBgBQ2DH9WZD1Nm7CmIwZXb8VHMlnQh2JA6/2Zf/eA3U5NwBQf8d1gI0Z3Zj/c2PnYI9xxLb+95zsH+whAICj5iVIAIDCBBgAQGECDACgMAEGAFCYAAMAKEyAAQAUJsAAAAo7rt8HjPJOntiYEaPG1O389Xrz24O9B7Krp68u5wbg+CPAKGrEqDHJrRMHe4wjNuLWniT7BnsMAIYJL0ECABQmwAAAChNgAACFCTAAgMIEGABAYf4XJAxz45pGZszIE+p2/nq99ceB117J/j2v1eXcAINNgMEwN2bkCfnAvR8Y7DGO2O8+/7vsjwADhicvQQIAFCbAAAAKE2AAAIUJMACAwgQYAEBhAgwAoDABBgBQmAADAChMgAEAFCbAAAAKE2AAAIUJMACAwgQYAEBhAgwAoDABBgBQmAADACjssAOsv78/c+fOzRe/+MUkybZt2zJ//vy0tbVl8eLF6e3tTZL09vZm8eLFaWtry/z587N9+/aBc9xzzz1pa2vL7Nmzs3Hjxrf5UgAAhobDDrD77rsvZ5xxxsDvly1blgULFuSxxx7LhAkTsmbNmiTJ6tWrM2HChDz22GNZsGBBli1bliTZsmVLOjs709nZmZUrV+a2225Lf3//23w5AADHvsMKsO7u7vzqV79Ke3t7kqRWq+Wpp57K7NmzkyTz5s3L+vXrkyQbNmzIvHnzkiSzZ8/Ok08+mVqtlvXr12fOnDkZNWpUpk6dmtNPPz2bN2+uxzUBABzTDivAli5dmhtuuCEjRrx+8927d2fChAlpbGxMkrS0tKRarSZJqtVqTj311CRJY2Njxo8fn927d6daraalpWXgnM3NzQP3AQA4njQe6ga//OUvM2nSpLz//e/Pr3/96xIzDWhoqKSpaWzRxxwq7KU8Oy/Pzt+soWGEvRRm5+UdDzs/ZIA988wz2bBhQ7q6uvLqq69m//79WbJkSfbu3Zu+vr40Njamu7s7zc3NSV5/ZmvHjh1paWlJX19f9u3bl5NOOinNzc3p7u4eOG+1Wh24z3/S31/Lnj3/eIuX+J9Nnjy+bueut3rupZ7svDw7H16amsbaS2F2Xt5Q3fmRfL095EuQX/3qV9PV1ZUNGzbk+9//fs4777x873vfy7Rp07Ju3bokSUdHR1pbW5Mkra2t6ejoSJKsW7cu5513XiqVSlpbW9PZ2Zne3t5s27YtW7duzQc/+MGjuT4AgCHtqN8H7IYbbsiqVavS1taWPXv2ZP78+UmS9vb27NmzJ21tbVm1alW+9rWvJUnOOuusXHzxxbnkkkty1VVX5eabb05DQ8PbcxUAAEPIIV+C/FfTpk3LtGnTkiRTp04deOuJfzV69Ojcdddd//b+
ixYtyqJFi45iTACA4cM74QMAFCbAAAAKE2AAAIUJMACAwgQYAEBhAgwAoDABBgBQmAADAChMgAEAFCbAAAAKE2AAAIUJMACAwgQYAEBhAgwAoDABBgBQmAADAChMgAEAFCbAAAAKE2AAAIUJMACAwgQYAEBhAgwAoDABBgBQmAADAChMgAEAFCbAAAAKE2AAAIUJMACAwgQYAEBhAgwAoDABBgBQmAADAChMgAEAFCbAAAAKE2AAAIUJMACAwgQYAEBhAgwAoDABBgBQmAADAChMgAEAFCbAAAAKE2AAAIUJMACAwgQYAEBhAgwAoLBDBtirr76a9vb2fPKTn8ycOXNy1113JUm2bduW+fPnp62tLYsXL05vb2+SpLe3N4sXL05bW1vmz5+f7du3D5zrnnvuSVtbW2bPnp2NGzfW6ZIAAI5thwywUaNG5d57783PfvazrF27Nhs3bsyzzz6bZcuWZcGCBXnssccyYcKErFmzJkmyevXqTJgwIY899lgWLFiQZcuWJUm2bNmSzs7OdHZ2ZuXKlbntttvS399f36sDADgGHTLAKpVKTjzxxCRJX19f+vr6UqlU8tRTT2X27NlJknnz5mX9+vVJkg0bNmTevHlJktmzZ+fJJ59MrVbL+vXrM2fOnIwaNSpTp07N6aefns2bN9frugAAjlmH9TNg/f39ufzyy3P++efn/PPPz9SpUzNhwoQ0NjYmSVpaWlKtVpMk1Wo1p556apKksbEx48ePz+7du1OtVtPS0jJwzubm5oH7AAAcTxoP50YNDQ15+OGHs3fv3lx77bX585//XO+5/v/jVtLUNLbIYw019lKenZdn52/W0DDCXgqz8/KOh50fVoD904QJEzJt2rQ8++yz2bt3b/r6+tLY2Jju7u40Nzcnef2ZrR07dqSlpSV9fX3Zt29fTjrppDQ3N6e7u3vgXNVqdeA+/0l/fy179vzjKC7r8EyePL5u5663eu6lnuy8PDsfXpqaxtpLYXZe3lDd+ZF8vT3kS5AvvfRS9u7dmyR55ZVX8sQTT+SMM87ItGnTsm7duiRJR0dHWltbkyStra3p6OhIkqxbty7nnXdeKpVKWltb09nZmd7e3mzbti1bt27NBz/4wSO+OACAoe6Qz4Dt3LkzN954Y/r7+1Or1XLRRRflE5/4RM4888x85StfyZ133plzzjkn8+fPT5K0t7fnhhtuSFtbWyZOnJgf/OAHSZKzzjorF198cS655JI0NDTk5ptvTkNDQ32vDgDgGHTIADv77LOzdu3aNx2fOnXqwFtP/KvRo0cPvFfY/7Ro0aIsWrToKMYEABg+vBM+AEBhAgwAoDABBgBQmAADAChMgAEAFCbAAAAKE2AAAIUJMACAwgQYAEBhAgwAoDABBgBQmAADAChMgAEAFCbAAAAKE2AAAIUJMACAwgQYAEBhAgwAoDABBgBQmAADAChMgAEAFCbAAAAKE2AAAIUJMACAwgQYAEBhAgwAoDABBgBQmAADAChMgAEAFCbAAAAKE2AAAIUJMACAwgQYAEBhAgwAoDABBgBQmAADAChMgAEAFCbAAAAKE2AAAIUJMACAwgQYAEBhAgwAoDABBgBQmAADAChMgAEAFCbAAAAKO2SA7dixI5/73OdyySWXZM6cObn33nuTJHv27MnChQsza9asLFy4MD09PUmSWq2WO+64I21tbbnsssvy/PPPD5yro6Mjs2bNyqxZs9LR0VGnSwIAOLYdMsAaGhpy44035pFHHsn999+fn/70p9myZUtWrFiR6dOn59FHH8306dOzYsWKJElXV1e2bt2aRx99NLfffntuvfXWJK8H2/Lly/PAAw9k9erVWb58+UC0AQAcTw4ZYFOmTMn73ve+JMm4cePy7ne/O9VqNevXr8/cuXOTJHPnzs0vfvGLJBk4XqlU8uEPfzh79+7Nzp07s2nTpsyYMSNNTU2ZOHFiZsyYkY0bN9bx0gAAjk1H9DNg27dvzwsvvJAPfehD
2bVrV6ZMmZIkmTx5cnbt2pUkqVaraWlpGbhPS0tLqtXqm443NzenWq2+HdcAADCkNB7uDV9++eVcd911uemmmzJu3Lg3/FmlUkmlUnnbh2toqKSpaezbft7hwF7Ks/Py7PzNGhpG2Ethdl7e8bDzwwqw1157Ldddd10uu+yyzJo1K0ly8sknZ+fOnZkyZUp27tyZSZMmJXn9ma3u7u6B+3Z3d6e5uTnNzc15+umnB45Xq9Wce+65/+vj9vfXsmfPP474og7X5Mnj63bueqvnXurJzsuz8+GlqWmsvRRm5+UN1Z0fydfbQ74EWavV8s1vfjPvfve7s3DhwoHjra2tWbt2bZJk7dq1ufDCC99wvFar5dlnn8348eMzZcqUzJw5M5s2bUpPT096enqyadOmzJw580ivDQBgyDvkM2C//e1v8/DDD+c973lPLr/88iTJ9ddfn6uvvjqLFy/OmjVrctppp+XOO+9MklxwwQV5/PHH09bWljFjxmTp0qVJkqampnzpS19Ke3t7kuTaa69NU1NTva4LAOCYdcgA++hHP5o//OEP//bP/vmeYP+qUqnklltu+be3b29vHwgwAIDjlXfCBwAoTIABABQmwAAAChNgAACFCTAAgMIEGABAYQIMAKAwAQYAUJgAAwAoTIABABQmwAAAChNgAACFCTAAgMIEGABAYQIMAKAwAQYAUJgAAwAoTIABABQmwAAAChNgAACFCTAAgMIEGABAYQIMAKAwAQYAUJgAAwAoTIABABQmwAAAChNgAACFCTAAgMIEGABAYQIMAKAwAQYAUJgAAwAoTIABABQmwAAAChNgAACFCTAAgMIEGABAYQIMAKAwAQYAUJgAAwAoTIABABQmwAAAChNgAACFCTAAgMIEGABAYYcMsG984xuZPn16Lr300oFje/bsycKFCzNr1qwsXLgwPT09SZJarZY77rgjbW1tueyyy/L8888P3KejoyOzZs3KrFmz0tHRUYdLAQAYGg4ZYFdccUVWrlz5hmMrVqzI9OnT8+ijj2b69OlZsWJFkqSrqytbt27No48+mttvvz233nprkteDbfny5XnggQeyevXqLF++fCDaAACON4cMsI997GOZOHHiG46tX78+c+fOTZLMnTs3v/jFL95wvFKp5MMf/nD27t2bnTt3ZtOmTZkxY0aampoyceLEzJgxIxs3bqzD5QAAHPuO6mfAdu3alSlTpiRJJk+enF27diVJqtVqWlpaBm7X0tKSarX6puPNzc2pVqtvZW4AgCGr8a2eoFKppFKpvB2zvElDQyVNTWPrcu6hzl7Ks/Py7PzNGhpG2Ethdl7e8bDzowqwk08+OTt37syUKVOyc+fOTJo0Kcnrz2x1d3cP3K67uzvNzc1pbm7O008/PXC8Wq3m3HPPPeTj9PfXsmfPP45mxMMyefL4up273uq5l3qy8/LsfHhpahprL4XZeXlDdedH8vX2qF6CbG1tzdq1a5Mka9euzYUXXviG47VaLc8++2zGjx+fKVOmZObMmdm0aVN6enrS09OTTZs2ZebMmUfz0AAAQ94hnwG7/vrr8/TTT2f37t35+Mc/ni9/+cu5+uqrs3jx4qxZsyannXZa7rzzziTJBRdckMcffzxtbW0ZM2ZMli5dmiRpamrKl770pbS3tydJrr322jQ1NdXxsgAAjl2HDLDvf//7//b4vffe+6ZjlUolt9xyy7+9fXt7+0CAAQAcz7wTPgBAYQIMAKAwAQYAUJgAAwAoTIABABQmwAAAChNgAACFCTAAgMIEGABAYQIMAKAwAQYAUJgAAwAoTIABABQmwAAAChNgAACFCTAAgMIEGABAYQIMAKAwAQYAUJgAAwAoTIABABQmwAAAChNgAACFCTAAgMIEGABAYQIMAKAwAQYAUJgAAwAoTIABABQmwAAAChNgAACFCTAAgMIEGABAYQIMAKCwxsEeAGC4OWncyDSOOaFu5588eXxdztt34JXs3v9aXc4NvJEA
A3ibNY45IS+cfc5gj3HEznnxhUSAQRFeggQAKEyAAQAUJsAAAAoTYAAAhQkwAIDCBBgAQGECDACgMAEGAFCYAAMAKEyAAQAUVjzAurq6Mnv27LS1tWXFihWlHx4AYNAVDbD+/v58+9vfzsqVK9PZ2Zmf//zn2bJlS8kRAAAGXdEP4968eXNOP/30TJ06NUkyZ86crF+/PmeeeWbJMQAYZiZOGJNRo+v3T9rkyePrct7eV/vSs/dAXc7Nsa1ogFWr1bS0tAz8vrm5OZs3by45AgDD0KjRjfm/12wY7DGO2LV3tw72CAySogF2pEaObKjbdx3/tPW/59T1/PVS773U1a09gz3BURnKO//d53832CMclaG883NefGGwRzgqQ3nnQzVmhvLO62m476Xoz4A1Nzenu7t74PfVajXNzc0lRwAAGHRFA+wDH/hAtm7dmm3btqW3tzednZ1pbR2a37EAABytoi9BNjY25uabb85VV12V/v7+fOpTn8pZZ51VcgQAgEFXqdVqtcEeAgDgeOKd8AEAChNgAACFCTAAgMIEGABAYQKsDr7+9a8P9gjHhT/96U958skn8/LLL7/heFdX1yBNNPxt3rx54NMrtmzZklWrVuXxxx8f5KmOH7/5zW+yatWqbNq0abBHGbaee+657N+/P0nyyiuv5K677so111yT7373u9m3b98gTzc83XfffdmxY8dgj1Gc/wX5Fl1zzTVvOvbrX/8606ZNS5LcfffdpUc6Ltx33335yU9+kjPOOCMvvvhibrrppvzXf/1XkmTevHnp6OgY5AmHn+XLl6erqyt9fX2ZMWNGnnvuuUybNi1PPPFEZs6cmUWLFg32iMNOe3t71qxZkyR54IEH8pOf/CRtbW3ZtGlTWltbc/XVVw/yhMPPnDlz8vDDD6exsTHf+ta3csIJJ2T27Nl56qmn8uKLL2b58uWDPeKw85GPfCRjxozJO9/5zsyZMycXX3xxJk2aNNhj1d0x/VFEQ0G1Ws0ZZ5yR+fPnp1KppFar5fe//32uvPLKwR5tWFu9enUeeuihnHjiidm+fXuuu+66/PWvf83nP//5+J6iPtatW5e1a9emt7c3M2bMSFdXV8aNG5cvfOELmT9/vgCrg76+voFf33///Vm1alUmTZqUK6+8Mp/+9KcFWB0cPHgwjY2v/9P4+9//fuCbuY9+9KO5/PLLB3O0YWvq1Kl56KGH8sQTT+SRRx7JD3/4w7zvfe/LpZdemra2towbN26wR6wLL0G+RQ8++GDe//735+6778748eMzbdq0jB49Oueee27OPffcwR5v2Dp48GBOPPHEJMk73vGO/PjHP05XV1e+853vCLA6aWhoSENDw8B3qv/8onjCCSdkxAhfSurh4MGD6enpye7du1Or1QaeFRg7dmwaGhoGebrh6ayzzsqDDz6YJDn77LPzu9+9/jmqf/nLXwbCjLdXpVLJiBEjMnPmzCxdujQbN27MZz/72WzcuHHglY3hyN+mt2jEiBFZsGBBLrrooixdujSnnHJK+vv7B3usYe/kk0/OCy+8kHPOOSdJcuKJJ+aee+7JTTfdlD/+8Y+DPN3wNHLkyBw4cCBjxozJQw89NHB83759AqxO9u/fnyuuuCK1Wi2VSiU7d+7MlClT8vLLL/tGo06WLFmSJUuW5Ec/+lFOOumkfOYzn0lLS0tOPfXULFmyZLDHG5b+59/lkSNH5sILL8yFF16YAwcODNJU9ednwN5mv/rVr/LMM8/k+uuvH+xRhrXu7u40NDRk8uTJb/qz3/72t/nIRz4yCFMNb729vRk1atSbjr/00kv529/+lve+972DMNXx6cCBA/n73/+eqVOnDvYow9b+/fuzffv29PX1paWlJaeccspgjzRs/eUvf8m73vWuwR6jOAEGAFCY1w0AAAoTYAAAhQkwAIDCBBgAQGECDACgsP8HRsWezpLvOk4AAAAASUVORK5CYII=\n",
212 | "text/plain": [
213 | ""
214 | ]
215 | },
216 | "metadata": {},
217 | "output_type": "display_data"
218 | }
219 | ],
220 | "source": [
221 | "import matplotlib.pyplot as plt\n",
222 | "import seaborn\n",
223 | "\n",
224 | "seaborn.set_style(\"darkgrid\")\n",
225 | "\n",
226 | "plt.figure(figsize=(10,5))\n",
227 | "df['NumberOfParts'].value_counts().plot(kind='bar')"
228 | ]
229 | },
230 | {
231 | "cell_type": "code",
232 | "execution_count": 154,
233 | "metadata": {},
234 | "outputs": [
235 | {
236 | "data": {
237 | "text/plain": [
238 | ""
239 | ]
240 | },
241 | "execution_count": 154,
242 | "metadata": {},
243 | "output_type": "execute_result"
244 | },
245 | {
246 | "data": {
247 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAlMAAAEvCAYAAABhSUTPAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAADlhJREFUeJzt3V1o3Qf9x/HPabJqa5fG1jyoRP9uTipVvHASM0Qws43SSTO1InrhClIFoWyVSX1EhA3ROYp6FQbSXQhitamsgz1kSoulgsp8wAcYdtDJmkA0a+u21pzmfzEs7v8fnCzftCenfb3ucnJyzgeaX86b8zs9p7GwsLAQAACWZFW7BwAAdDIxBQBQIKYAAArEFABAgZgCACgQUwAABWIKAKBATAEAFIgpAIACMQUAUNB9Oe/swoULaTZ9eg2tdXU1/K4Ay87fFl6Oa67pWtT1LmtMNZsLmZt79nLeJR2qt3et3xVg2fnbwsvR13ftoq7nNB8AQIGYAgAoEFMAAAViCgCgQEwBABSIKQCAAjEFAFAgpgAACsQUAECBmAIAKBBTAAAFl/Wz+Xhp63rWZM0r/FP8X4v9TKSryXPn5nP29HPtngHAf/EIvgKseUV3/mfv4XbPoAM8+c1tOdvuEQC8iNN8AAAFYgoAoEBMAQAUiCkAgAIxBQBQIKYAAArEFABAgZgCACgQUwAABWIKAKBATAEAFIgpAIACMQUAUCCmAAAKxBQAQIGYAgAoEFMAAAWLjqlms5nx8fF85jOfSZKcPHkyO3bsyJYtW3L77bfn/Pnzl2wkAMBKteiYuv/++3P99ddf/Pqee+7JbbfdlkceeSQ9PT05cODAJRkIALCSLSqmTp06lV/84hf56Ec/miRZWFjI8ePHMzY2liS59dZbMzU1delWAgCsUIuKqbvvvjt33nlnVq164er//Oc/09PTk+7u7iTJ4OBgpqenL91KAIAVqrvVFX7+859nw4YNedvb3pZf/epXpTvr6mqkt3dt6TbgaucYgqXr6lrlGGLZtYyp3/72t3nsscdy5MiRnDt3LmfPns1dd92V06dPZ35+Pt3d3Tl16lQGBgZa3lmzuZC5uWeXZfiVpK/v2nZPoIM4hmDpenvXOoZYtMU+Prc8zff5z38+R44cyWOPPZZ777037373u/Od73wnw8PDeeihh5IkBw8ezOjoaG0xAEAHWvL7TN155535wQ9+kC1btmRubi47duxYzl0AAB2h5Wm+/zY8PJzh4eEkydDQkLdDAACuet4BHQCgQEwBABSIKQCAAjEFAFAgpgAACsQUAECBmAIAKBBTAAAFYgoAoEBMAQAUiCkAgAIxBQBQIKYAAArEFABAgZgCACgQUwAABWIKAKBATAEAFIgpAIACMQUAUCCmAAAKxBQAQIGYAgAoEFMAAAViCgCgQEwBABSIKQCAAjEFAFAgpgAACsQUAECBmAIAKBBTAAAFYgoAoEBMAQAUiCkAgAIxBQBQIKYAAArEFABAgZgCACgQUwAABWIKAKBATAEAFIgpAIACMQUAUCCmAAAKxBQAQIGYAgAoEFMAAAViCgCgQEwBABR0t7rCuXPn8slPfjLnz59Ps9nM2NhYdu/enZMnT2bPnj2Zm5vL5s2b861vfSurV6++HJsBAFaMls9MrV69Ovv378/PfvazTE5O5ujRo3n88cdzzz335LbbbssjjzySnp6eHDhw4HLsBQBYUVrGVKPRyKte9aokyfz8fObn59NoNHL8+PGMjY0lSW699dZMTU1d2qUAACvQol4z1Ww2s3379tx000256aabMjQ0lJ6ennR3v3CWcHBwMNPT05d0KADAStTyNVNJ0tXVlUOHDuX06dP53Oc+l7/97W9LurOurkZ6e9cu6WeBFziGYOm6ulY5hlh2i4qp/+jp6cnw8HAef/zxnD59OvPz8+nu7s6pU6cyMDDQ8uebzYXMzT27
5LFXqr6+a9s9gQ7iGIKl6+1d6xhi0Rb7+NzyNN8//vGPnD59Okny/PPP59ixY7n++uszPDychx56KEly8ODBjI6OFuYCAHSmls9MzczMZO/evWk2m1lYWMgHPvCBvO9978ub3/zm3HHHHdm3b1/e+ta3ZseOHZdjLwDAitIypjZt2pTJycn/d/nQ0JC3QwAArnreAR0AoEBMAQAUiCkAgAIxBQBQIKYAAArEFABAgZgCACgQUwAABWIKAKBATAEAFIgpAIACMQUAUCCmAAAKxBQAQIGYAgAoEFMAAAViCgCgQEwBABSIKQCAAjEFAFAgpgAACsQUAECBmAIAKBBTAAAFYgoAoEBMAQAUiCkAgAIxBQBQIKYAAArEFABAgZgCACgQUwAABWIKAKBATAEAFIgpAIACMQUAUCCmAAAKxBQAQIGYAgAoEFMAAAViCgCgQEwBABSIKQCAAjEFAFAgpgAACsQUAECBmAIAKBBTAAAFYgoAoEBMAQAUdLe6wtNPP50vfOELmZ2dTaPRyMc+9rF86lOfytzcXO644478/e9/z+tf//rs27cv69evvxybAQBWjJbPTHV1dWXv3r158MEH86Mf/Sg//OEP88QTT2RiYiIjIyN5+OGHMzIykomJicuxFwBgRWkZU/39/dm8eXOSZN26dbnuuusyPT2dqampjI+PJ0nGx8fz6KOPXtqlAAAr0Mt6zdRTTz2VP//5z3nHO96R2dnZ9Pf3J0n6+voyOzt7SQYCAKxkLV8z9R//+te/snv37nzpS1/KunXrXvS9RqORRqPR8ja6uhrp7V378lcCFzmGYOm6ulY5hlh2i4qpf//739m9e3c+9KEPZevWrUmSjRs3ZmZmJv39/ZmZmcmGDRta3k6zuZC5uWdri69AfX3XtnsCHcQxBEvX27vWMcSiLfbxueVpvoWFhXz5y1/Oddddl507d168fHR0NJOTk0mSycnJ3HzzzUucCgDQuVo+M/Wb3/wmhw4dylve8pZs3749SbJnz57s2rUrt99+ew4cOJDXve512bdv3yUfCwCw0rSMqRtvvDF//etfX/J7+/fvX/ZBAACdxDugAwAUiCkAgAIxBQBQIKYAAArEFABAgZgCACgQUwAABWIKAKBATAEAFIgpAIACMQUAUCCmAAAKxBQAQIGYAgAoEFMAAAViCgCgQEwBABSIKQCAAjEFAFAgpgAACsQUAECBmAIAKBBTAAAFYgoAoEBMAQAUiCkAgAIxBQBQIKYAAArEFABAgZgCACgQUwAABWIKAKBATAEAFIgpAIACMQUAUCCmAAAKxBQAQIGYAgAoEFMAAAViCgCgQEwBABSIKQCAAjEFAFAgpgAACsQUAECBmAIAKBBTAAAFYgoAoEBMAQAUiCkAgIKWMfXFL34xIyMjueWWWy5eNjc3l507d2br1q3ZuXNnnnnmmUs6EgBgpWoZUx/+8Idz3333veiyiYmJjIyM5OGHH87IyEgmJiYu2UAAgJWsZUy9613vyvr161902dTUVMbHx5Mk4+PjefTRRy/NOgCAFW5Jr5manZ1Nf39/kqSvry+zs7PLOgoAoFN0V2+g0Wik0Wgs6rpdXY309q6t3iVc1RxDsHRdXascQyy7JcXUxo0bMzMzk/7+/szMzGTDhg2L+rlmcyFzc88u5S6vaH1917Z7Ah3EMQRL19u71jHEoi328XlJp/lGR0czOTmZJJmcnMzNN9+8lJsBAOh4LWNqz549+fjHP54TJ07kve99b3784x9n165d+eUvf5mtW7fm2LFj2bVr1+XYCgCw4rQ8zXfvvfe+5OX79+9f9jEAAJ3GO6ADABSIKQCAAjEFAFAgpgAACsQUAECBmAIAKBBTAAAFYgoAoEBMAQAUiCkAgAIxBQBQIKYAAArEFABAgZgCACgQUwAABWIKAKBATAEAFIgpAIACMQUAUCCmAAAKxBQAQIGYAgAoEFMAAAViCgCgoLvdAwC4NDau786q1WvaPWPF6eu7
tt0TVpwL55/L7DPz7Z7RscQUwBVq1eo1ydfXt3sGHWDV159JcqbdMzqW03wAAAViCgCgQEwBABSIKQCAAjEFAFAgpgAACsQUAECBmAIAKBBTAAAFYgoAoEBMAQAUiCkAgAIxBQBQIKYAAArEFABAgZgCACgQUwAABWIKAKBATAEAFIgpAIACMQUAUCCmAAAKxBQAQEEppo4cOZKxsbFs2bIlExMTy7UJAKBjLDmmms1mvvGNb+S+++7L4cOH88ADD+SJJ55Yzm0AACvekmPq97//fd74xjdmaGgoq1evzrZt2zI1NbWc2wAAVrwlx9T09HQGBwcvfj0wMJDp6ellGQUA0Cm6L+edXXNNV/r6rr2cd9kxnvzmtnZPoEM4hnhZvv5MuxfQIfxtWbolPzM1MDCQU6dOXfx6eno6AwMDyzIKAKBTLDmm3v72t+fJJ5/MyZMnc/78+Rw+fDijo6PLuQ0AYMVb8mm+7u7ufO1rX8unP/3pNJvNfOQjH8kNN9ywnNsAAFa8xsLCwkK7RwAAdCrvgA4AUCCmAAAKxBQAQIGYAgAoEFO03e9+97ucPXs2SfL888/nu9/9bj772c/m29/+ds6cOdPmdUCnuv/++/P000+3ewZXAf+bj7bbtm1bDh06lO7u7nz1q1/NK1/5yoyNjeX48eP5y1/+ku9///vtngh0oHe+851Zs2ZN3vCGN2Tbtm354Ac/mA0bNrR7Flegy/pxMvBSLly4kO7uF34V//jHP+bgwYNJkhtvvDHbt29v5zSggw0NDeWnP/1pjh07lgcffDDf+973snnz5txyyy3ZsmVL1q1b1+6JXCGc5qPtbrjhhvzkJz9JkmzatCl/+MMfkiQnTpy4GFkAL1ej0ciqVavynve8J3fffXeOHj2aT3ziEzl69Gje//73t3seVxCn+Wi7M2fO5K677sqvf/3rvPrVr86f/vSnDA4O5rWvfW2+8pWvZNOmTe2eCHSg8fHxTE5OvuT3nnvuuaxZs+YyL+JKJaZYMc6ePZunnnoq8/PzGRwczGte85p2TwI62IkTJ/KmN72p3TO4CogpAIACr5kCACgQUwAABWIKAKBATAEAFIgpAICC/wWjtKP6Gu64AgAAAABJRU5ErkJggg==\n",
248 | "text/plain": [
249 | ""
250 | ]
251 | },
252 | "metadata": {},
253 | "output_type": "display_data"
254 | }
255 | ],
256 | "source": [
257 | "plt.figure(figsize=(10,5))\n",
258 | "df.loc[df['NumberOfParts'] > 5]['NumberOfParts'].value_counts().plot(kind='bar')"
259 | ]
260 | },
261 | {
262 | "cell_type": "code",
263 | "execution_count": 156,
264 | "metadata": {},
265 | "outputs": [
266 | {
267 | "data": {
268 | "text/html": [
269 | "\n",
270 | "\n",
283 | "
\n",
284 | " \n",
285 | " \n",
286 | " | \n",
287 | " NumberOfParts | \n",
288 | " Length | \n",
289 | " LongestPart | \n",
290 | " TLD | \n",
291 | " Randomness | \n",
292 | "
\n",
293 | " \n",
294 | " \n",
295 | " \n",
296 | " | waws-prod-blu-43680001.state.p.azurewebsites.windows.net | \n",
297 | " 6 | \n",
298 | " 56 | \n",
299 | " 22 | \n",
300 | " 3 | \n",
301 | " 0.0 | \n",
302 | "
\n",
303 | " \n",
304 | "
\n",
305 | "
"
306 | ],
307 | "text/plain": [
308 | " NumberOfParts Length \\\n",
309 | "waws-prod-blu-43680001.state.p.azurewebsites.wi... 6 56 \n",
310 | "\n",
311 | " LongestPart TLD \\\n",
312 | "waws-prod-blu-43680001.state.p.azurewebsites.wi... 22 3 \n",
313 | "\n",
314 | " Randomness \n",
315 | "waws-prod-blu-43680001.state.p.azurewebsites.wi... 0.0 "
316 | ]
317 | },
318 | "execution_count": 156,
319 | "metadata": {},
320 | "output_type": "execute_result"
321 | }
322 | ],
323 | "source": [
324 | "df.loc[df['NumberOfParts'] > 5].sample()"
325 | ]
326 | },
327 | {
328 | "cell_type": "code",
329 | "execution_count": 157,
330 | "metadata": {},
331 | "outputs": [
332 | {
333 | "data": {
334 | "text/html": [
335 | "\n",
336 | "\n",
349 | "
\n",
350 | " \n",
351 | " \n",
352 | " | \n",
353 | " NumberOfParts | \n",
354 | " Length | \n",
355 | "
\n",
356 | " \n",
357 | " \n",
358 | " \n",
359 | " | NumberOfParts | \n",
360 | " 1.000000 | \n",
361 | " 0.604539 | \n",
362 | "
\n",
363 | " \n",
364 | " | Length | \n",
365 | " 0.604539 | \n",
366 | " 1.000000 | \n",
367 | "
\n",
368 | " \n",
369 | "
\n",
370 | "
"
371 | ],
372 | "text/plain": [
373 | " NumberOfParts Length\n",
374 | "NumberOfParts 1.000000 0.604539\n",
375 | "Length 0.604539 1.000000"
376 | ]
377 | },
378 | "execution_count": 157,
379 | "metadata": {},
380 | "output_type": "execute_result"
381 | }
382 | ],
383 | "source": [
384 | "df[['NumberOfParts', 'Length']].corr()"
385 | ]
386 | },
387 | {
388 | "cell_type": "code",
389 | "execution_count": 158,
390 | "metadata": {},
391 | "outputs": [
392 | {
393 | "data": {
394 | "text/plain": [
395 | ""
396 | ]
397 | },
398 | "execution_count": 158,
399 | "metadata": {},
400 | "output_type": "execute_result"
401 | },
402 | {
403 | "data": {
404 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAmcAAAFACAYAAAD589sCAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAIABJREFUeJzt3Xt4lNWBx/HfTIYggdDMYC7gohAsFQOL7NOKaIm5GC4qcjMU9HEFdFmf1gaNQqMIi6h4edDS9bZEsRCXBTcRQ6nKxVyablWwyqpA0G4BATWJTWa4JAKZzOwfKdEgwkBm5rwz+X7+CWcyefPLkCf55bzve47N7/f7BQAAAEuwmw4AAACAb1DOAAAALIRyBgAAYCGUMwAAAAuhnAEAAFgI5QwAAMBCKGcAAAAWQjkDAACwEMoZAACAhThMB+gIn8+nlhY2OAAAANbXpUtMQM+L6HLW0uKXx9NkOgYAAMAZJSbGB/Q8TmsCAABYCOUMAADAQihnAAAAFkI5AwAAsBDKGQAAgIVQzgAAACyEcgYAAGAhIStn9913n0aMGKHrr7++7TGPx6MZM2Zo1KhRmjFjhg4ePChJ8vv9evjhh5WTk6Nx48Zpx44doYoFAABgaSErZ5MmTdKLL77Y7rHCwkKNGDFCmzZt0ogRI1RYWChJqqqq0t69e7Vp0yY99NBDWrhwYahiAQAAWFrIdgj4yU9+ogMHDrR7rKysTC+//LIkacKECbrllls0Z84clZWVacKECbLZbLrssst06NAh1dXVKSkpKVTxAACdVGVlmcrLNwflWB6PW5KUkODs8LGysnKUkZHd4eMg8oV1+6b6+vq2wpWYmKj6+npJUm1trVJSUtqel5KSotra2jOWs5gYmxIS4kIXGAAQdeLiYuVwBOfE0Ylydv75vTp8rLi4WH6nQZLBvTVtNptsNluHjsHemgCAs3X55SN1+eUjg3KsBQsK/v52cVCOx++06GbJvTV79eqluro6SVJdXZ1cLpckKTk5WTU1NW3Pq6mpUXJycjijAQAAWEJYy1lWVpZKS0slSaWlpcrOzm73uN/v1//+7/8qPj6e680AAECnFLLTmvn5+dq6davcbrfS09P1y1/+UrNmzdJdd92lkpIS9enTR0uXLpUkXX311frDH/6gnJwcdevWTYsXB2d6GAAAINLY/H6/33SIc9Xc3ML5eQCAMSeuOVu06DHDSRAJLHnNGQAAAE6PcgYAAGAhlDMAAAALoZwBAABYCOUMAADAQihnAAAAFkI5AwAAsBDKGQAAgIVQzgAAACyEcgYAAGAhlDMAAAALoZwBQJRwuxs0f/6v5HY3mI4CoAMoZwAQJYqLV6u6eoeKi9eYjgKgAyhnABAF3O4GVVS8Jb/fr4qKzcyeARGMcgYAUaC4eLV8Pp8kyefzMXsGRDDKGQBEgaqqSnm9XkmS1+tVVVWF4UQAzhXlDACiQHp6hhwOhyTJ4XAoPT3TcCIA54pyBgBRIDd3muz21h/pdrtdublTDScCcK4oZwAQBZxOlzIzr5HNZlNmZo6cTpfpSADOkcN0AABAcOTmTtP+/fuYNQMiHOUMAKKE0+nSQw89bjoGgA7itCYAAICFUM4AAAAshHIGAABgIZQzAAAAC6GcAQAAWAjlDAAAwEIoZwAAABZCOQMAALAQyhkAAICFUM4AAAAshHIGAABgIZQzAAAAC6GcAQAAWAjlDAAAwEIoZwAAABZCOQMAALAQyhkAAICFUM4AAAAshHIGAABgIZQzAAAAC6GcAQAAWAjlDAAAwEIcJj7pihUrVFxcLJvNpoEDB+rRRx9VXV2d8vPz5fF4lJaWpieeeEKxsbEm4gEAABgT9pmz2tpaFRUV6dVXX9Xvf/97tbS06PXXX9eSJUs0ffp0bd68WT179lRJSUm4owEAABhn5LRmS0uLjh49Kq/Xq6NHjyoxMVHvvvuuRo8eLUma
OHGiysrKTEQDAAAwKuynNZOTkzVz5kxlZmaqa9euuuqqq5SWlqaePXvK4WiNk5KSotra2jMeKybGpoSEuFBHBgDglByO1jkOfhchmMJezg4ePKiysjKVlZUpPj5es2fP1h//+MdzOlZLi18eT1OQEwIAEBiv1ydJ/C5CQBIT4wN6XtjL2dtvv61/+Id/kMvlkiSNGjVKH3zwgQ4dOiSv1yuHw6GamholJyeHOxoAAIBxYb/mrE+fPvrwww/19ddfy+/365133tHFF1+s4cOHa+PGjZKk1157TVlZWeGOBgAAYFzYZ86GDh2q0aNHa+LEiXI4HBo0aJB+9rOfKSMjQ3fffbeWLl2qQYMGKTc3N9zRAAAAjLP5/X6/6RDnqrm5hfP8ANAJvPRSofbu3W06xnfs2dOaqX//VMNJ2uvXL1UzZ84yHQMnsew1ZwAAnK29e3fr012fKiEuyXSUdmJ8XSVJdfs8hpN8w9NUZzoCOohyBiAkXnzxP/Tmm+s1btwNmj79X03HQRRIiEtS5iVTTcewvIpda0xHQAextyaAkHjzzfWSpPXrf2c4CQBEFsoZgKB78cX/aDdesWKZoSQAEHkoZwCC7sSs2QnMngFA4ChnAAAAFkI5A4Ao4XY3aP78X8ntbjAdBUAHUM4ABN3YsePajceNu8FQks6luHi1qqt3qLiYu/WASEY5AxB0t99+R7sxS2mEntvdoIqKt+T3+1VRsZnZMyCCUc4AhMSJ2TNmzcKjuHi1fD6fJMnn8zF7BkQwyhmAkLj99jv06quvM2sWJlVVlfJ6vZIkr9erqqoKw4kAnCvKGQBEgfT0DDkcrZu+OBwOpadnGk4E4FxRzgAgCuTmTpPd3voj3W63KzeXbY6ASEU5A4Ao4HS6lJl5jWw2mzIzc+R0ukxHAnCO2PgcAKJEbu407d+/j1kzIMJRzgAgSjidLj300OOmYwDoIE5rAgAAWAjlDAAAwEIoZwAAABZCOQMAALAQyhkAAICFUM4AAAAshHIGAABgIZQzAAAAC6GcAQAAWAjlDAAAwEIoZwAAABZCOQMAABHD7W7Q/Pm/ktvdYDpKyFDOAABAxCguXq3q6h0qLl5jOkrIUM4AAEBEcLsbVFHxlvx+vyoqNkft7BnlDAAARITi4tXy+XySJJ/PF7WzZ5QzAAAQEaqqKuX1eiVJXq9XVVUVhhOFBuUMAABEhPT0DDkcDkmSw+FQenqm4UShQTkDAAARITd3muz21upit9uVmzvVcKLQoJwBAICI4HS6lJl5jWw2mzIzc+R0ukxHCgmH6QBAOEyefF3bv1999XWDSQAAHZGbO0379++L2lkziXIGAAAiiNPp0kMPPW46RkhxWhNR79uzZqcaAwBgJZQzAAAAC6GcAQAAWAjlDAAAwEIoZwAAABZCOUPUGz362pPG3BAAALAuI+Xs0KFDysvL05gxYzR27Fht27ZNHo9HM2bM0KhRozRjxgwdPHjQRDREodzcaSeNo3dtHABA5DNSzh555BGNHDlSGzZs0Lp16zRgwAAVFhZqxIgR2rRpk0aMGKHCwkIT0RCFnE6XRo++VjabTaNHXxe1K0oDAKJD2MvZ4cOH9d577+nGG2+UJMXGxqpnz54qKyvThAkTJEkTJkzQW2+9Fe5oiGK5udM0aFAas2YAAMsL+w4BBw4ckMvl0n333addu3YpLS1N8+bNU319vZKSkiRJiYmJqq+vP+OxYmJsSkiIC3VkRIGEhDg9/fTTpmMAOEcOB5dInw2Hw87vxwgW9nLm9Xq1c+dOzZ8/X0OHDtXDDz/8nVOYNptNNpvtjMdqafHL42kKVVQAgEV4vT7TESKK1+vj96MFJSbGB/S8sJezlJQUpaSkaOjQoZKkMWPGqLCwUL169VJdXZ2SkpJUV1cnl4vrggBEv8rKMpWXbw7KsTwetyQpIcHZ4WNlZeUoIyO7w8cBcPbCPk+cmJiolJQU7d69W5L0zjvvaMCAAcrKylJpaakkqbS0
VNnZ/FAAgLPhdrvldrtNxwDQQWGfOZOk+fPn695771Vzc7P69u2rRx99VD6fT3fddZdKSkrUp08fLV261EQ0AAirjIzsoM1QLVhQIElatOixoBwPgBlGytmgQYO0du3a7zy+cuVKA2kAAACsI6BytmfPHi1fvlxffPGFvF5v2+NFRUUhCwYAANAZBVTOZs+eralTp2rKlCmy27mdGZHH7W7QU089rvz8X7EILQDA0gIqZw6HQzfddFOoswAhU1y8WtXVO1RcvEazZv3cdBwAAL7XaafBPB6PPB6PMjMztWrVKtXV1bU95vF4wpUR6BC3u0EVFW/J7/eromKz3O4G05E6Bbe7QfPn/4rXGwDO0mlnziZNmiSbzSa/3y9JWr58edv7bDabysrKQpsOCILi4tXy+VoXsPT5fMyehQmzlQBwbk5bzsrLyyVJx44dU9euXdu979ixY6FLBQRRVVVl240sXq9XVVUVlIUQO3m2Mjd3Ktf6AUCAArq6f+rU724WfarHACtKT8+Qw9H6d4jD4VB6eqbhRNGvuHi1mpubJUnNzc0qLl5jOBEARI7TlrOvvvpK27dv19GjR7Vz507t2LFDO3bs0JYtW/T111+HKyPQIbm509ruMrbb7crN5Q+LUKuqqmy7HMLv96uqqsJwIgCIHKc9rfk///M/Wrt2rWpqavToo4+2Pd69e3fl5+eHPBwQDE6nS5mZ12jTpjeVmZnD6bUw6Nevv6qrd7SN+/dPNZgGACLLacvZxIkTNXHiRG3cuFGjR48OVyYg6HJzp2n//n3MmoXJt4uZJO3cud1QEgCIPAGtc/bFF1/ot7/9bbvHevToocGDB2vQoEEhCQYEk9Pp0kMPPW46BgAAZxRQOdu+fbu2b9+uzMzWC6krKir0ox/9SGvWrNGYMWP0L//yLyENCSDS2UwHAICIEdDdmjU1NVq7dq0KCgpUUFCgtWvXqqGhQatWrdJrr70W6owAIsygQZeedgwA+H4BlbP6+nrFxsa2jbt06aK//e1vOu+889o9DgCStHv37pPGfzWUBAAiT0CnNceNG6cpU6YoOztbUuvitNdff72ampo0YMCAkAYEEHkcDoe+vU71iXXmAESOysoyLV++LCjHOn78WNti4FbicDgUG9v1zE8MwG23/asyMrKDcqyAfmL+4he/0MiRI7Vt2zZJ0oMPPqghQ4ZIkp588smgBAEQPRobj5x2DAD4fgH/OZuWlqbk5GS1tLRIar2Ds0+fPiELBgTThg1v6IUXntUdd9ypnJyxpuNEvV69zld9/d/axuefn2gwDYBzkZGRHbSZIJydgMrZyy+/rGeeeUbnn39+20rrkrR+/fqQBQOC6cUXn5MkLVv2LOUsDJqaGtuNmTkDgMAFVM6Kioq0YcMGOZ3OUOcBgm7DhjfabSW0efObFLQQO3l7N7Z7A4DABXS3ZkpKiuLj40OdBQiJE7NmJyxb9qyhJAAAnFlAM2d9+/bVLbfcooyMjHZLZ8yYMSNkwYBgOTFr9n1jAACsJKBy1qdPH/Xp00fNzc1qbm4OdSYgxFitHgBgXQGVszvvvFNS63Uj3bp1C2kgINgGDbpU1dU7240BALCqgK4527Ztm6699lqNHdt6EfWuXbu0cOHCUOYCgmbv3r0njfeYCQIAQAACKmeLFy/W8uXLlZCQIEm65JJL9Oc//zmkwYBgufTSwe3GaWmDv+eZAACYF1A5k6TevXu3/0B7wB8KGFVdvaPdeOfOHd/zTAAAzAvomrPevXvrgw8+kM1mU3Nzs4qKithTExHj5AVRTx4DAGAlAU1/LVy4UKtWrVJtba3S09NVXV2tf/u3fwt1NiAobDbbaccAAFhJQDNnLpfrOxucr1ixQtOnTw9FJiCoWOcMABBJzvnCsRUrVgQxBhA6LpfrpHEvQ0kAADizcy5nzD4gUhw8ePCkscdQEgAAzuycyxnX7SBStLS0nHYMAICVnPaas2HDhp2yhPn9fh07dixkoQAA
ADqr05azbdu2hSsHAAAA1IHTmgAAAAi+gJbSAADAJI/HLU/TV6rYtcZ0FMvzNNUp1sNNe5GMmTMAAAALYebMALe7QU899bjy838lp9N15g8AgE4uIcGp44dsyrxkqukollexa40SEhJMx0AHUM4MKC5ererqHSouXqNZs35uOg7QprKyTOXlmzt8HLvdLp/P1268YEHBOR8vKytHGRnZHc4FAJGA05ph5nY3qKLiLfn9flVUbJbb3WA6EhB0gwalnXYMAPh+zJyFWXHx6rYZBZ/Px+wZLCUjIztoM1S5uePk8/mUlJSsRYseC8oxAaAzYOYszKqqKuX1eiVJXq9XVVUVhhMBoTFoUJrS0obo+edfMh0FACIK5SzM0tMz5HC0Tlg6HA6lp2caTgQAAKzE2GnNlpYWTZ48WcnJyVq2bJn279+v/Px8eTwepaWl6YknnlBsbKypeCGTmztNFRVvSWq9SDo3lzuPgEjz0kuF2rt3t+kY37FnT2umjtx8EQr9+qVq5sxZpmMAEcNYOSsqKtKAAQN05MgRSdKSJUs0ffp0XXfddVqwYIFKSkp00003mYoXMk6nS5mZ12jTpjeVmZnDUhpABNq7d7f+uuNjpZgOcpK4v79t3PGx0RzfVmM6ABCBjJSzmpoaVVZW6o477tCKFSvk9/v17rvv6sknn5QkTZw4Uc8880xUljNJuvTSIdq48Q0NHjzEdBRLC9ayDqfCsg7oqBRJt8lmOoblLRcr1SO4OsNaoUbK2eLFizVnzhw1NjZKktxut3r27Nl2LVZKSopqa2vPeJyYGJsSEuLO+DyreeGFZyVJy5Y9o2uvHW04jXXFxcXK4ej4ZZHdunXT119/3W7ckePGxcVG5PdduJ14jaPxtQrG92Vn4nDYO/x9wGt+doLxmlvVihXLVF29Q7/7XYlmz77LdJyQCHs5q6iokMvl0uDBg7Vly5YOHaulxS+PpylIycLjww+3tZ3KPXLkiKqq3tY//uNlhlNZ0+WXj9Tll48MyrEmT76u7d//+Z8lHT5epH3fmeD1ti4ZE42v1YmvDYHxen0d/j7gNT87wXjNrcjtbtDGjRvk9/u1YcObuuGGGyNq9iwxMT6g54X9T5EPPvhA5eXlysrKUn5+vt5991098sgjOnToUNsSEzU1NUpOTg53tLB48sn26z0tWfKooSSdy3nnnSdJGjLkHw0nAQCcq1OtFRqNwl7O7rnnHlVVVam8vFxPPfWUrrjiCj355JMaPny4Nm7cKEl67bXXlJWVFe5oYdHYeOS0Y4TGgAE/VFraEC1cSBkGgEjVWdYKtcxJ/Dlz5ui3v/2tcnJy5PF4lJubazpSSHTrFnfaMQAAOLXOslao0e2bhg8fruHDh0uS+vbtq5KSjl8LZHWXXjpY77+/tW2clsYdmwAABKKzrBVqmZmzzmL79o/ajT/++ENDSQAAiCwn1gq12WxRvVYoG5+HmcPh0LFj7ccAACAwubnTtH//vqidNZMoZ2HHDQEAAJw7p9Olhx563HSMkOK0Zpj17t2n3bhPnwsMJQEAAFbEzFmY9evXX19++UXb+KKL+htMAwCRw9NUp4pd1lrX6mhz604353XpbjjJNzxNdUpSgukY6ADKWZht3brlpPG7hpIAQOTo1y/VdIRT2rOnQZKUdKF1zoIkKcGyrxcCQzkLs5YW72nHAIDvmjlzlukIp7RgQYEkadGix87wTCBwXHMGAABgIZQzAAAAC6GcAQAAWAjlDAAAwEIoZwAAABbC3ZoAcJY8Hrf+Jmm5/KajWN6Xks73uE3HACIKM2cAAAAWwswZAJylhASnunx+QLfJZjqK5S2XX90TnKZjABGFmTMAAAALoZwBAABYCKc1gQj30kuF2rt3t+kY37FnT2umE9vbWEW/fqmW3QoIACTKGRDx9u7dre2ffiQlmE5ykpjWN9vrPjKb49s8pgMAwJlRzoBokCD5MnymU1ievZIrOSBVVpapvHxzUI4VzBnirKwc
ZWRkd/g4iHyUMwAAzpHTyZ2oCD7KGQCgU8nIyGaGCpbGHD8AAICFUM4AAAAshHIGAABgIZQzAAAAC6GcAQAAWAh3ayKoWK3+7LBafeSqUeum3lZy5O9vexhN0V6NpAGmQwARhnKGoNq7d7f2fPKhLuzRYjpKOz+QTZLU8vkHhpN8Y9+RGNMRcI769Us1HeGU6v7+R0hyf+vkGyDrvl6AVVHOEHQX9mjRAz8+cuYndnIP/9lK8xs4G1ad7TwxM7xo0WOGkwDoCMoZEOE8HrfkYWuigHgkT6zbdAoAOC1+mgMAAFgIM2dAhEtIcOrA8f1sfB4Ae6VdCQnshQjA2pg5AwAAsBDKGQAAgIVwWjNAlZVlKi/fHJJjd2TtraysHGVkZAcxDQAAMImZszBLSxty2jEAAOjcmDkLUEZGdtBmqCZPvq7t36xHBAAAvo1yZsCJ2TKKGYLGiuucHf372/OMpmjPIynJdAgAOD3KGRDhrLo1zon9TPsnWShfknVfLwA4gXKGoPJ43Go4HMPWRAH47HCMXJ6Or1bPVkIAEF0sdh4EAACgc2PmDEGVkOBUfOMeNj4PwMN/7qEYVqsHAJwk7OXsyy+/1Ny5c1VfXy+bzaYpU6bo1ltvlcfj0d13363PP/9cF1xwgZYuXaof/OAH4Y4HAABgVNhPa8bExKigoEBvvPGGXnnlFf3Xf/2X/u///k+FhYUaMWKENm3apBEjRqiwsDDc0QAAAIwL+8xZUlKSkpJa72Xv0aOHUlNTVVtbq7KyMr388suSpAkTJuiWW27RnDlzwh0PAMIqmLuPnLhDtiO7jpzA7iOAOUavOTtw4ICqq6s1dOhQ1dfXt5W2xMRE1dfXn/HjY2JsSkiIC3XMoHM4WicsIzH7mTgcdrWYDhFBHA57VH4fSNH9fR5McXGxba9VR/Xq5ZKkoBwvLi6W/zvAEGPlrLGxUXl5ebr//vvVo0f7ZRdsNptsNtsZj9HS4pfH0xSqiCHj9fokKSKzn8mJrw2B8Xp9Ufl9IEX393kwXX75SF1++UjTMU6J/zsguBIT4wN6npGlNJqbm5WXl6dx48Zp1KhRkqRevXqprq5OklRXVyeXy2UiGgAAgFFhL2d+v1/z5s1TamqqZsyY0fZ4VlaWSktLJUmlpaXKzuZaBwAA0PmE/bTm+++/r3Xr1mngwIEaP368JCk/P1+zZs3SXXfdpZKSEvXp00dLly4NdzQAAADjwl7OfvzjH+uTTz455ftWrlwZ5jQIhX1HrLd908Hjrdcw/iDWbzjJN/YdiVF/0yEAAJbDDgEIKqtuKn3w70sMuC6wTr7+su7rBQAwJ6rL2UsvFWrv3t2mY3xHMNciCqZ+/VI7vIk2m3ADANAxUV3O9u7dre27PpEvzlp3ftp8rS/7R/u+MpzkG/amBtMRAACAorycSZIvzqWjl15vOoblnbfz96YjAAAAGVrnDAAAAKdGOQMAALAQyhkAAICFUM4AAAAshHIGAABgIVF9t6bH45a9qZ47EQNgb6qXxxPV3w4AAEQEZs4AAAAsJKqnShISnNp3yMs6ZwE4b+fvlZDgNB0DAIBOj5kzAAAAC6GcAQAAWEhUn9aUWveMtNoNAbbmryVJ/i7dDCf5RuvemommYwAA0OlFdTnr1y/VdIRT2rNntySp/4VWKkOJln29AADoTKK6nM2cOct0hFNasKBAkrRo0WOGkwAAAKuJ6nIG4OxUVpapvHxzUI51Yob4xB8jHZGVlaOMjOwOHwcAIgHlDEBIOJ0szQIA54JyBqBNRkY2M1QAYBhLaQAAAFgI5QwAAMBCOK0Jy+LidABAZ0Q5Q6fAxekAgEhBOYNlcXE6AKAz4pozAAAAC6GcAQAAWAjlDAAAwEIoZwAAABZCOQMAALAQyhkAAICFUM4AAAAsxOb3+/2mQ5yr5uYWeTxNYflcoVitvn//1A4fi9XqAQCIDImJ8QE9j0VoDWC1egAA8H2YOQMAAAiDQGfOuOYMAADAQihn
AAAAFkI5AwAAsBDKGQAAgIVQzgAAACyEcgYAAGAhlDMAAAALoZwBAABYCOUMAADAQihnAAAAFkI5AwAAsJCI3lsTAAAg2jBzBgAAYCGUMwAAAAuhnAEAAFgI5QwAAMBCKGcAAAAWQjkDAACwEMoZAACAhThMB+hMvvzyS82dO1f19fWy2WyaMmWKbr31VtOxotqxY8d088036/jx42ppadHo0aOVl5dnOlan0NLSosmTJys5OVnLli0zHSfqZWVlqXv37rLb7YqJidHatWtNR4p6hw4d0gMPPKBPP/1UNptNixcv1rBhw0zHilq7d+/W3Xff3Tbev3+/8vLyNH36dHOhQoRyFkYxMTEqKChQWlqajhw5osmTJ+uqq67SxRdfbDpa1IqNjdXKlSvVvXt3NTc366abblJ6erouu+wy09GiXlFRkQYMGKAjR46YjtJprFy5Ui6Xy3SMTuORRx7RyJEj9e///u86fvy4jh49ajpSVEtNTdW6desktf7xl56erpycHMOpQoPTmmGUlJSktLQ0SVKPHj2Umpqq2tpaw6mim81mU/fu3SVJXq9XXq9XNpvNcKroV1NTo8rKSt14442mowAhcfjwYb333ntt3+OxsbHq2bOn4VSdxzvvvKO+ffvqggsuMB0lJChnhhw4cEDV1dUaOnSo6ShRr6WlRePHj9eVV16pK6+8ktc8DBYvXqw5c+bIbudHTDjddtttmjRpkl555RXTUaLegQMH5HK5dN9992nChAmaN2+empqaTMfqNF5//XVdf/31pmOEDD85DWhsbFReXp7uv/9+9ejRw3ScqBcTE6N169bpD3/4gz766CN9+umnpiNFtYqKCrlcLg0ePNh0lE5l9erVeu211/TCCy9o1apVeu+990xHimper1c7d+7UtGnTVFpaqm7duqmwsNB0rE7h+PHjKi8v15gxY0xHCRnKWZg1NzcrLy9P48aN06hRo0zH6VR69uyp4cOH649//KPpKFHtgw8+UHl5ubKyspSfn693331X9957r+lYUS85OVmS1KtXL+Xk5Oijjz4ynCi6paSkKCUlpW0mfsyYMdq5c6fhVJ1DVVWV0tLSdP7555uOEjKUszDy+/2aN2+eUlNTNWPGDNNxOoWGhgbGThCSAAAGfklEQVQdOnRIknT06FG9/fbbSk1NNZwqut1zzz2qqqpSeXm5nnrqKV1xxRVasmSJ6VhRrampqe3Gi6amJv3pT3/SD3/4Q8OpoltiYqJSUlK0e/duSa3XQA0YMMBwqs7h9ddf13XXXWc6Rkhxt2YYvf/++1q3bp0GDhyo8ePHS5Ly8/N19dVXG04Wverq6lRQUKCWlhb5/X6NGTNGmZmZpmMBQVVfX69f/OIXklqvsbz++uuVnp5uOFX0mz9/vu699141Nzerb9++evTRR01HinpNTU16++23tWjRItNRQsrm9/v9pkMAAACgFac1AQAALIRyBgAAYCGUMwAAAAuhnAEAAFgI5QwAAMBCWEoDgFE/+tGPNGPGDBUUFEiSli9frqamJv3yl7/s8LELCgqUkZHRoZXEa2pq9OCDD+qvf/2rfD6fMjIyNHfuXMXGxkpqXQ7nL3/5iyZPnqxdu3Zp69atio+Pl91u14IFCzRs2LCAP9eWLVvUpUsX/dM//dM55wUQ+Zg5A2BUbGysNm3apIaGBtNR2vF6vfL7/brzzjt1zTXXaNOmTdq4caOampr061//WpL01Vdf6eOPP9b69es1ffp0SdLcuXO1bt063XPPPVqwYMFZfb6tW7dq27ZtofhyAEQQZs4AGOVwOPSzn/1MK1eu1N13393ufSfPfA0bNkzbtm3Tli1b9PTTTys+Pl6ffvqpxo4dq4EDB6qoqEjHjh3Ts88+qwsvvFCS9Pbbb6uwsFCNjY0qKChQZmamWlpatGTJEm3dulXHjx/XzTffrKlTp2rLli36zW9+o549e2rPnj1auHChunbtqsmTJ0tq3af1/vvvV3Z2tvLy
8jRz5kzV1tZq/Pjxmj9/frvsP/nJT7Rv3z5J0n//93/rlVdeUXNzsy666CI98cQT6tatmwoKChQbG6vq6molJydr27Ztstvt+t3vfqf58+frq6++0rPPPiu73a74+HitWrUq1P8dACyAcgbAuJtvvlk33HCDbr/99oA/ZteuXXrjjTeUkJCg7Oxs5ebmqqSkRCtXrtTLL7+sefPmSZI+//xzlZSUaN++ffrnf/5nXXnllSotLVV8fLxeffVVHT9+XFOnTtVVV10lSdq5c6fWr1+vvn37qqioSGlpae0+b48ePdS7d2999tlnev7553XHHXdo3bp1kqSSkpK255WXl2vgwIGSpJycHE2ZMkWS9Otf/1olJSW65ZZbJEm1tbVas2aNYmJi9PTTTysuLk633XabJGncuHFavny5kpOT27YhAxD9KGcAjOvRo4fGjx+voqIinXfeeQF9zJAhQ5SUlCRJuvDCC9vK1cCBA7Vly5a2540dO1Z2u139+vVT3759tXv3bv3pT3/SJ598oo0bN0qSDh8+rM8++0xdunTRkCFD1Ldv33P+Wp544gk9//zzcrlceuSRRyRJf/nLX7R06VIdPnxYjY2N+ulPf9r2/DFjxigmJuaUxxo2bJgKCgo0duxY5eTknHMmAJGFcgbAEm699VZNmjRJkyZNanssJiZGPp9PkuTz+dTc3Nz2vhMX5EuS3W5vG9vtdrW0tLS9z2aztfs8NptNfr9fDzzwgEaOHNnufVu2bFFcXFzb+OKLL24rcCccOXJEX375pS666CLV19d/5+uYO3fud25AKCgo0HPPPadLLrlEa9eu1datW9ve161bt+95RaRFixbpww8/VGVlpSZPnqxXX31VTqfze58PIDpwQwAAS0hISNCYMWPanRq84IILtGPHDkmtpwm/Xc4CtWHDBvl8Pu3bt0/79+9X//799dOf/lSrV69uO96ePXvU1NT0nY8dMWKEvv76a5WWlkpq3VT8scce08SJE09bqk7W2NioxMRENTc3a/369d/7vO7du6uxsbFtvG/fPg0dOlSzZ8+W0+lUTU1NwJ8TQORi5gyAZcycObPdRe9TpkzRz3/+c91www0aOXJku1mtQPXu3Vs33nijGhsb9eCDD6pr167Kzc3V559/rkmTJsnv98vpdOq55577zsfabDY9++yzevDBB/Xcc8/J5/Pp6quvVn5+/lllmD17tnJzc+VyuTR06NB2BezbMjMzlZeXp7KyMs2fP18rVqzQZ599Jr/fryuuuEKXXHLJWX/9ACKPze/3+02HAAAAQCtOawIAAFgI5QwAAMBCKGcAAAAWQjkDAACwEMoZAACAhVDOAAAALIRyBgAAYCH/D9Nb5JFvyswVAAAAAElFTkSuQmCC\n",
405 | "text/plain": [
406 | ""
407 | ]
408 | },
409 | "metadata": {},
410 | "output_type": "display_data"
411 | }
412 | ],
413 | "source": [
414 | "plt.figure(figsize=(10,5))\n",
415 | "seaborn.boxplot(data=df, x='NumberOfParts', y='Length')"
416 | ]
417 | },
418 | {
419 | "cell_type": "code",
420 | "execution_count": 159,
421 | "metadata": {},
422 | "outputs": [
423 | {
424 | "data": {
425 | "text/html": [
426 | "\n",
427 | "\n",
440 | "
\n",
441 | " \n",
442 | " \n",
443 | " | \n",
444 | " NumberOfParts | \n",
445 | " Length | \n",
446 | " LongestPart | \n",
447 | " TLD | \n",
448 | " Randomness | \n",
449 | "
\n",
450 | " \n",
451 | " \n",
452 | " \n",
453 | "
\n",
454 | "
"
455 | ],
456 | "text/plain": [
457 | "Empty DataFrame\n",
458 | "Columns: [NumberOfParts, Length, LongestPart, TLD, Randomness]\n",
459 | "Index: []"
460 | ]
461 | },
462 | "execution_count": 159,
463 | "metadata": {},
464 | "output_type": "execute_result"
465 | }
466 | ],
467 | "source": [
468 | "df.loc[df['NumberOfParts'] == 1]"
469 | ]
470 | },
471 | {
472 | "cell_type": "code",
473 | "execution_count": 160,
474 | "metadata": {},
475 | "outputs": [
476 | {
477 | "data": {
478 | "text/plain": [
479 | ""
480 | ]
481 | },
482 | "execution_count": 160,
483 | "metadata": {},
484 | "output_type": "execute_result"
485 | },
486 | {
487 | "data": {
488 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAmcAAAFACAYAAAD589sCAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAIABJREFUeJzs3Xl8nHW5///Xfc+WyZ60adJ9o/sOFFoquyxSUEBQvy7nWDwHVBQR4XsARVABUdEfB48/pMJhEwQppQgB2tKF7rRAC93SLV2TZt8z+33f3z8mk6ZtmsxM7nsmmVxPHz46mcxyMZ1m3rk+m2IYhoEQQgghhOgV1GQXIIQQQgghTpBwJoQQQgjRi0g4E0IIIYToRSScCSGEEEL0IhLOhBBCCCF6EQlnQgghhBC9iIQzIYQQQoheRMKZEEIIIUQvIuFMCCGEEKIXsSe7gJ7QdR1NkwMOhBBCCNH7ORy2qG7Xp8OZphk0NHiSXYYQQgghRLcKCrKiup0MawohhBBC9CISzoQQQgghehEJZ0IIIYQQvYiEMyGEEEKIXkTCmRBCCCFELyLhTAghhBCiF5FwJoQQQgjRi0g4E0IIIYToRSScCSGEEEL0IhLOhBBCCCF6EQlnQgghRJxeffXv/PTOH+Lz+ZJdikghloWz++67j7lz53Lttde2X/e73/2Oq6++muuuu47bb7+dpqam9u89/fTTXHHFFVx11VWsXbvWqrKEEEII07z++j84cvQw9fV1yS5FpBDLwtmNN97IM888c9J18+bN45133uHtt99m1KhRPP300wDs37+f4uJiiouLeeaZZ/jVr36FpmlWlSaEEEKYStNCyS5BpBDLwtns2bPJyck56bovfOEL2O12AGbOnElFRQUAK1asYP78+TidToYPH87IkSP5/PPPrSpNCCGEMFUoJOFMmMeerCd+4403+NKXvgRAZWUlM2bMaP9eYWEhlZWV3T6GzaaQm5tuWY1CCCFENNxuh3weCdMkJZw99dRT2Gw2vvzlL/focTTNoKHBY1JVQgghRHzq65vl80h0q6AgK6rbJTycLV68mNWrV/P888+jKAoQ7pRFhjgh3EkrLCxMdGlCCCFEXGRYU5gpoVtprFmzhmeeeYannnoKt9vdfv1ll11GcXExgUCAo0ePcujQIaZPn57I0oQQQoi4STgTZrKsc3bXXXexefNm6uvrueiii/jxj3/MwoULCQQCLFiwAIAZM2bw61//mnHjxvGlL32Ja665BpvNxi9/+UtsNptVpQkhhBCmCgYDyS5BpBDFMAwj2UXEKxjUZIxfCCFE0nz1q/MB+NnP7uWCCy5McjWit4t2zpmcECCEEEL0UCAQTHYJIoVIOBNCCCHi0HHgSYY1hZkknAkhhBBxCAZPdMsCAQlnwjwSzoQQQog4BAL+9svSORNmknAmhBBCxMHv93d6WYieknAmhBBCxKFjIPP5fEmsRKQaCWdCCCFEHDoOa8qcM2EmCWdCCCFEHDp2y/x+6ZwJ80g4E0IIIeIgw5rCKhLOhBBCiDj4fF4AFEVtvyyEGSScCSGEEHGIdMvS7BnSOROmknAmhBBCxMHrDXfL3M6M9stCmEHCmRBCCBGHyFBmmkPCmTCXhDMhhBAiDuFAprSFM0+yyxEpRMKZEEIIEQev14vD7sSuOqVzJkwl4UwIIYSIg9frwWlz4bC5CAYDaJqW7JJEipBwJoQQQsTB6/Vgtzmw25wAsp2GMI2EMyGEECIOHo8Xm+LE0RbOPB6ZdybMIeFMCCGEiIPH04rD5sRhcwHIogBhGglnQgghRBw8Hg92m3TOhPkknAkhhBBx8Ho8ONoWBICEM2EeCWdCCCFEHDzecDizt3fOWpNckUgVEs6EEEKIGOm6js/nPWnOmXTOhFkknAkhhBAximw623FYUxYECLNIOBNCCCFiFAlidpsTu+pAQZHOmTCNhDMhhBAi
RpEg5rS5UBQFh12OcBLmkXAmhBBCxKi9c6aGFwM4bC5ZECBMI+FMCCGEiFGkcxaZb2a3SedMmEfCmRBCCBGjSOcssgGtXXXKggBhGglnQgghRIwinTN7h3DmaZVhTWEOCWdCCCFEjHy+E1tphP904pFhTWESCWdCCCFEjCLzy+yqI/ynzYlXttIQJpFwJoQQQsTI5/NiU+2oqg0ID2v6fL4kVyVShWXh7L777mPu3Llce+217dc1NDSwYMECrrzyShYsWEBjYyMAhmHw8MMPc8UVV3Ddddexc+dOq8oSQggheszr9WK3Odq/ttsc+PxeDMNIYlUiVVgWzm688UaeeeaZk65buHAhc+fOZdmyZcydO5eFCxcCsGbNGg4dOsSyZcv4zW9+w0MPPWRVWUIIIUSP+Xy+9pWaEO6c6bpOKBRKYlUiVVgWzmbPnk1OTs5J161YsYLrr78egOuvv54PPvjgpOsVRWHmzJk0NTVRVVVlVWlCCCFEj/h8Pmyqg0M1OzhUs6N97pkMbQoz2BP5ZLW1tQwaNAiAgoICamtrAaisrKSoqKj9dkVFRVRWVrbf9kxsNoXc3HTrChZCCCE6oetB7KqDgzU7ABg5YDIATifyuSR6LKHhrCNFUVAUpUePoWkGDQ2yOkYIIURitbR4sCkODMJzzCKds+rqBlyurGSWJnqxgoLo3hsJXa05YMCA9uHKqqoq8vPzASgsLKSioqL9dhUVFRQWFiayNCGEECJq4WHNE/0NW1s48/tlWFP0XELD2WWXXcaSJUsAWLJkCZdffvlJ1xuGwbZt28jKyup2SFMIIcTpdF1Pdgn9gt/vPyWchS8HAoFklSRSiGXh7K677uIb3/gGBw8e5KKLLuL111/n1ltvZf369Vx55ZVs2LCBW2+9FYCLL76Y4cOHc8UVV/DAAw/w4IMPWlWWEEKkrDfeeI3bf/i9ZJfRLwQCgfZuGUg4E+aybM7Zn/70p06vf+GFF067TlEUCWRCCNFDr7zyYrJL6DcCAT/ZLlv715FwJsOawgxyQoAQQggRo2Aw2GnnLBiUfc5Ez0k4E0KIFCO71FsvGAigKh06Z22Xg0EZ1hQ9J+FMCCFSjCwKsJZhGIS0EDb1RDhT2ztnwWSVJVKIhDMhhEgxmqYlu4SUFjmiSVVOTNtW2ztnEs5Ez0k4E0KIFCOdM2uFQuEA1rFzFrksZ2sKM0g4E0KIFCOdM2ud6Jx1GNZUIuFMOmei5yScCSFEipHOmbUi4UxRTnyERi5L50yYQcKZEEKkGE2TgGClSGdS7RDOwpcV6VoKU0g4E0KIFCNbaVjrxLDmyR+hqqpK50yYQsKZEEKkGBnWtFbk9VVODWeKiq5L50z0nIQzIYQQIgaRAKac8hGqKKoEY2EKCWdCCJFiZFjTWpoW6ZwpJ12vKjLnTJhDwpkQQqSYU0ODMFd75+yU11lBlWAsTCHhTAghUoyEM2u1zzk79SNUUWRYU5hCwpkQQqQYVZUf7VaKdMdO65xJOBMmkX/BQgiRYiScWetE5+zUYU0JZ8Ic8i9YCCFSjISzBDlt+FiGk4U55F+wEEKkGJvNnuwSUtqZJv0rXXxPiFhIOBNCiBQjnTNrtc85O/UbioQzYQ75FyyEECnGZrMlu4R+SoY1hTkknAkhRIqRcCZE3ybhTAghUozscyZE3ybhTAghhBCiF5FwJoQQQphCFgMIc0g4E0IIIcxgyJCyMIeEMyGEECIGkQB2ap/MQMKZMIeEMyGEECIG7QHstD3NDAlnwhQSzoQQQogYRDb5NU7pnRkSzoRJJJwJIYQQMTgRwE7pnBmGnM4gTCHvIiGEECIGitLWOTOkcyasIeFMCCGEiIGqti0IODWcGRLOhDkknAkhhBAxaO+cdbJeU4Y1hRnsyXjS559/ntdffx1FURg/fjy//e1vqaqq4q677qKhoYEpU6bw+9//HqfTmYzyhBBCiDNqXxBwSudMN4z24CZETyT8XVRZWcmLL77IG2+8
wTvvvIOmaRQXF/P444/z3e9+l+XLl5Odnc2iRYsSXZoQQgjRrROrNfWTrjdkQYAwSVLeRZqm4fP5CIVC+Hw+CgoK2LRpE1dddRUAN9xwAytWrEhGaUIIIUSXZJ8zYbWED2sWFhZyyy23cOmll+JyuZg3bx5TpkwhOzsbuz1cTlFREZWVlYkuTQghhOjWmfY50w1dOmcJUFKyi5aWFiZPnkp6enqyy7FEwsNZY2MjK1asYMWKFWRlZfGTn/yEtWvXxvVYNptCbm5q/sUIIUS85Oeitfz+DCA8x6wjQ9fJyEiT199ClZUV/Pzn9wDwrW99m1tu+V6SK7JGwsPZhg0bGDZsGPn5+QBceeWVfPrppzQ1NREKhbDb7VRUVFBYWNjtY2maQUODx+qShRCiT5Gfi9ZqaQkAJ885MwwDA4NAQJPX30IHDhxpv3zsWHmfe60LCrKiul3C+69Dhgzhs88+w+v1YhgGGzdu5KyzzuL8889n6dKlALz55ptcdtlliS5NCGGio0ePsHr1ymSXIYTpIkOXuq7jDbTQ5K1lf9W2k74nrFFfXw+ATVGor69LcjXWSXjnbMaMGVx11VXccMMN2O12Jk2axNe//nUuueQSfvrTn/LEE08wadIkbr755kSXJoQw0R/+8AhlZce45BL5RSvRdF3mPlkp8tpWNx+lxR8OC1uPfHDS94Q16upqAMh22qmprkpyNdZJyj5nd9xxB3fcccdJ1w0fPly2zxAihZSVHQPCq7NtNluSq+lfJJxZK/LaNnprzvg9YY2amhpsqkqW005lbU3Knsog7yIhhKX8fn+yS+h3NE1LdgkprX1Y0zj9dVZV+UXEStXVlbjtNtJtNvyBAM3NTckuyRISzoQQlvL5vMkuod/RdQlnVmrvjp22z9mJczeFNcrLy3CrCm57OARXVBxPckXWkHAmhLBUIBBIdgn9jq7r3d9IxK2roUsZ1rSOrutUVBwn3WEj3REOZ+Xl5UmuyhryLhJCWOrU8weF9UIh6ZxZKTJ02dk7W87WtE5NTQ3BYJAMh510uw0FKCs7muyyLCHvIiGEpVJxsm5vJ8Oa1jrxnu5sWFM+Vq1y9OhhADLtNlRFIcPp4OjRI93cq2+Sd5EQwlIulyvZJfQ7Mqxpra5+4ZBfRqxz5MghADKd4Y0mMuwqhw+XJrEi60g4E0JYKi0tLdklCGEqmXOWHIcOHcTtsONoe42zHHaqqqrwePrWKQHRkHeREMJSTqd0zhJN5vmJVFR6YB+Z9hOxJbutg3boUOp1zyScCSEsJRvQJp4MrVlLhjUTz+v1cLziONlOR/t1kXBWWnogWWVZRsKZEEKkGAkIItWUlh7AMAxynCcONnLZbKTZ7ezfvzeJlVlDwpkQQqQYCWci1ezbFw5gOR06ZwDZDpW9e3cnoyRLSTgTQogUI0cIWaurOX0y388a+/aVkO6w47SdHFtynA4qKytT7hgnCWdCCJFiZMWgtSSAJd6ekt1kO07/pSPXFe6k7d27J9ElWUr+BQshRIqx26VzZqWuwpnsMWe+mppq6hvq24NYR9lOO4oCe/eWJKEy60g4E0KIFCOdM2udCGCnz+2TcGa+PXvCwSvXeXo4s6sqWU4He/ak1rwz+RcshBApRuacWStyPFZnyy4knJlvz57d2FSVrA4rNTvKcdjZu7cETUudY8sknAkhRIqRveWspWltAayTdKZpocQW0w/sKdlFtsOOeoZVyLkuB36/v/3szVTQeQw9xcGDB3n22WcpLy8nFDrxxnvxxRctK0wIIUR8ZCsNa50IYKe/zqnUvekN/H4/pQcPMDLzzMfAReailZTsZtSoMYkqzVJRhbOf/OQnfOMb3+BrX/uazGUQQoheTsKZtSJNCqWTcNaxgSF67sCBfei63ul8swi3TSXNbmfPnt1cffX8BFZnnajCmd1u55vf/KbVtQghhBC9XjB45gAWDAYTWEnqi0z072ylZoSiKGQ7bJTs3pmosizXZRusoaGB
hoYGLr30Ul5++WWqqqrar2toaEhUjUIIIUSvEQqFA9jpHUpFwpnJSkp2keF0nLb57KnyXA6qqquor69LUGXW6rJzduONN6IoSvueLs8++2z79xRFYcWKFdZWJ4QQQvQygYAfOH1YU1EUgsFAMkpKSYZhUFKyi5xONp89VaSztmfPbubMmWd1aZbrMpytXLkSCE/Ic7lcJ33P7/dbV5UQJtN1nVAohNPpTHYpQog+LhAIB7BTO2cKSvv3RM8dP15OS0sLI/Ozur1ttjO8mjNVwllUs/u/8Y1vRHWdEL3V448/yre//dWUaXkLIZLH5/MBnXfOIt8TPVdSsgvoer5ZhKoo5Djt7E6ReWddds6qq6uprKzE5/Oxa9eu9uHNlpYWvF5vQgoUwgwffbQRgNLSA5xzTn6SqxFC9GV+f1sAU07ubygoMqpkot27d+K028iI8jiyXJeDAwf24/f7cLnOvPVGX9BlOFu3bh2LFy+moqKC3/72t+3XZ2RkcNddd1lenBBm6NgtKy3dzznnzE5iNUKIvu5MnTNQ8Ho9iS8oRe3cuZ0chy3qrWHyXA4ONnnYu3cP06bNsLg6a3UZzm644QZuuOEGli5dylVXXZWomoQwVcc2966d2+Hm/5PEaoQQfV1k5Eg9pXOmKqqMKpmktraGysoKJuRmRn2fyPDnrl07UjucRZSXl/Pcc8+ddF1mZiZTp05l0qRJlhQmhFm2b/+cNLvChUVeVpfsJBgM4nB0P4dBiL7KMAzZiNZCke7YaQsCFAWPRzpnZtix43MA8tOi/1ntUFWyXQ62b/+Mr3/9W1aVlhBRLQjYsWMHr776KpWVlVRWVvLqq6+ydu1afvGLX/C3v/3N6hqF6JGS3dsZlxNgcn6IYDBEaen+ZJckhKVkl3preTwelLb/daQoKl4JZ6bYvv0znDYbWY6oekjt8p0O9u4t6fMLM6IKZxUVFSxevJh7772Xe++9l8WLF1NXV8fLL7/Mm2++aXWNQsRN13XKj5czIlNjeGb4zLuysmNJrkoIa8n5jtZqbW3BYXfBaVtpqHg8re2L50R8DMNg27ZPyHPZY+4AD0hzomkaO3dut6i6xIgqnNXW1p60P5TD4aCmpoa0tDTZN0r0asFgkFBII8Ohk+kI/8CUYQeR6mQjVGu1trbgtJ++GlBVVEJaqH2TWhGfI0cOU19fz8C02PNFXpoDm6qydevHFlSWOFH1C6+77jq+9rWvcfnllwPhzWmvvfZaPB4PY8eOtbRAIXrC4Qj/Q/UEVTzB8G9gaWl9e4m1EN2RjVCt1dLSgtN2+s8RpW2BQEtLS5/fyiGZPvlkM0Bc4cymKOQ77WzZ8hHf+973++zcy6jC2e23386FF17I1q1bAfjVr37FtGnTAPjjH/9oXXVC9JCqqgwZMoSjLQc52hLeK2fYsOFJrkoIa8mKQWs1NzfjsLnQDf2k6yOrN5ubmxkwYGAySksJH23aQI7LQVqU+5udqiDdxa6aag4fPsioUWNMri4xop5pN2XKFAoLC9vnMpSXlzNkyJC4nrSpqYlf/OIX7N27F0VRePTRRxk9ejQ//elPKSsrY+jQoTzxxBPk5OTE9fhCdDRh4hQ2fHiMonQNh93OmDFnJbskISzl80k4s1JTUxNOew6+YOtJ1yvt4awpGWWlhJqaavYf2Me4nIy4H2OQ28Uumtm0aUOfDWdRzTl76aWXuOCCC7jlllu47bbb2v8fr0ceeYQLL7yQ999/n7feeouxY8eycOFC5s6dy7Jly5g7dy4LFy6M+/GF6GjatBl4grD0aBrjJ0yUeZIi5cm8Sms1NzXhsrtPu15tH9ZsTnRJKWPjxnUAFKa7urnlmblsKnkuJ+vXfdhnF2dE1Tl78cUXef/998nLy+vxEzY3N7NlyxYee+wxAJxOJ06nkxUrVvDSSy8BcP311/Od73yHe+65p8fPJ8TkyVPbL0+ZMj2JlQiRGK2tLckuIWWFQiE83lZceemnfU9VwsNw
jY2NiS4rZaxds5psl4OMGLfQONXgdBe7jpdz6FApo0f3vbnxUf3XFxUVkZXV/anw0Th27Bj5+fncd999lJSUMGXKFH7+859TW1vLoEGDACgoKKC2trbbx7LZFHJzT/8HIkRHHd8j06dPlfdMgsnrnRgdt88IhXzyulsk8tnkcpz++kaGNQMBj7z+cTh69CgHSvfHdCrAmRSmu9jd0MJHH61j1qxpJlSXWFGFs+HDh/Od73yHSy655KQhoQULFsT8hKFQiF27dvHAAw8wY8YMHn744dOGMBVFiWqFhaYZNDRI+15ELysrX94zCSavd2I0NNS3X66srJHX3SJHjpQDkGbvLJwpuBxuKiqq5PWPwzvvvIsCFPVgSDPCaVMZmOZg2bKl3Hzzt7HZ4ltcYLaCgugaXVHNORsyZAjz5s0jGAzS2tra/v94FBUVUVRUxIwZ4XOvrr76anbt2sWAAQOoqqoCoKqqivz8/LgeX4iumDE0L0Rv1NDQ0H65vr6+i1uKnoi8zmmddM4i13f8uxDR0XWdlSuXMyDNGfcqzVMNzUijsbGRbds+NeXxEimqztmPfvQjILw82+0+fRJkLAoKCigqKqK0tJQxY8awceNGxo4dy9ixY1myZAm33norS5Ysad9TTQgzpaX17P0rRG9VX1/b6WVhrkiH0uXofDWhy5ZxUhdTRGf79s+oq6tl+oBs0x6zwO3CabOxevUKzjlntmmPmwhRdc62bt3KNddcw5e+9CUASkpKeOihh+J+0gceeIC7776b6667jt27d/P973+fW2+9lfXr13PllVeyYcMGbr311rgfX4gz6S2tbSHMFpkLZQdqqquTW0wKiwQv9xnCWZojg/q6ukSWlBJWrfoAh83GIBOGNCNURaHI7WTzRxtobu5bK2ij6pw9+uijPPvss/zgBz8AYOLEiXz8cfxHI0yaNInFixefdv0LL7wQ92MKIUR/Vl0dnhaSBlRXVSa3mBRWX1+Pw+bEbut8S540RwbH6w5gGEaf3Z0+0TweD5s2rafI7cRm8ms2NDONIy1e1q//kKuvvtbUx7ZSVJ0zgMGDB598RzXquwohhLBYZWUFdsK/cTe1NMspARapq6vF7TzzakK3M5NgMCB7zcVg48Z1BINBhmSYf+RVlsNOltPBqlUfmP7YVooqYQ0ePJhPP/0URVEIBoM8++yzcqamEEL0IuVlx7ADjravKyuPJ7OclFVXV0uavYtw5gh/T+b9RW/NmlVkOOzkOHu2t1lnFEVhcLqT/fv3UVHRd/5NRBXOHnroIV5++WUqKyu56KKL2L17Nw8++KDVtQkhUkBf3aG7LzEMg+PlZTg4Ec7KysqSWVLKqq2tJe0M880A0trCWTR7dQpobGxg587tFLqdlg0DF6WHO3IbNqy15PGtEFVMzc/PP+2A8+eff57vfve7VtQkhEghmqZht5v/G7E4ob6+Do/PSz7hcKYAx44dSXJVqccwDBrq6xhYMOqMt0lvG/Ksq5NwFo0tWz7CMIweHdfUHbfdRo7LwaZN67nxxq9Z9jxminvi2PPPP29iGUKIVOX3+5NdQso7ejQcxJyEg1meqnL06OGk1pSKmpqaCGkh3M4zbyQa+V5tbU2iyurTPv74I9wOO1k9PK6pOwVpTg4c2N9ntjmJO5zJUIUQIhrBYCDZJaS8w4cPASeGNAt1nUOlpUmrJ1VFAldXCwJsqh2Xwy3DmlHQNI0d2z9jgNNu+crWgWnh1bU7dnxu6fOYJe5wJkuEhRDRCAaDyS4h5R06VEq2qhLZxa8IqKiswOfzJbOslFNbG94/Lt3Z9Uap6c6s9tuKMzty5BBen4+8tM63JTFTttOO3aaye/dOy5/LDF32EWfNmtVpCDMMQ4YqhBBR6Xggt7BG6f69FOk6kR7lYMDA4NChUiZOnJzM0lJKTU33nTOANHsW1bIRcLdKSw8AWLJK81SKopBlt7F//z7Ln8sMXb4iW7duTVQdQogUpet6sktIaV6v
l2PlZVwMHGq7bmjbn6Wl+yWcmaimphpVsZFmP/NqTQh3zipq9yeoqr7r2LGjqIpCuklnaXYn02Hn2LEjfWKDYNlJVghhKV2XzpmVSkvDu9EP63BdFpClquzbtydZZaWk2toa0l2Z3X6wp7uyaW1tkWHlbtTUVJPuiH2+WYM/SGljKw3+2KZMuO02fD4fHk9rTPdLBglnQghLhUKhZJeQ0vbu3Q1wUjhTUBim6+zZvSs5RaWo6uoq3I4zr9SMSG9bsVlTI0ObXWlqasQeYwOrwR9kR7OPmVdew45mX0wBzamGn6wvnLMp4Uz0KzL/KfHkGCFr7dq1g4GKSgYnf8qNBCqrq2RLBxNFH87CCwYknHUtEPBjizGc1fkCXDP/Wn784x9zzfz51PmiXw1uawtngUDvX0EuO0OKfiUUCmGzJWZ+gwhraen9v6X2VZqmsXvXDqYaOpwSzka3/blz53YuuujShNeWajRNo76+jkGFZ3V7W+mcRUdVbcS6K1d+mpN3i98BDN4tLmZqVvTncUaeS1V793wzkM6Z6GdCIdnWIRE6LgJobGxMYiWpbc+e3Xh9Pjo76bgISFdUtm37NNFlpaSGhnp0Xe9yA9qI8PmaioSzbrjd6cQ66SHX5WBqVhrblr3H1Kw0cl2O7u/UJtSWztLS3DE+a+JJ50z0K7LnVmJ0nHDb3NyUxEpS2yefbEEFxnTyPRWFswydrZ9sQdM06Rj3UHV1FXCiK9YVVbWR7sqUcNaNvLw8AnrsG9rnuhwxhbIIf0hDURRycnJjvm+iSedM9Csy5ywxWltbO70szGMYBh9tXMdowE3nwzSTgKaWZkpKZGFAT0Xm7mV0swFthNuRJeGsG4WFRfiCIUIJ2m7HE9LIz8vH4Yg92CWahDPRr0jnLDE6TrjtC5Nv+6JDh0o5XllBV7uYjQMcisL69WsSVVbKigQtdwzhrLqqysqS+rwRI0YC0BJMzC/NLSGdkaNGd3/DXkDCmehXJJwlRsc5Z7LPmTU+/HAVNhSmdnEbFwoTDYP1az+U934P1dTU4LA7cdpdUd0+3ZlFbV2NnEPdhXHjJgDEvF9ZPIK6TnMg2P6cvZ2EM9GvBAJy7FgidAxkmiYnBJgtGAzy4aoPGI9B+hmGNCPc3zAtAAAgAElEQVRmAS2eVrZs2ZSY4lJUbW11t2dqdpTuzCIYDMqcyy7k5eVTWFgU03YY8ar3hQPg5Mld/TrTe0g4E/2Kx+NJdgn9Qse5fdI5M9+WLZtoamnm3ChuOxbIVVSWL3/f6rJSWk1NDWn2rs/U7CiycED2mevarFnnUBcIoVncYazxBXA5nUyYMMnS5zGLhDOR8joOK9TX1yWxkv5D5pxZa+n7xeQqKt3vuBVetXm2ofP559s4frzc8tpSVW1NTVQrNSMiW27U1dVaVVJKOPfc89B03dLumWEYVPuCzJh5Tp9YDAASzkQ/0HHF1LFjR5NYSf/h9/s6XJahZDOVl5exY+d2zjV01G6GNCPOIfzDXrpn8dE0jcamBtzO6Dtn7vbOmYSzrkydOgO3202Fx7qfEw2BEL5QiDlzLrDsOcwm4UykvI7bCOzevSOJlfQfkeFjw2HQ2tqS5GpSywcfLEUFzo7hPtkoTABWrlgqCwPi0NBQj2EYbZvLRifNng4o0jnrhsPhYM6ceVT7Amhx7HkWjYpWHw67g9mz51jy+FaQcCZS3ubNm8h2wrUjfZSU7JIJugnQ/hpnQpO83qbRNI1VK5czAciKsmsWMRtobmnh4483W1JbKosErFg6Z6pqw+3MkHAWhQsvvISgplPtM797phsGFd4A584+n/T0dNMf3yoSzkRK83g8bNmykfMG+ZhbFEDXDdnzKQEaGhoAMHIMGhpknp9Ztm37lKbmJmbFcd8xQJaisubDlWaXlfIaGuoBSIuhcxa+fUb7fcWZTZ06nby8PMpbfd3fOEY13gABTePiiy8z/bGtJOFMpLSPPtpAMBjigqIAIzI1hmXq8uGU
ALW1NahuFdKhqalZhtJMsmnTetIUhXFx3NeGwlRDZ+vWj/F6vabXlsrq6yPhLCOm+7ns6dTXSTjrjs1m4+KLL6fGF8Bv8ikuZa0+srOymTXrHFMf12oSzkRK+/jjzeSnwbgcDUWB8wf52bN3jwxtWqyysgI9XYcMwDDazyUU8dN1nS2bNzLeMLDHOKQZMQkIhkJyGHqMGhvDnWCXPbZhMemcRe/SSy/HMOB4q3lDmwFNp8YX4KKLL8Nu71tHiUs4SwKv10N5eVmyy+gX9u/bzYScAErbZ9mE3BAABw7sT2JVqe/oscPoWTpGVniCb1nZsSRX1PcdPnyQ5paWuLpmESMAp6KwY8dnZpXVLzQ1NeK0p2FTTz48Pqj5cbvd3HTTTbjdboLaycHCZXfT1NwkpwREYdiwEZx11jjKPX7TXq/jHh+6YXDppV805fESScJZEjz11J/58Y9vTXYZ/UJzSws5rhM71Oe2XZbOmXUaGuppamyCbML/J3wOpOiZ3bt3AtCTkwFtKIwwDHbu+NycovqJxsZGXI7Tu2bBkJ/58+dzxx13MH/+fIKhU8NZOqFQUIaRo3TppVfQHAjSHAyZ8njHPX5GjhzFqD5ynmZHEs6SQCakJ066Ox1P8MQQkCcUvtyXVu30NXv27A5fCIJSpqDkKCdtZyLic+DAfrJUlZw4hzQjhgFlZWWy/1wMmpubcdrSTrveYXdRXFzMk08+SXFxMY5Tzt102t0AtLTIdjLRmDfvIuw2mykLA1qCIRr9QS677AoTKks8CWcipQ0ZOoxjrSfmGhxtCQ9LDB48NFklpbytWz9BcSgo1QrKIQWtQGPHzs9P2phWxO7wwVIK9Z6fU1oE6IbO0aNHel5UP9FypnBmc+H1elm0aBFerxeH7eRw5rKH79PSIp36aGRlZXH2OedR6Q30eGjzeKsPRVGYN+8ik6pLLAlnIqWNHz+Jw802/G0LgPY12MnKzGDw4CHJLSxFaZrGR5s3ohfqRBo8xhCDUDDEtm1bk1tcH2YYBuXlZRSY8FiRxzh+XOa9RqulteW04BUNR1uga21tNbuklHXRRZfiC2nU+eNf4W207W02bdoM8vLyTawucSSciZQ2ceJkNAMONoW7Z/ubnEyYOAVF6dnQkOhcSclumhobMYZ1+K23ABSXwoYNa5NXWB9XX1+HPxhgQBe3MTBoAqqBzRgYdN55yCecm2VRUvS8Xg8OmzPm+0Xu4/V6zC4pZZ199rm4XC6O92BosykQwhMMceGFl5hXWIIlLZxpmsb111/PbbfdBsDRo0e5+eabueKKK7jzzjvlsGRhishE0LJWlaAOxz0KI0f2vcmhfcVHH21AsSkYgzsEAxW0wRoff7JZ9juLU2VlBRAOVmeyBagDWoG3277ujB2FbFVtf0zRPZ/Piz2OcBa5j8cjCwKi5XK5mD17DtW+IHqcQ5sVHj82m43zzus7xzWdKmnh7MUXX2Ts2LHtXz/++ON897vfZfny5WRnZ7No0aJklSZSSKSl3eBXaQ4oGAYMHGjG4JDozJaPN6EX6HDKlkLGEAOf1ysLA+IUCVJ5XdympJuvO8rTdSorjve0rH5B13WCwSB21RHzfSP3CQRk8UUs5s6dR0DTqI9jaNMwDKp9AaZOnU5mZpYF1SVGUsJZRUUFq1ev5qabbgLCL+amTZu46qqrALjhhhtYsWJFMkpLKNn7xno2mw2bqnKwycbGyvBvsX1tM8K+oqLiOFWVlRhFnbyvBwEqsvlpnCoqjqMAuV3c5tSPsa4+1vKBCplzFpXIKI5Njf3nRuQ+shgmNrNmnYPD4aDKE3uobQlqtAZDzJkzz4LKEicpn1KPPvoo99xzT/skyfr6erKzs9s/NIuKiqisrOz2cWw2hdzcvrslQmamE4cj9t/GRPT8fj+arlPWaqM1FPldJNSn3ze91bp14a6YUdhJOHMAA2DHzs/ktY9DdXUFeaqKXTfnF7oBwKdNTdjtOpmZsZ0X2d80N4dX
E6mKrZtbnk5Vwj9zHA5V3vcxSefcc2ez7ePNTDSMmOYIV3nDge7yyy/p0695wsPZqlWryM/PZ+rUqXz00Uc9eixNM2ho6LsTLauqGsjIiO2sNhGbY8eOAmBXwaYYOG1w+PCxPv2+6a3Wr9+Akq7AGUYS9EE6+3ft4/DhcnJyuuoBiVOVHjjAQF2HHu5xFhEZ2N+1ay/jx0805TFTVWNjMwCKEvtAk9IW6FpavPIzJ0YzZ57Lxo0baAlqZDmjjyo1vgBnjR2Hzebula95QUF0Q60JD2effvopK1euZM2aNfj9flpaWnjkkUdoamoiFApht9upqKigsLAw0aUlXCgkk6OtduTIIQBctnDHYUiGzuHDB5NYUWry+Xxs++xTtKHaGfODMdiAnbBly0d88YtXJbbAPiwYDFJWdgwzB2mK2v48dOighLNu6G17y8UXzpSTHkNE7+yzzwWg2uuPOpwFNJ0Gf5Crzj3PytISIuFzzn72s5+xZs0aVq5cyZ/+9CfmzJnDH//4R84//3yWLl0KwJtvvslll12W6NISTv7BWm/fvr3Y1RPhbHRWkAP798prb7JNmzYQ8AcwRnQx7JYLSrbCylXLE1dYCjh48ACarmPmzny5QLqism/fHhMfNdXFP6QsW/fELj9/ACNHjqLGF30To8YXnh8YCXZ9Wa/Z5+yee+7hueee44orrqChoYGbb7452SVZTv7BWm9PyU5GZ2ntzZxxOSE8Xi/Hjsnu6GYxDIPid99CyVJgYBc3VEAbpbGnZDcHDx5IWH19XUlJ+DisESY+poLCMEOnpO28TnFmqhr+mJQFXIk3a9a5NASChKL8ZbrG6yczM5MxY86yuDLrJTWcnX/++Tz99NMADB8+nEWLFrF8+XKefPJJnM7Y95TpayScWcvv97P/wH7G5574zWt8bvhA3d27ZUsHs+zY8TmlB/ajjTvzkGaEMdpAcSi8sfifiSkuBWzfvo0BqkqWSfPNIkYB5cfLqaurNfVxU43NFp43Fk840/XwYgJZIR6fmTPPxjAM6qLonhmGQV0gxIwZZ7cH6r6s7/8X9GEytGatffv2oGkaE9oCGUChWyfXBbulY2AKwzB46e/PoaQrGKOi+PBygnaWxsYN6ygtle5Zd4LBIDt3fM5YC35WRHoLsr1J12y2cLDSjVA3tzydbkg464mJEyfjdDrbhyu70hLU8Ic0Zs48OwGVWU/CWRJpmpbsElLarl07UOCkcKYoMCE3wM4dn8kwhQnWrl3Ngf370CZrEOVOA8Z4A8Wl8L/PPS1/B93Yvv0z/IEAEyx47EIgW1HZsmWTBY+eOiKjOJoe+89rTQ//7HE4Un8kyAoOh4MpU6ZRH+g+GNe2BbgZM2ZZXVZCSDhLIo+n9y3zTSXbP9/GyGydDMfJAWByXpC6+nqOHy9PUmWpwev18PyLz0Ae0XXNIpygTdHYvWsn69evsa7AFLBp03qcioIVB46pKEw0dLZt/QSvV44XOhObzYbD4SCkx36kYEgPD8e53W6zy+o3pk+fRUsgiC/UdTiu9QUYPHgIAwZ0NfG175BwlkRNTY3JLiFleTwe9uwtYWre6T9Qp+aHfwvbuvWTRJeVUl599WUa6xvQZnU/1+xUxhgD8uB/n1vYvhm1OFkwGGTjhrVMMgwcJs83i5gGBIJBNm/eaMnjp4q0NDdB7fSfJaeeGnDq16G2+0g4i9/06TOAE52xzuiGQUMglDJdM5BwlnAdh3Hq6+uSWElq27r1YzRNY1bB6f+gC9N1hmQYbP5oQxIqSw379++luPgt9DF6eLv5WCmgna3R2NjA3//+nOn1pYLNmzfh8XqZaeFzjADyFJVVK2V7k65kZGQS1E4/SmhI7tguvw5ovrb7990zHpNtxIhRZGZkUtfFOZuNgRAhXWfq1BkJrMxaEs4SrOPwgaySss6aNavJS4NxOZ23ws8f5GPnru3ydxAHv9/Hfz/5OLjBmN6DOWP5oI/TWbbsPeliduKD5e+R
q6iMsfA5VBRmGTrbd3xOhRyEfkZZWVkEQqcP/Y4tmEmmKw+XPZ2zR17B2IKTo3QgFA5nckRW/FRVZeq06V3OO6tr66pNmTI1UWVZTsJZgrW0tHR6WZinvr6OTz/dzAWFPtQzjAbNGxzAMGDVqg8SW1wK+N//XUh5WRmhc0PhMzN7wJhqoOQoPPnnx6WT3MGxY0f5fPtnnGPoqBYNaUacQ/iD4P33iy19nr4sJyeHgHZ6OFMUBbczk2z3AM4aNPO07ZF8wdb2+4v4TZkyHW8whPcM887q/UGGDx9BdnbqvM4SzhLM7/e1Xw4ETm+Ti55bvvx9dN3g0qFnfn2L0nUm54dYtrRYVs3GYPny9/ngg6XoE/Xwcr+eskHo/BDNrc38/vcPEwzKkWYAxcX/wqYoxLLPuY/w3KabbroJt9uNr9t7hGWjMBlYsfx9vF5ZpNSZ3Nw8fMHYXxt/yIOiKGRlZVtQVf8xZco04ESHrCPdMGgMhJg6dXqiy7KUhLMEM4wT+xVJKDBfMBjk/ffeZsaAIEXpXe8NdeUwHzW1tTIZOkrbt3/GwoV/wSgyMKaauAVGDoTODbF37x6eeurJfr+9RmNjA6tWLmOmYZAZQ9fMB8yfP5877riD+fPnRx3OAOYBHp+X5cuXxlpuv5Cbm4cv0IpuxLbfnDfQSlZmdvtGtiI+w4ePID09nfpO5p01tc03mzQpdYY0QcJZwun6iQ+efv4ZZIm1a1fT2NTE1SO6/2g6uyDIoHSDf/1rcQIq69sOHSrlsd/9BiPLQD9fj3l1ZreGgz5F58MPV/KPf7xk8oP3Le+88xahUCjmg87TgOLiYp588kmKi4tJi+G+w1AYrSi8/dYb0r3sxIABAzEw8AVim4riDTYzcGCBRVX1H6qqMmnSFBqDpzc0GtoC26RJkxNdlqUknCVYKBTs9LLoOV3XeftfbzAiS2/fLqMrqgJXD/Oyd+8eSkrkOKczOXr0CA8+dD9+xUdoXggs2k/TmGSgj9Z5443XeOON16x5kl6utbWF9979F5OBghgTcBrhBUeLFi3C6/XGFM4ALjIM6hrqWb16RYz3TH2RvbM8geaY7ucNNjNgYGrsu5VsEydOpiUQJKCd3L1s8AcpGFhAfn48y8Z7LwlnCdbU1NTpZdFzn3yyhSNHj3LNCC/RHlt60VA/mU54c/Hr1hbXR+3dW8LPf3E3raEWQheFICPKOxqAF2gC5YAS/ro7ChjnGOgjdF555UVeeum5fjfE+d577+D1+bg4Cc89FhimKLz5xmsy5eIUgwaFJ1i2BqLfm9IwDFr9je33FT0zfvxEABoDJzc1GoMaEydNSUZJlpJwlmDl5WUAaBkFHCs7luRqUoeu67z6jxcoSDeYUxj9Tt5ptnD37ONPNrN3b4mFFfY9n3yymV8+eB8ePIQuCUEMWzUppQpKi4LiV1A/VVFKo0zLChjnGehjdJYsWcSf//wnQqHYzzTsi/x+P8VvL2EcMNjiFZqdUVC40DCorK5iw4a1CX/+3mzQoEEAtPqjD2f+kIeQFpRwZpKxY8ehKAqNHead+UIavlCIs84an8TKrCHhLMEOHjyA4nSj5Q7j+PEyfL5Ypu2KM1m27F0OHT7MzWNascf4rr56hI8cF/ztb3/pN0GgOx98sJTfPvZrghkBQpeGIMZtmpRypcuvu74zGGcb7XPQHnnkwX5xvNDatatpamnmwiTWMBEYqCgUv7MkiVX0Pi5XGrk5ebT4G6K+T+S2RUWDrSqrX3G73QwdOozGDvudNbVdPuuscckqyzISzhJsz94SgukF6BkFGLpOaen+ZJfU5x05cogXX3iGaQNCzC2MfR5fmh3+fXwLpaWlvPbayxZU2LesXr2Cp556En2QjnaJRsyTlwBOHRWLdZRMAWOygX6uzufbt/G73/0m5SeqL3u/mEGKwqgk1qCicJ5hsG//PkpLDySxkt5n8JAhtPjqo7595LaDBw+xqqR+56yzxtMSOjHnrCkQRFEU
Ro+2cqvm5JBwlkA+n4/jx8vRMwaiZ4QnicoPwJ6pq6vlt488iFsN8p+TWqKea3aq8wqDXDrUz+LF/+zXG9Pu3Lmd//nLEzAI9At0sHd/HysZo8MBbfv2z/jrX/+c3GIsVFVVxYGDB5hlGChJGNLsaAbhD4aP5HizkwwZMpSWwOmds9EDpzJ64OnbODT76lFVVYY1TTRq1Gh8oRD+tkUBTcEQg4sG43LF8xtk7ybhLIGOHy8Dw0B352E43CgOF+XlMu8sXnV1tTz4wH/R1FDLXdObyE/r2eTxf5/gYWp+iL/85QnWrFllUpV9h9fr5ck//xHSDbQLNOglWzMZowz0STqrV69I2cCwbVv4+KoJSa4DIB2FESh8+snmZJfSqwwdOgxfoBX/Kcc4jRo4lVGdhrM6CgcV4XD08BgN0W7kyNEAtATDw5mtIYNRo8d2dZc+S8JZAtXUVANguDJBUdCdme3XidgcP17Oz+//GXU1x/m/MxsZk93z1WV2FX46o5lJuUGefPJx3n//HRMq7TteeeVFaqqrCc3u+bFMZjMmG5ALf336f2htTb1jz44fL8OuKPSWTReKMCgvL+93q2W7MnTocACavdEdM9bsr2PosOFWltTvDB8+AgiHM0038ASD7delGglnCVRd3RbOnOH9CDRHBpVVVcksqU86dOggP7//Z3gaq7lvVhPjc81b9u+ywd0zm5k5IMjf/vYUixa92i8+oHbu3M677/0LfaxOr0kIHamgnavR1NTIM8/8NeX+ThobG8lUlKQPaUZkAD6/L+Xn+cViWFvQavLVdntbXddo9tanbHBIltzcPNLd6bQGNVrbFm8NS9EALOEsgQ4fPohid2E43ADo7hyOl5cRCES/9UN/V1q6n18+cA+2QBMPnNPI2Jzug5lhQL1fpbzVxopjzm5PZnDa4M7pLXxhsJ9//OMlXn75hZQLAx3V1dXy+B8fRclSMKb14v/OPNAn6axZs4ply95LdjWmysrKptUwMKLaEM56HiDN5cLptGjH4T5o0KBCnE4Xjd6abm/b4m9ANzSGDZNwZiZFURg8ZAieUAhP2yHoQ4YMTXJV1pBwliCGYbB126cEMwuJzFrXswajaSF27dqR5Or6hrKyY/z6V/fjNrz88pxGhmZEd87dijInlV4bTUGV50oyWFHW/QeOTYVbJ3u4fKifN998nTffTM1NagOBAI/97jc0tzYTmtP7hjNPZUw2MIoMnnn2ryl1qsPQocMIGgYVyS6kzVFFSdkPvXipqsqwYcNpiiKcRQLciBESzsxWVDQEn057OCssLEpyRdaQcJYge/eWUFtTjZY/qv06LWcIit3JunUfJq+wPsLv9/GH3/8GJdjKvbMaGeiO/gDirdXOLr8+E1WBf5/oYW5hgFdeeYFt2z6Nqea+4OWXX+DA/n3heWY5ya4mCgro5+sY6Tq/+/3DtLa2JrsiU8yZcwE2VWVrDx7j1Fwdb86uwuCYYfCFCy/pQTWpaeTIUVENazZ6q1EUpX2emjDPoEGFeIMhvCGNjPQM3O70ZJdkCQlnCbJ69UoUm51Qh3CGaieQN4r1G9bJZrTd+Oc//8HRY8f4weRmitKjD2YAfq3rr7uiKvAfk1sZmqHzl//5U0r9PR07doTid99CH6PDMJMfPBjeNPKmm27C7XaDmVOXnBA6L0RTYyOLFr1q4gMnT3Z2Dl+48BI2Kwo1cQ5tTuzm62gYGLwHuNPSuOSSy+OqI5WNGDESb6DltBWbp2r01FBUOBiXy5WgyvqPgQML0A2DJn+IgQWpe6i8hLMECAQCrF33IcHckWA7uWsTGjiOgN+XslsEmKGmppp33nmTLwz2M21A4nfwd9lgwcQW6urreSeFdk5fvXolBgbGVAvmOQVh/vz53HHHHcyfP9/ccAaQD/ownRUrl6LrsYX13urf/u0WXK403kQhFEdAmw3kE57Mf13b17H6BNgPfPNb/05OTm4cj5DaRowYBUCjp+tV9k3+GkaN
Hp2Aivqf/Px8ILzH2cCBEs5ED6xZswqvp5Vgwem7GOlZReDOofjdt1N60nlPvPXWGxiaxk1jkte1mpCrMWtgkHfeXpwyRwnt2r0D8gArfrl3QHFxMU8++STFxcXWzGUbDK0trSmzV2Bubh7f/+EdHMHgbYh5cYCCQjZQAJxH7Cs/D2HwjqIwffpMrrpqfkz37S9GjhwFQIP3zOEspAVo9ja078klzJWbm9fp5VQj4cxiXq+XV197BSOzAD27kzPWFAV/0TQO7N/Lpk3SPTtVbW0Ny5e9xxcG+2OaZ2aFr4z20tzSynvvvZ3UOszS0tKM0cONe8/IEX7vL1q0KBxmLQhnkdpTZd4ZwLx5F3HTTd/gU2A5sQe0eJVh8LKiUFg4mLvvvg+brZfsQNzL5ObmkZmZRaPnzIsCGr21gNHeZRPmGjx4KBkZmSiKkpIHnkck+XCW1Pfaay9TX1eDb/K17as07dX7AAgVjGv7czzOqt387ZmnmD59JhkZGUmrt7d5/vm/YeghvjI6+XO9zsoJd8/eWPQqF110aZ9vqdvtDkj+yxq/trmDNltq/Rj7+te/RVNTY/t2IVdg7ZFOZRi8oChkDRjIAw8+QkZGjKfc9yOKojBq1GiOHzpz56yxrasW6bIJc2VlZfHCC+G5pkq85/X1AdI5s9CRI4d5550lBAdNDA9ftrFX78FevefEDRUV36gv0NhQzz//+UoSKu2dVq36gA0b1nH9KC+Dktw1i/jOBA9GyM9/P/F7QqHEz38zU1FhEWqLSi/ZWitmSnP4B3NBwaAkV2IuVVX5z//8IVdccTVrgWJAt+gv6SAGzykKmfkD+PVvfsegQan1Wlph5MjRNHprzjgNpdFTjdPpkjM1LaQoSkoHM5BwZqk333wdbA4Cw87t9rZ6ZgHBgeN4f2kxzc1NCaiudysp2cXTf/0zk/NDXDeq97R3Brl1bpnYyq7du3jmmaf69DzBmTPPwWgx4PSznHs/A9RjKqPHjCEnpy/sARIbVVW57bYf8eUv38BHwBuAZnJAK8HgRUVh4OAhPPLbP0qYiNKIESMJaUFa/Z3/w2n01jBi+EhUVT5eRfzk3WMRn8/Hxo3rCeSPAUdaVPcJFU0hFAyyceN6i6vr3crKjvHoIw8ywBXkx1NbsPWyd+m8wQGuG+Vl+fL3Wbz4n8kuJ25z536B9Ix0bNts5nfPTp2yZPIUJuWwAvVw1ZWpO3FdURT+7d++x7e+9e98DrwMBEz6i/oUg38AI0eP5eFH/sCAAb3xzK7eacSIkQBnPCmgyVfDiJEjE1mSSEG97GMvdWzevJFgMEBowJio76OnDwB3DmvWrLKwst6tubmJRx9+AJvm4f/ObCLL2fMPI29IOWnPLW+o5+3wm8f6uKDIzyuvvMiGDWt7/HjJkJWVxS0LboMaUD5XTA1oxhCjy697pA5s22xMmDiJyy+/0rzH7YUUReHGG7/GD35wB/sVhecVBW8P/6I2YPAmMHX6TH7168fIzk69zqOVhg8/czjzBVvxBT0y30z0mIQzCwQCAf75+j/AnYue1ckKzTNRFPwFE9i9eyeffdaTvcL7rmeeeYrqmip+Oq3JtHlmnpBy0p5bHhPCmarAf072MC5X4///yxPU1nZ/pEtvdMkll/OlL12LuldF2WleQDPGGBiZBobLQD9bxxhj0gPXg32dnQG5A7n7Z/f1m6GjL37xKu6++z7KFZXnFIXWOP6iDAxWtW0yO2fOPO6//6HwBsEiJm63m4EDCjoNZ03e8OkBkQAnRLwS/pPt+PHjfOc73+Gaa65h/vz5vPDCCwA0NDSwYMECrrzyShYsWEBjY2OiSzOFpmn85S9PcLy8DN+IOe0rNKMVKpwM6Xn86f/7PeXlZRZV2Tvt27eHdevW8JVRXsblxrCNfzfS7cZJe26l280JCg4Vvj+5BS3o57XXXjblMRNNURRuueU2LrvsCtTdKsonCpiRiRXADWSDMdbAlMWGFWD/0E5eZh4P
Pfgo+fkDTHjQvmPOnHncd/8vqbHZeF5R8MQY0FYDKwkH8rvu+i8cjl5+kGovNmLkSJo7OcYpEtiGDTg4vXUAAB/5SURBVJNjm0TPJDyc2Ww27r33Xt59911ee+01XnnlFfbv38/ChQuZO3cuy5YtY+7cuSxcuDDRpfVYMBjkv//7cdat+5DA8NlouXGciaPa8Zx1Oa2+IL/4xX9x6NBB8wvtpTZsWIddhatHmLsAwG03Ttpzy21SOAMoTNc5r8DHpo1r0TTzAmUiqarKD3/4E7761a+jHlSxrbaBJ9lVdWCAskvBts7G8CEj+N1jTzB48JBkV5UUs2ady333P0SNqvKSouCPMqBtwGAlcPHFl3H77XfKPmY9NGzYCJp99ejGyb/JNPlqcbvT+90vDsJ8CQ9ngwYNYsqUKQBkZmYyZswYKisrWbFiBddffz0A119/PR988EGiS+uR+vo6HnzoftavX0Ng+GyCQ2bE/ViGO5fWidfQ6Atw3/13s2XLJhMr7b0aGurId0N6H9u2amimRqvH26e31lAUhW9+89+4667/wtXqwv6BPTzpPtmLUVvA9qENdafKRRdeyqOP/LHff/DNmDGLu++5n3JgEd1vs7ErMpR5/gXcfvud/WYo2ErDhg1H00N4/CeP8DR76xg6dFjKb/MgrJfUj8Fjx46xe/duZsyYQW1tbfseOwUFBdTWnt4yPpXNppCbm/wT6Tdv3sxjv3uMpuZmfGddijZgbI8f00jPwzP5yxj7lvPYY7/hq1+9ie997z9S+iDdgQMH0OhTCOrhIcO+osankuZyMXBgdp/vSMyffzUzZkzl0d8+zJ7NezCOgD5Lg0TvS6qDslfBtstGmjONH93zY6666mr50GtzxRWX0drayJ///CQrgS+2XX/2KberxOANRWHCuHH88sFfpvTPj0SaMCH8M77ZV09m2okjhFoC9cwcc0Gv+FwSfVvSwllrayt33HEH999/P5mZJ//kj3aDOU0zaGhI3vhLXV0tL730HGvWrMJIz8M7+csY6fmmPb7hzMAz6Vqchz/ijTcWsX79Br73vds4++zu903riyZOnMrixW+wrcbB7EFmn5RtjZAOn1SnMXXaTJqb/ckuxxSZmfn85td/YOnSd/n7358jsCyANl7DmGgk5idGBdg/s2M0Gcw+fw7/+R8/ID9/AI2NqXGmqVkuvvhKdu0qYeWKZZyFwSgUZnWY3BfCYJGikJaZxd33/AKvV8Pr7U3j1X1XdnZ465EmXx2DCa/ID2oBPP5mBg4sTOrnkujdCgqyorpdUsJZMBjkjjvu4LrrruPKK8NL4QcMGEBVVRWDBg2iqqqq/eT53qi5uYm33lrMO8X/IhgKERgyk+DQmaBa8HKqdgKj5xHKH0nF4Y088siDTJ02g2/+n+8wYcIk858viWbOPIdBAwfy9iGNcwuCsa6lSIq1x500+OHqq69JdimmstlsXHPNdZx//lxeeuk51q5djXJYITQjBEMxZ4L/qVpB3aailCsMKirkez/6PuecM9uCJ0oNiqKwYMGt7Ph8G2/W1PBjQ8fe4S9mLVBhGNx7+0/6/VCw2bKzc0hPz6DFV99+XYs/fHnIkKHJKkukkIQPHhmGwc9//nPGjBnDggUL2q+/7LLLWLJkCQBLlizh8ssvT3Rp3WpoqOell/6XW29bwJtvvo43axieaV8lOPxca4JZB3rOMFqn3oh/xBx2luzl/vvv5qGHfs7Ondstfd5Estls3Pz1b1PaZGNDhTPZ5XTLE4I3SjMYP34CM2eek+xyLDFgwEDuvPMeHn749wwvGIFtow11nQpmnjWug1KiYF9mx1nj4tvf/i7//cRfJZhFwe12c9sP7qDO0Nnc4fpmDNYpCnPnzmP27DlJqy+VDR48pD2QAbT4wicGFBXFsH2SEGeQ8M7ZJ598wltvvcX48eP5yle+AsBdd93Frbfeyp133smiRYsYMmQITzzxRKJLO6P6+jqWLFnE0qXvEQwF
CeWPITB+JkZ6Xvd3NpNqIzR4Ki2DJmCv2s2OPTvY/st7mThpCt/4+reYNi3+RQi9xSWXXM7S99/h7/v2M3VAkBwTNqG1yj/2pdMYULjve99P+blQkyZN4fE//Jn33nubV/7xIoHlAbQZGsaoHm6T0Qy2zTaog9nnnc8tt9yWcmdlWm3GjFlMnz6TNds/Y7Zh4EBh3f9r787joqz3/o+/rmuGZdgElMUQF9xFJHLDfSUhNUzcypOa1a/uFrrVIk4qJY+0bo8t53S0+1TeqZ0eLT8zyajslJUdTSBTEZXMFTQBww0YYLbr/oPk1lYSmGsYPs+/5nKGmfcAzuPNdX0XwK4o3HbbXL3jua3w8PbsO51ff1xZK+VMNB2nl7MBAwbw7bff/uJ9l9c8cxUXLpwnK+sdPvwwG6vNirVtN6zXXY9masSK2pqGYjGD3YKx9BC20F5/eC00DB7Y2vejMqwPxrJvKTyWzxNPPEafPjHMmjWb6OiYa8+nM1VVuf+BhaQ9ksqaAj8ejatAbWTv8TL89vG12FniwWenvZgyZSrduvVo/BO2AAaDgUmTpjBo0FBe+PszHPy6AK1EwzHQcU2fJMpJBcNuAyZvE/cuTGXYsBFNH7qVmDJlGpn5ezkI9EFjr6IyOH6YXGJrRmFh4VTV/huH5kBVVKpqL+Dn54/JJJMBROO1sEULnKO0tIT338/iX//66MdS1hVrRByad+O3OTGWHUKtrdvY3OvEDkCrW3j2WqjGuv04Q3tiLCvk4JF8Mn48kzb1lmnExQ1okdPmO3bsxP+75wFWr36e/znkw529zY0afxYXYmFfucdVx41x6LyRlw/50atXb269dU6jnqslCg0NZdkTT/Hee5v45z/XoVao2IbaGj6jUwNln4L6nUqv3r1ZuDBdxkQ1UkxMLGEhoew7W4YHYNYcjB8/Qe9Ybi00NAxNc1BtqcDXqw1VtRdl83jRZKSc/cjhcJCfv5ePtmbzdV4OGkpdKWvsmbKfMJwv+tnxNZezy1QjtvC+2EJ7YSwrpPBYAStWLCO8fQRJiTcxevQ4/PwaNkPEVYwdm0BpaQkbN76JosC8nuZr3gB9XISFD096U21TSOlazdiIay9nBeeMPJ8fQFj4dTz66FKMxtb5X0hVVaZMmUaXLl1Z9cxTVH9mxjbcBr93pd8Oao6KclohKWkS8+bd3Wq/h01JVVUGDBrC1g/ew6RpmLy9W/QZ9Jbg8uV384/lrNpaQVSYe03SEvpp9Z+K5eU/8Nlnn/CvT7byw9kyFA8TteEx2MKi0bx8m/z1FIftN48bpb6k9cFw7hjflx3k1Vdf5rV/rmNI/DDGj59AdHRMixkfNWvWn3A4HGza9DYXalX+o2/lNS1QqygQ5OUgyAvGdbj2Yrb9e0/WFvoSEdGBpRnLZcNo6sY7PbXiGZZlPsb5L85jG2mDX5tobQd1h4pSqjBv3l1MnnyLU7O6u5iYWLKzs8gHBvTtJ6W3mbVrFwKAufYiml/Ej8toyHhJ0TRa5f9eTdPIz9/LBx++z+6vc9A0DXvAdVi7jsEe3BnUlr2QKKqKvV03qtt1Q60qx1hWyL+/+oovv/yc8PYRJE5IYuzYG/H1bfry2ZQURWH27Lm0bduWtWv/m8fzAvnPmEtE+DXNhugNZXXA64dNfHLKm5iYfjzyyGJ8fZ29Kqvr6tAhkqdWPMviJY9Q/uUP2Ebb4Ke9VQN1V10xu//+/2Ts2ARdsrqzqKiuV9zupmOS1qFt27q1zszWSiz2GmwOK23byuV50TRaVTnTNI3c3F28+dY/KTp5AsXTRG14P2yhPdG8A/SO1ywcvm2xdBmGpeNgjOeO8/3ZQtate4U333ydpKRJ3HLLdJcvaYmJk4iM7MQzq5azNE9hbo8qRl5ncco6aCVmlb8X+HPikkpycgqzZ89t8bsANId27ULIXPY0f35sEZd2XsQ27idniAsUlO8V7rzzXilmzeTKcXsyEaD5mUwmTN4mqi2V1Fgq
AQgObqdzKuEuWt5o8Wt0/vw5lmUuYeXKJykqu0Bt1EgqY2dh7TjQbYvZVQxGbCHdqe4zmeq+U6j0Cefdd/8/9z9wNzk5X+md7ndFR8fwzLNr6NE7hpcP+fLiAR/MzbyV5Y4znizJbcNZmx/p6UuZM2e+FLPfEBYWTtoji1HMCso3VzTnUlALVcaPn8BNN03WL6Cbu3K4gixH4hxBQcFUWyuotl4uZ667eLpoWVpFOTt9+hSLHk6l4EABtZ2GUBUzFVtID30uX9otmEwmpk2bhslkAnvjZg5eC4dvO2q7j6O67xQu2T1YufJJsrLecXqOPyooKJiMjOXceuvtfFXqzZLcQE5WNP3PsNYOLx/04cUDvnTu1ptnnl0tC3k2UK9efZg+/VbUYhUsgAbGvUZCQkOZP/8eveO1GjIe0jmCgoOptZqpsdatyhwY6OS1L4XbcvtyVltbw5NPPs6lqhqq+kzGFh4Nin5vW7FZmDhxIqmpqUycOBHF5vxydpnDtx3mPpOwBXdhw4b/IS8vR7csDWUwGJg2bRaZmU9j8wrmibwAPj/ddLsJlJhVnvi6DV9870VKykwyM/9LzkL8QcnJKQQFB4MZqAXtksa8uXfJpttOcPmPiKAgKQnOEBgYRK3dTI3V/ONxoM6JhLtw+zFnH36YTVlZCdW9J6L56D9YUzN6kp2dDUB2djaa0aRvINVIbdfRGGou8uq6lxkwYFCLmM3Zp09fVj2zmuefX8kr+Xs5fsnI7T3NGBvRu/PLjfy9wB/V05clS9KIi3PPDeabm5eXF4kTJvLGG6+hOTTahYTImUcnWbQonaqqSlkI1UnatGlDrdVMrc2Mh4cH3t46f54Lt+H2Z87y8/ei+QTjCHCRLTUMnlRXV7Nx40aqq6vB4AJ7SKoGLCG9KC05w9mzZ/VO02Bt2rRhyZJMpkxJ4dPTXqzc60+V9dqK5b+KvfjLXn9C2ndi5V/+JsWskeLjhwGg2BXiBw+TsXpO4uHhIZfWnCggoA0WWy3Vlkr8/QJaxB+2omVw+3IWFBSEWluBYi7XO4rrctgwXizGaPTAz69lLRFhMBi4/fb5PPjgQr696EXm7jacq2n4B6SmwRvfmVj/rQ/9bxjI8hXPEBYW3oyJW4eIiA71t3v1auQiy0K4KH//uslkFTXn628L0RTcvpzNnPkn/Hx98DnwHh5FOSg1Fbrm0VTjbx47lcOG8exhfAs2Y7hQzJw58/HxaZmXQ0aPHkdGxpOcs3mTuTuQMvPv/2o7NFh7yIfsk95MmHATaY8urZukIRrtyjMIsqyDcFeX/5itqr2Af0DL2oVFuDa3L2ehoaH89fk1DB86HK/SA/jsewvvQ9kYzxSg1Fxyeh57UMffPG7+AFYM507gefQL/Pa+idex7bQP8iUj40kmTrzZuVmaWN++/ViW+V/UqL4s39OGH6p//ddb0+DVQh8+/96LqVNncPfd98mlt2Yil9mEu7q8LZ7FXiMLU4sm5fYTAgDatAlkwYI05syZzyefbGXHzi85XbQLinaBqQ1W//bYAyKwB7QHD+9mzWIL7Y3HmQKwW7B26I8ttFezvh4OB2rVWQyXvsdw8XsMVWXgsGPy8WVAfDzjxt1I37793GasRNeu3Xn8iRU8npHOX/ZpZPS/iK+H9rPHbTrmzWen64rZbbfNcZv374pa6tlYIX7Plb/brr6Yt2hZWkU5u6xt23bMnDmbmTNnU1Jyhq+/zmXfvj0UHNiPpawQAM23LTb/9tgDrqsrawaPpg2hKGiePoAPtubYJFfTUM3nUC+dxnDpDB4VJWh2KygKnTp1IXZMMjfcMIDevaPddu+9qKhuPJqeQWbmEv77gC8LYyuvun93mQfvHjcxZsx4KWZO4K6/Z0L4+Pj+4m0hGqvVfmqGh7dn0qRkJk1KxmazcfTod+zfv4/9+/dRWHgIW0kBKCp2/zDsbTpgC+yIZgrEKXsG/VG2WgwXijFcPIXnpe/RLHVr7rS/LoLY+Loz
Y3379mtVA1b79u3HvHl3sXbtP/j0inXQLloUXi70IyoqinvueUCKmRPI91i4qyvPnMkZYtGUWm05u5LRaKRnz9707NmbadNmYbFYOHToAPv27eGbPbspLsrDszgPvAOwBnbEFtQJh3+YvovZ1lZgOH8S4/mTGCpKQNPw9fMnbtBArr/+Bvr1u75+Y97WKilpMrm5X/F2YT4RPjYMisbbR0xU2w089FAaHh5NfFZUCNGqXLmumUwmEk1Jytkv8PT0JDY2jtjYOObMmU95+Q/s3p1HXl4O+fl7sZUUoHiYsARGYg/qhL1NBDT3rEtNQ6k+j/HcCTwuFKFU/QBAhw4dGTR+OgMHDqZbtx6oqtvP8WgwRVGYP/9eFiy4jwAPBz2DbLx5xIekmybRoUOk3vGEEC2ct/f/jVH28mre8cqidZFy1gBt27bjxhuTuPHGJKqrq9m7dze7du3k669zqTl7GMXggbVNB2zBXbAHRjbdODVNQzWXYyg/jueFE1B9ERSF7t17Ej/4ZgYPHkL79tc1zWu5qY4dO9G//0COFuRxnZ8dDYXk5Kl6x2oV/P0DqKhw/oxoIZzlyj+GpZyJpiTl7A8ymUwMGTKcIUOGY7VaKSjIJydnJ1/t2knlkeMoBiOWoM7Y2nXHEXDdNY1RU2orMP5wBM/yI1B9EUVVienbj/j4YQwaFE9QUHAzvDP3NWrUWHbvzmPLCRMxMTGt/nKvs6SnLyU/f6/eMYRwCm9v2TtWNB0pZ43g4eFBXFx/4uL6c/fd93HwYAE7dmzny39vp+aHI2BqQ21YNLaQHg267KlWlOBxZj/GC0WgafSJjmHUyDEMHjykVQ3mb2r9+l1ffzs29gYdk7QuvXr1kd0BhNtLTV1EcXERMTHX//6DhWggKWdNxGAwEBMTS0xMLPPn30NOzk7e27KZY0d34nUmn9qIG7C16/6LZ9KU6gt4FeVguFCMj68fibdMJyEhidDQUB3eifu5sth27hylYxIhhLsZNWqs3hGEG5Jy1gw8PT0ZMWI0w4ePoqAgn9dee5WjR7djLD9KTddR//dATcNYsh+vU7sxeXuT8qd53HTTZBm70Ixk30whhBCuTtE07efLp7cQVqudCxfMesf4XQ6Hg48//pB1617BqnpiN5pANeDwDsDj7GEGDRrCPffcL9vcNKOUlIkAbNjwtqzkLYQQQhchIQ3bg1XWXXACVVVJTJzIihWr8DGCofocSm0lHmcPM23aLNLSFksxa2Zdu3YHrp76LoQQQrgiOXPmZN988zXLlz8OwMBB8TyatkRWUHeCsrJSiotP0r//IL2jCCGEaKUaeuZMxpw5WVxc//rbkyYmSzFzktDQMEJDw/SOIYQQQvwuuazpZFeWsc6du+iYRAghhBCuSMqZjnx9/fSOIIQQQggXI+VMB5fX3ZJLmkIIIYT4KRlzpoNHH13C/v379I4hhBBCCBckszWFEEIIIZxA1jkTQgghhGiBXK6cbd++nQkTJpCQkMBLL72kdxwhhBBCCKdyqXJmt9vJzMzklVdeITs7m/fff58jR47oHUsIIYQQwmlcqpzl5+fTqVMnIiMj8fT0ZOLEiXz66ad6xxJCCCGEcBqXmq1ZWlpKeHh4/XFYWBj5+fm/+niDQSEw0McZ0YQQQgghnMKlytkfZbdrMltTCCGEEC1Ci5ytGRYWRklJSf1xaWkpYWGyH6IQQgghWg+XKmcxMTGcOHGC4uJiLBYL2dnZjB07Vu9YQgghhBBO41KXNY1GIxkZGdx1113Y7XZSUlLo3r273rGEEEIIIZxGdggQQgghhHCCho45a9HlTAghhBDC3bjUmDMhhBBCiNZOypkQQgghhAuRciaEEEII4UKknAkhhBBCuBApZ0IIIYQQLkTKmRBCCCGEC5FyJoQQQgjhQlxqhwB3d+bMGdLS0igvL0dRFGbMmMHcuXP1juXWamtrmT17NhaLBbvdzoQJE0hNTdU7VqtweZePsLAw/vGPf+gdx+2N
HTsWX19fVFXFYDCwadMmvSO5vUuXLrFkyRIOHz6MoiisWLGCuLg4vWO5rWPHjrFgwYL64+LiYlJTU5k3b55+oZqJlDMnMhgMpKenEx0dTWVlJSkpKQwbNoxu3brpHc1teXp6sn79enx9fbFardx2222MHDmS66+/Xu9obm/Dhg107dqVyspKvaO0GuvXryc4OFjvGK3G8uXLGTFiBH/729+wWCzU1NToHcmtRUVFkZWVBdT98Tdy5EgSEhJ0TtU85LKmE4WGhhIdHQ2An58fUVFRlJaW6pzKvSmKgq+vLwA2mw2bzYaiKDqncn8lJSV8/vnnTJs2Te8oQjSLiooK8vLy6n/HPT09CQgI0DlV6/HVV18RGRlJRESE3lGahZQznZw6dYpDhw4RGxurdxS3Z7fbSU5OZujQoQwdOlS+506wYsUKHnnkEVRVPmKc6c4772Tq1Km89dZbekdxe6dOnSI4OJg///nPTJkyhcWLF2M2y17PzpKdnc2kSZP0jtFs5JNTB1VVVaSmpvLYY4/h5+endxy3ZzAYyMrK4osvviA/P5/Dhw/rHcmtffbZZwQHB9O3b1+9o7Qqb7zxBu+++y4vv/wyr7/+Onl5eXpHcms2m42DBw9y6623snnzZkwmEy+99JLesVoFi8XCtm3bSExM1DtKs5Fy5mRWq5XU1FQmT57MjTfeqHecViUgIIDBgwfz5Zdf6h3FrX3zzTds27aNsWPHsnDhQnbt2sXDDz+sdyy3FxYWBkDbtm1JSEggPz9f50TuLTw8nPDw8Poz8YmJiRw8eFDnVK3D9u3biY6Opl27dnpHaTZSzpxI0zQWL15MVFQUd9xxh95xWoVz585x6dIlAGpqati5cydRUVE6p3JvixYtYvv27Wzbto1nn32W+Ph4Vq1apXcst2Y2m+snXpjNZnbs2EH37t11TuXeQkJCCA8P59ixY0DdGKiuXbvqnKp1yM7OZuLEiXrHaFYyW9OJdu/eTVZWFj169CA5ORmAhQsXMmrUKJ2Tua+ysjLS09Ox2+1omkZiYiJjxozRO5YQTaq8vJz7778fqBtjOWnSJEaOHKlzKve3dOlSHn74YaxWK5GRkTz11FN6R3J7ZrOZnTt3kpmZqXeUZqVomqbpHUIIIYQQQtSRy5pCCCGEEC5EypkQQgghhAuRciaEEEII4UKknAkhhBBCuBApZ0IIIYQQLkSW0hBC6Kpnz57ccccdpKenA7B27VrMZjMPPvhgo587PT2d0aNHN2ol8ZKSEpYtW8bRo0dxOByMHj2atLQ0PD09gbrlcL777jtSUlIoLCwkNzcXf39/VFUlIyODuLi4Br9WTk4OHh4e3HDDDdecVwjR8smZMyGErjw9Pfn44485d+6c3lGuYrPZ0DSNBx54gPHjx/Pxxx+zdetWzGYzzz33HABnz55l//79bNmyhXnz5gGQlpZGVlYWixYtIiMj4w+9Xm5uLnv27GmOtyOEaEHkzJkQQldGo5GZM2eyfv16FixYcNV9Pz3zFRcXx549e8jJyeGFF17A39+fw4cPk5SURI8ePdiwYQO1tbWsXr2ajh07ArBz505eeuklqqqqSE9PZ8yYMdjtdlatWkVubi4Wi4XZs2cza9YscnJy+Otf/0pAQADHjx/niSeewMvLi5SUFKBun9bHHnuMcePGkZqayvz58yktLSU5OZmlS5delX3gwIEUFRUB8Pbbb/PWW29htVrp1KkTK1euxGQykZ6ejqenJ4cOHSIsLIw9e/agqirvvfceS5cu5ezZs6xevRpVVfH39+f1119v7h+HEMIFSDkTQuhu9uzZ3Hzzzdx1110N/prCwkI++OADAgMDGTduHNOnT2fjxo2sX7+e1157jcWLFwNw+vRpNm7cSFFREXPmzGHo0KFs3rwZf39/3nnnHSwWC7NmzWLYsGEAHDx4kC1bthAZGcmGDRuIjo6+6nX9/Pxo3749J0+e5MUXX+Tee+8lKysLgI0bN9Y/btu2bfTo
0QOAhIQEZsyYAcBzzz3Hxo0buf322wEoLS3lzTffxGAw8MILL+Dj48Odd94JwOTJk1m7di1hYWH125AJIdyflDMhhO78/PxITk5mw4YNeHt7N+hrYmJiCA0NBaBjx4715apHjx7k5OTUPy4pKQlVVencuTORkZEcO3aMHTt28O2337J161YAKioqOHnyJB4eHsTExBAZGXnN72XlypW8+OKLBAcHs3z5cgC+++47nn/+eSoqKqiqqmL48OH1j09MTMRgMPzic8XFxZGenk5SUhIJCQnXnEkI0bJIORNCuIS5c+cydepUpk6dWv9vBoMBh8MBgMPhwGq11t93eUA+gKqq9ceqqmK32+vvUxTlqtdRFAVN01iyZAkjRoy46r6cnBx8fHzqj7t161Zf4C6rrKzkzJkzdOrUifLy8p+9j7S0tJ9NQEhPT2fNmjX06tWLTZs2kZubW3+fyWT6le8IZGZmsm/fPj7//HNSUlJ45513CAoK+tXHCyHcg0wIEEK4hMDAQBITE6+6NBgREcGBAweAusuEV5azhvroo49wOBwUFRVRXFxMly5dGD58OG+88Ub98x0/fhyz2fyzrx0yZAjV1dVs3rwZqNtU/Omnn+aWW275zVL1U1VVVYSEhGC1WtmyZcuvPs7X15eqqqr646KiImJjY3nooYcICgqipKSkwa8phGi55MyZEMJlzJ8//6pB7zNmzOC+++7j5ptvZsSIEVed1Wqo9u3bM23aNKqqqli2bBleXl5Mnz6d06dPM3XqVDRNIygoiDVr1vzsaxVFYfXq1Sxbtow1a9bgcDgYNWoUCxcu/EMZHnroIaZPn05wcDCxsbFXFbArjRkzhtTUVD799FOWLl3KunXrOHnyJJqmER8fT69evf7w+xdCtDyKpmma3iGEEEIIIUQduawphBBCCOFCpJwJIYQQQrgQKWdCCCGEEC5EypkQQgghhAuRciaEEEII4UKknAkhhBBCuBApZ0IIIYQQLuR/AR4Aa/iH1ImqAAAAAElFTkSuQmCC\n",
489 | "text/plain": [
490 | ""
491 | ]
492 | },
493 | "metadata": {},
494 | "output_type": "display_data"
495 | }
496 | ],
497 | "source": [
498 | "plt.figure(figsize=(10,5))\n",
499 | "seaborn.violinplot(data=df, x='NumberOfParts', y='Length')"
500 | ]
501 | },
502 | {
503 | "cell_type": "code",
504 | "execution_count": 161,
505 | "metadata": {},
506 | "outputs": [
507 | {
508 | "data": {
509 | "text/plain": [
510 | ""
511 | ]
512 | },
513 | "execution_count": 161,
514 | "metadata": {},
515 | "output_type": "execute_result"
516 | },
517 | {
518 | "data": {
519 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAmAAAAE1CAYAAABAwFwJAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAGotJREFUeJzt3X9s1IX9x/HXcaVQpOUotncQKyrDrWFSF5VC2kVXdi1YGK20mX9s+QJzbGiCFcQoBsUJbEa2ka3ZhOhkGJfIr0K0Rn6UQemgOGWCYeCGs0m70SsU+oNSOO76+f7B1/vqRAtH+75eeT7+kms/93m/c9I+ufv06nIcxxEAAADMDIj1AAAAANcbAgwAAMAYAQYAAGCMAAMAADBGgAEAABgjwAAAAIwRYAAAAMYIMAAAAGMEGAAAgLGEWA/wVbq6uhQO271Rv9vtMj2fNfaLb/15v/68m8R+8Y794pf1bgMHuq/4c/t0gIXDjlpazpmdz+MZYno+a+wX3/rzfv15N4n94h37xS/r3dLSkq/4c3kJEgAAwBgBBgAAYIwAAwAAMEaAAQAAGCPAAAAAjBFgAAAAxggwAAAAYwQYAACAMQIMAADAGAEGAABgjAADAAAw1qd/F2S0hqYkKWlQdKtdze9x+lTnhZDOtnVGdT4AAHD96ZcBljQoQbc8WWl2vrpfFOqs2dkAAEC84yVIAAAAYwQYAACAMQIMAADAGAEGAABgjAADAAAwRoABAAAYI8AAAACMEWAAAADGCDAAAABjBBgAAIAxAgwAAMAYAQYAAGCMAAMAADBGgAEAABgjwAAAAIwRYAAAAMauKMDWrl2rwsJCTZs2TQsWLNCFCxdUX1+v0tJS+f1+lZWVKRgMSpKCwaDKysrk9/tVWlqqhoaGyP2sXr1afr9fBQUF2rt3b+9sBAAA0Md1G2CBQEDr1q3Tpk2b9NZbbykcDquyslIrV67UrFmztGPHDqWkpGjjxo2SpA0bNiglJUU7duzQrFmztHLlSknS8ePHVVlZqcrKSr388st67rnnFA6He3c7AACAPuiKngELh8M6f/68QqGQzp8/r7S0NNXW1qqgoECSVFxcrKqqKknSrl27VFxcLEkqKCjQ/v375TiOqqqqVFhYqMTERGVkZGj06NE6fPhwL60FAADQdyV09wler1dz5szRd77zHQ0aNEg5OTkaN26cUlJSlJBw6XCfz6dAICDp0jNmI0eOvHTnCQlKTk7WmTNnFAgElJWV9bn7/fSYL+N2u+TxDIl6OUvxMKfbPSAu5owW+8Wv/rybxH7xjv3iV1/erdsAa21tVVVVlaqqqpScnKxHH33U7PqtcNhRS8u5qz4uLS25F6b5atHMac3jGRIXc0aL/eJXf95NYr94x37xy3q3q+mPbl+C3Ldvn2666SalpqZq4MCBys/P18GDB9XW1qZQKCRJamxslNfrlXTpma0TJ05IkkKhkNrb2zV8+HB5vV41NjZG7jcQCESOAQAAuJ50G2CjRo3SoUOH1NnZKcdxtH//fn3ta19Tdna2tm3bJkmqqKhQXl6eJCkvL08VFRWSpG3btmnixIlyuVzKy8tTZWWlgsGg6uvrVVdXp/Hjx/fiagAAAH1Tty9BZmVlqaCgQMXFxUpISFBmZqa+//3v67777tNjjz2mVatWKTMzU6WlpZKkkpISLVq0SH6/X8OGDdOvf/1rSdLYsWM1depU3X///XK73XrmmWfkdrt7dzsAAIA+yOU4jhPrIb7MxYvhqK8Bu+XJyl6Y6PLqflGokyfbzc4Xrf78Or/EfvGsP+8msV+8Y7/4FdfXgAEAAKBnEWAAAADGCDAAAABjBBgAAIAxAgwAAMAYAQYAAGCMAAMAADBGgAEAABgjwAAAAIwRYAAAAMYIMAAAAGMEGAAAgDECDAAAwBgBBgAAYIwAAwAAMEaAAQAAGCPAAAAAjBFgAAAAxggw
AAAAYwQYAACAMQIMAADAGAEGAABgjAADAAAwRoABAAAYI8AAAACMEWAAAADGCDAAAABjBBgAAIAxAgwAAMAYAQYAAGCMAAMAADBGgAEAABgjwAAAAIwRYAAAAMYIMAAAAGMEGAAAgDECDAAAwBgBBgAAYIwAAwAAMEaAAQAAGCPAAAAAjBFgAAAAxggwAAAAYwQYAACAMQIMAADAGAEGAABgjAADAAAwRoABAAAYI8AAAACMEWAAAADGrijA2traNH/+fE2ZMkVTp07V3/72N7W0tGj27NnKz8/X7Nmz1draKklyHEfLli2T3+/X9OnTdeTIkcj9VFRUKD8/X/n5+aqoqOidjQAAAPq4Kwqw5cuX69vf/rbeeecdbd26VWPGjNGaNWs0adIkbd++XZMmTdKaNWskSdXV1aqrq9P27dv1/PPPa+nSpZKklpYWlZeXa/369dqwYYPKy8sj0QYAAHA96TbA2tvb9de//lUlJSWSpMTERKWkpKiqqkpFRUWSpKKiIu3cuVOSIre7XC7deeedamtrU1NTk2pqapSTkyOPx6Nhw4YpJydHe/fu7cXVAAAA+qaE7j6hoaFBqampeuqpp3Ts2DGNGzdOTz/9tJqbm5Weni5JSktLU3NzsyQpEAjI5/NFjvf5fAoEAl+43ev1KhAI9PQ+AAAAfV63ARYKhfT3v/9dS5YsUVZWlpYtWxZ5ufFTLpdLLperx4dzu13yeIb0+P32hniY0+0eEBdzRov94ld/3k1iv3jHfvGrL+/WbYD5fD75fD5lZWVJkqZMmaI1a9ZoxIgRampqUnp6upqampSamirp0jNbjY2NkeMbGxvl9Xrl9Xr17rvvRm4PBAKaMGHCV547HHbU0nLuqpdKS0u+6mOuVTRzWvN4hsTFnNFiv/jVn3eT2C/esV/8st7tavqj22vA0tLS5PP59K9//UuStH//fo0ZM0Z5eXnasmWLJGnLli2aPHmyJEVudxxHH3zwgZKTk5Wenq7c3FzV1NSotbVVra2tqqmpUW5ubjT7AQAAxLVunwGTpCVLlujxxx/XxYsXlZGRoZ///Ofq6upSWVmZNm7cqFGjRmnVqlWSpHvvvVd79uyR3+9XUlKSVqxYIUnyeDx6+OGHIxfzP/LII/J4PL20FgAAQN/lchzHifUQX+bixXDUL0He8mRlL0x0eXW/KNTJk+1m54tWf36aWWK/eNafd5PYL96xX/yK65cgAQAA0LMIMAAAAGMEGAAAgDECDAAAwBgBBgAAYIwAAwAAMEaAAQAAGCPAAAAAjBFgAAAAxggwAAAAYwQYAACAMQIMAADAGAEGAABgjAADAAAwRoABAAAYI8AAAACMEWAAAADGCDAAAABjBBgAAIAxAgwAAMAYAQYAAGCMAAMAADBGgAEAABgjwAAAAIwRYAAAAMYIMAAAAGMEGAAAgDECDAAAwBgBBgAAYIwAAwAAMJYQ6wFw9UYMS9CAxKSojk1LS77qY7qCnWpuDUV1PgAA8EUEWBwakJgkLR1md76lrZLazc4HAEB/x0uQAAAAxggwAAAAYwQYAACAMQIMAADAGAEGAABgjAADAAAwRoABAAAYI8AAAACMEWAAAADGCDAAAABjBBgAAIAxAgwAAMAYAQYAAGCMAAMAADBGgAEAABgjwAAAAIwRYAAAAMYIMAAAAGMEGAAAgLErDrBwOKyioiL95Cc/kSTV19ertLRUfr9fZWVlCgaDkqRgMKiysjL5/X6VlpaqoaEhch+rV6+W3+9XQUGB9u7d28OrAAAAxIcrDrB169ZpzJgxkT+vXLlSs2bN0o4dO5SSkqKNGzdKkjZs2KCUlBTt2LFDs2bN0sqVKyVJx48fV2VlpSorK/Xyyy/rueeeUzgc7uF1AAAA+r4rCrDGxkbt3r1bJSUlkiTHcVRbW6uCggJJUnFxsaqqqiRJu3btUnFxsSSpoKBA+/fvl+M4qqqqUmFhoRITE5WRkaHRo0fr8OHDvbETAABAn5Zw
JZ+0YsUKLVq0SB0dHZKkM2fOKCUlRQkJlw73+XwKBAKSpEAgoJEjR16684QEJScn68yZMwoEAsrKyorcp9frjRzzZdxulzyeIVe/VQzEy5zRiof93O4BcTFntPrzfv15N4n94h37xa++vFu3AfbnP/9Zqamp+uY3v6kDBw5YzBQRDjtqaTl31celpSX3wjRfLZo5o9Xf94uWxzMkLuaMVn/erz/vJrFfvGO/+GW929V8f+42wA4ePKhdu3apurpaFy5c0NmzZ7V8+XK1tbUpFAopISFBjY2N8nq9ki49s3XixAn5fD6FQiG1t7dr+PDh8nq9amxsjNxvIBCIHAMAAHA96fYasIULF6q6ulq7du3Sr371K02cOFG//OUvlZ2drW3btkmSKioqlJeXJ0nKy8tTRUWFJGnbtm2aOHGiXC6X8vLyVFlZqWAwqPr6etXV1Wn8+PG9uBoAAEDfFPX7gC1atEivvvqq/H6/WlpaVFpaKkkqKSlRS0uL/H6/Xn31VT3++OOSpLFjx2rq1Km6//779dBDD+mZZ56R2+3umS0AAADiyBVdhP+p7OxsZWdnS5IyMjIibz3xWYMGDdJvfvObyx4/b948zZs3L4oxAQAA+g/eCR8AAMAYAQYAAGCMAAMAADBGgAEAABgjwAAAAIwRYAAAAMYIMAAAAGMEGAAAgDECDAAAwBgBBgAAYIwAAwAAMEaAAQAAGCPAAAAAjBFgAAAAxggwAAAAYwQYAACAMQIMAADAGAEGAABgjAADAAAwRoABAAAYI8AAAACMEWAAAADGCDAAAABjBBgAAIAxAgwAAMAYAQYAAGCMAAMAADBGgAEAABgjwAAAAIwRYAAAAMYIMAAAAGMEGAAAgDECDAAAwBgBBgAAYIwAAwAAMEaAAQAAGCPAAAAAjBFgAAAAxggwAAAAYwQYAACAMQIMAADAGAEGAABgjAADAAAwRoABAAAYI8AAAACMEWAAAADGCDAAAABjBBgAAIAxAgwAAMAYAQYAAGCs2wA7ceKEfvjDH+r+++9XYWGh/vjHP0qSWlpaNHv2bOXn52v27NlqbW2VJDmOo2XLlsnv92v69Ok6cuRI5L4qKiqUn5+v/Px8VVRU9NJKAAAAfVu3AeZ2u/Xkk0/q7bff1htvvKE//elPOn78uNasWaNJkyZp+/btmjRpktasWSNJqq6uVl1dnbZv367nn39eS5culXQp2MrLy7V+/Xpt2LBB5eXlkWgDAAC4nnQbYOnp6Ro3bpwkaejQobrtttsUCARUVVWloqIiSVJRUZF27twpSZHbXS6X7rzzTrW1tampqUk1NTXKycmRx+PRsGHDlJOTo7179/biagAAAH3TVV0D1tDQoKNHjyorK0vNzc1KT0+XJKWlpam5uVmSFAgE5PP5Isf4fD4FAoEv3O71ehUIBHpiBwAAgLiScKWf2NHRofnz52vx4sUaOnTo5z7mcrnkcrl6fDi32yWPZ0iP329viJc5oxUP+7ndA+Jizmj15/36824S+8U79otffXm3Kwqwixcvav78+Zo+fbry8/MlSSNGjFBTU5PS09PV1NSk1NRUSZee2WpsbIwc29jYKK/XK6/Xq3fffTdyeyAQ0IQJE77yvOGwo5aWc1e9VFpa8lUfc62imTNa/X2/aHk8Q+Jizmj15/36824S+8U79otf1rtdzffnbl+CdBxHTz/9tG677TbNnj07cnteXp62bNkiSdqyZYsmT578udsdx9EHH3yg5ORkpaenKzc3VzU1NWptbVVra6tqamqUm5t7tbsBAADEvW6fAXv//fe1detW3X777ZoxY4YkacGCBZo7d67Kysq0ceNGjRo1SqtWrZIk3XvvvdqzZ4/8fr+SkpK0YsUKSZLH49HDDz+skpISSdIjjzwij8fTW3sBAAD0Wd0G2N13362PPvrosh/79D3BPsvlcunZZ5+97OeXlJREAgwAAOB6xTvhAwAAGCPAAAAAjBFgAAAAxggwAAAAYwQY
AACAMQIMAADAGAEGAABgjAADAAAwRoABAAAYI8AAAACMEWAAAADGCDAAAABjBBgAAIAxAgwAAMAYAQYAAGCMAAMAADBGgAEAABgjwAAAAIwRYAAAAMYIMAAAAGMEGAAAgDECDAAAwBgBBgAAYIwAAwAAMEaAAQAAGCPAAAAAjBFgAAAAxggwAAAAYwQYAACAMQIMAADAGAEGAABgjAADAAAwRoABAAAYI8AAAACMEWAAAADGCDAAAABjBBgAAIAxAgwAAMAYAQYAAGCMAAMAADBGgAEAABgjwAAAAIwRYAAAAMYIMAAAAGMEGAAAgDECDAAAwBgBBgAAYIwAAwAAMEaAAQAAGEuI9QDAfxvqGaikgYOjOjYtLfmqj+m8eF5nWy5Gdb5oDB86UAlJdvuFOs/rzFm7/QAA3TMPsOrqai1fvlxdXV0qLS3V3LlzrUdAH5c0cLDu+OMdZuf78H8+1FnZBUpC0mAd/Uam2fkyjx2VCDAA6FNMX4IMh8P62c9+ppdfflmVlZV66623dPz4ccsRAAAAYs40wA4fPqzRo0crIyNDiYmJKiwsVFVVleUIAAAAMWf6EmQgEJDP54v82ev16vDhw5YjAOhFw1KSlDgoui8r0VzfFrwQUmtbZ1Tni8aw5EQlDh4U1bFR7Xf+glrbg1GdLxqpKUlyGz5+4QshnTZ8/IC+pE9fhD9woDuqv9SSVPeLwh6e5qtFO2fUlraans56vw//50PT81nvl3nsqOn5zP//NJI4KKHf7iZJiYMHKS3K4IsH7jh6/OJlzmj15/366m6mL0F6vV41NjZG/hwIBOT1ei1HAAAAiDnTALvjjjtUV1en+vp6BYNBVVZWKi8vz3IEAACAmDN9CTIhIUHPPPOMHnroIYXDYc2cOVNjx461HAEAACDmXI7jOLEeAgAA4HrCryICAAAwRoABAAAYI8AAAACMEWAAAADG3EuXLl0a6yFi5eOPP9ZHH32k1NRUJSYmRm6vrq7W6NGjYzhZzzh8+HDkvdaOHz+urVu3qq2tTbfcckusR+txTzzxhPx+f6zH6BGHDh3S0KFDlZiYqPPnz+t3v/udXn31VR09elTjx4/XoEHx/cac69atU3p6upKT++abI/a09957T9u2bVNHR4duvvnmWI9zzYLBoN58802dPn1aGRkZevPNN/XGG2+ooaFBmZmZcrvdsR4RX6G+vl4bN27U22+/rb/85S9qaGjQrbfe+rnvgbBx3f4U5Lp16/T6669rzJgxOnbsmBYvXqzvfve7kqTi4mJVVFTEeMJrU15erurqaoVCIeXk5OjQoUPKzs7Wvn37lJubq3nz5sV6xKj99Kc//cJtBw4cUHZ2tiTppZdesh6pRxUWFmrr1q1KSEjQkiVLNHjwYBUUFKi2tlbHjh1TeXl5rEe8JnfddZeSkpJ08803q7CwUFOnTlVqamqsx+oxJSUl2rhxoyRp/fr1ev311+X3+1VTU6O8vDzNnTs3xhNem4ULFyocDuv8+fNKTk7WuXPn5Pf7VVtbK8dx9MILL8R6RHyJdevWaffu3br77rtVXV2tzMxMpaSkaMeOHXr22WcjX0NhxLlOTZs2zTl79qzjOI5TX1/vFBcXO2vXrnUcx3FmzJgRy9F6xLRp05xQKOScO3fO+da3vuW0t7c7juM4nZ2dzrRp02I83bUpKipyFi5c6NTW1joHDhxwamtrnZycHOfAgQPOgQMHYj3eNZsyZUrkv4uKij73se9973vW4/S4GTNmOOFw2Nm7d6/z1FNPOdnZ2c6cOXOczZs3R/4/jWef/frxwAMPOM3NzY7jOE5HR0fc/91zHCeyw8WLF51JkyY5oVDIcRzH6erq6hf7fZUf/ehHsR7hmnz6fcFxHOfcuXPOD37wA8dxHOff//53v/i+5ziO09bW5rz44otOQUGBc8899zgTJkxwpkyZ4rz44otOa2trrMf7nD79uyB7U1dXl2644QZJ0k033aTXXntN8+fP13/+
8x85/eBJQbfbLbfbHXmmYejQoZKkwYMHa8CA+L70b9OmTVq3bp1eeuklPfHEE8rMzNSgQYM0YcKEWI/WI8aOHatNmzZp5syZ+sY3vqEPP/xQd9xxhz755BMlJMT/X1mXy6UBAwYoNzdXubm5unjxoqqrq1VZWakXXnhBtbW1sR7xmnR1dam1tVVdXV1yHCfy7N6QIUP6xctzjuMoGAyqs7NTnZ2dam9vl8fjUTAYVCgUivV41+zIkSOXvd1xHB07dsx4mp4XDofldrsVDAbV0dEhSRo1alS/eOwkqaysTNnZ2XrttdeUlpYmSTp58qQqKipUVlamP/zhDzGe8P/F/1fzKI0YMUJHjx5VZmamJOmGG27Q6tWrtXjxYv3jH/+I8XTXbuDAgers7FRSUpI2b94cub29vT3uA2zAgAGaNWuWpkyZohUrVujGG29UOByO9Vg9Zvny5Vq+fLl+//vfa/jw4XrwwQfl8/k0cuRILV++PNbjXbP//gfOwIEDNXnyZE2ePFmdnZ0xmqrnnD17Vg888IAcx5HL5VJTU5PS09PV0dHRL/5xV1JSoqlTp6qrq0uPPfaYHn30UWVkZOjQoUMqLCyM9XjXrKSkRPfcc89lH6u2trYYTNRzSkpKNHPmTGVlZem9997Tj3/8Y0nS6dOnNWzYsBhP1zMaGhr0yiuvfO62tLQ0zZ07V5s2bYrRVJd33V4D1tjYKLfbHSnkz3r//fd11113xWCqnhMMBi97UeXp06d18uRJff3rX4/BVL1j9+7dOnjwoBYsWBDrUXrU2bNn1dDQoFAoJJ/PpxtvvDHWI/WITz75RLfeemusxzDX2dmpU6dOKSMjI9ajXLNAICBJ8nq9amtr0759+zRq1CiNHz8+xpNdu2nTpqm8vPyyP6x07733as+ePfZD9aB//vOf+vjjjzV27FiNGTMm1uP0uDlz5mjSpEkqLi6OfM08deqUNm/erH379mnt2rWxHfAzrtsAAwDgv73zzju6/fbbddttt33hYzt37oz8sBb6ptbWVq1Zs0ZVVVU6ffq0pEuveH36AzB96Zk+AgwAgCvw6bWZiE997fGL74uBAAAw8tvf/jbWI+Aa9LXH77q9CB8AgP82ffr0L/3YqVOnDCdBNOLp8SPAAAD4P83NzXrllVeUkpLyudsdx9GDDz4Yo6lwpeLp8SPAAAD4P/fdd586Ojoib1H0WbxTfN8XT48fF+EDAAAY4yJ8AAAAYwQYAACAMQIMAADAGAEGAABgjAADAAAw9r+4zFev7koh3AAAAABJRU5ErkJggg==\n",
520 | "text/plain": [
521 | ""
522 | ]
523 | },
524 | "metadata": {},
525 | "output_type": "display_data"
526 | }
527 | ],
528 | "source": [
529 | "plt.figure(figsize=(10,5))\n",
530 | "df['TLD'].value_counts().plot(kind='bar')"
531 | ]
532 | },
533 | {
534 | "cell_type": "code",
535 | "execution_count": 162,
536 | "metadata": {},
537 | "outputs": [
538 | {
539 | "data": {
540 | "text/plain": [
541 | ""
542 | ]
543 | },
544 | "execution_count": 162,
545 | "metadata": {},
546 | "output_type": "execute_result"
547 | },
548 | {
549 | "data": {
550 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAloAAAE1CAYAAAA/EU74AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAGqdJREFUeJzt3X9QVOfd9/HPcgADBVl+E70hiiVTR42ZqTZDY7CDJVTBSFViMuNMRVPG6GiRzhhtp8bQGrXp7WjMM0bGNNWM06lVAlVsavBRsbXG2LRR09ifOgOduFQU/IVBlr3/yN19HstWlLOXZ3d5v/4Je+3Z63yv7+y4n5xz9qzL5/P5BAAAgKCLcroAAACASEXQAgAAMISgBQAAYAhBCwAAwBCCFgAAgCEELQAAAEMIWgAAAIYQtAAAAAwhaAEAABgS7XQBktTb2yuvNzRuUG9ZrpCpJZTQl8DoS1/0JDD6Ehh9CYy+9BVKPYmJse5625AIWl6vTx0dN5wuQ5LkdseHTC2hhL4ERl/6oieB0ZfA6Etg9KWvUOpJenriXW/LqUMAAABDCFoAAACGELQAAAAMIWgBAAAYQtACAAAwhKAFAABgCEELAADAEIIWAACAIQQtAAAAQwhaAAAAhhC0AAAADAmJ3zoMloShcYobYn9J9/IbRv9J16c9unaly/Y8AAAgfEVU0IobEq0RKxqdLkOSdH5dia45XQQAAHAUpw4BAAAMIWgBAAAYQtACAAAwhKAFAABgCEELAADAEIIWAACAIQQtAAAAQwhaAAAAhhC0AAAADCFoAQAAGNLvT/CsXLlShw8fVmpqqvbt2ydJqqqq0rlz5yRJV69eVWJiohoaGtTa2qpp06Zp5MiRkqTx48erpqbGYPkAAAChq9+gNXPmTM2dO1cvvPCCf2zjxo3+v9etW6eEhAT/45ycHDU0NAS5TAAAgPDT76nDiRMnKikpKeBzPp9Pv/zlL1VaWhr0wgAAAMKdrWu0Tp48qdTUVI0YMcI/1traqrKyMs2dO1cnT560Wx8AAEDY6vfU4Z3s27fvtqNZGRkZOnTokJKTk3XmzBktXrxYjY2Nt51aDMSyXHK74+2UEpIiaU2WFRVR6wkW+tIXPQmMvgRGXwKjL32Fa08GHLR6enr07rvvqq6uzj8WGxur2NhYSdLYsWOVk5Ojc+fOady4cXecy+v1qaPjxkBL8UtPT7Q9RzAFY02hwu2Oj6j1BAt96YueBEZfAqMvgdGXvkKpJ/eSNwZ86vDYsWPKzc1VVlaWf+zSpUvyer2SpJaWFp0/f17Z2dkD3QUAAEBY6/eIVnV1tU6cOKHLly+roKBAS5YsUXl5ufbv36+SkpLbtn3//ff16quvKjo6WlFRUXrppZfkdruNFQ8AABDK+g1aGzZsCDi+bt26PmPFxcUqLi62XxUAAEAE4M7wAAAAhhC0AAAADCFoAQAAGELQAgAAMISgBQAAYAhBCwAAwBCCFgAAgCEELQAAAEMIWgAAAIYQtAAAAAwhaAEAABjS728dIvylJkUrKjbO9jzp6Ym2Xt/b3aX2zh7bdQAAEC4IWoNAVGyctDrJ6TIUtbpT0lWnywAA4L7h1CEAAIAhBC0AAABDCFoAAACGELQAAAAMIWgBAAAYQtACAAAwhKAFAABgCEELAADAEIIWAACAIQQtAAAAQwhaAAAAhhC0AAAADCFoAQAAGNJv0Fq5cqXy8/NVWlrqH9u8ebOeeOIJzZgxQzNmzNCRI0f8z23dulVFRUUqLi7W0aNHzVQNAAAQBqL722DmzJmaO3euXnjhhdvG582bpwULFtw29te//lWNjY1qbGyUx+NRRUWFfvWrX8myrOBWDQAAEAb6PaI1ceJEJSUl3dVkBw8eVElJiWJjY5Wdna2HHnpIp06dsl0kAABAOOr3iNZ/snPnTtXX12vs2LFasWKFkpKS5PF4NH78eP82mZmZ
8ng8/c5lWS653fEDLSVkReKa7Iq0nlhWVMStyS56Ehh9CYy+BEZf+grXngwoaD377LNatGiRXC6XNm3apHXr1mnt2rUDLsLr9amj48aAX/8v6emJtucIpmCsKRhCqS+h0pNgcbvjI25NdtGTwOhLYPQlMPrSVyj15F4+Vwf0rcO0tDRZlqWoqCiVl5fr9OnTkj47gnXhwgX/dh6PR5mZmQPZBQAAQNgbUNBqa2vz/93U1KS8vDxJUmFhoRobG9Xd3a2WlhadP39ejzzySHAqBQAACDP9njqsrq7WiRMndPnyZRUUFGjJkiU6ceKEzp49K0kaPny4ampqJEl5eXmaOnWqpk2bJsuytGrVKr5xCAAABq1+g9aGDRv6jJWXl//H7Z9//nk9//zz9qoCAACIANwZHgAAwBCCFgAAgCEELQAAAEMIWgAAAIYQtAAAAAwhaAEAABhC0AIAADCEoAUAAGAIQQsAAMAQghYAAIAhBC0AAABDCFoAAACGELQAAAAMIWgBAAAYQtACAAAwhKAFAABgCEELAADAEIIWAACAIQQtAAAAQwhaAAAAhhC0AAAADCFoAQAAGELQAgAAMISgBQAAYAhBCwAAwBCCFgAAgCHR/W2wcuVKHT58WKmpqdq3b58kaf369Tp06JBiYmKUk5OjtWvXaujQoWptbdW0adM0cuRISdL48eNVU1NjdgUAAAAhqt8jWjNnztS2bdtuG3v88ce1b98+7d27VyNGjNDWrVv9z+Xk5KihoUENDQ2ELAAAMKj1G7QmTpyopKSk28YmTZqk6OjPDoY9+uijunDhgpnqAAAAwli/pw77s2fPHk2dOtX/uLW1VWVlZUpISFBVVZUmTJjQ7xyW5ZLbHW+3lJATiWuyK9J6YllREbcmu+hJYPQlMPoSGH3pK1x7YitobdmyRZZl6amnnpIkZWRk6NChQ0pOTtaZM2e0ePFiNTY2KiEh4Y7zeL0+dXTcsFOKJCk9PdH2HMEUjDUFQyj1JVR6Eixud3zErckuehIYfQmMvgRGX/oKpZ7cy+fqgL91WFdXp8OHD+tHP/qRXC6XJCk2NlbJycmSpLFjxyonJ0fnzp0b6C4AAADC2oCCVnNzs7Zt26YtW7YoLi7OP37p0iV5vV5JUktLi86fP6/s7OzgVAoAABBm+j11WF1drRMnTujy5csqKCjQkiVLVFtbq+7ublVUVEj6f7dxeP/99/Xqq68qOjpaUVFReumll+R2u40vAgAAIBT1G7Q2bNjQZ6y8vDzgtsXFxSouLrZfFQAAQATgzvAAAACGELQAAAAMIWgBAAAYQtACAAAwhKAFAABgCEELAADAEIIWAACAIQQtAAAAQwhaAAAAhhC0AAAADCFoAQAAGELQAgAAMISgBQAAYAhBCwAAwBCCFgAAgCEELQAAAEMIWgAAAIYQtAAAAAwhaAEAABhC0AIAADCEoAUAAGAIQQsAAMAQghYAAIAhBC0AAABDCFoAAACG3FXQWrlypfLz81VaWuof6+joUEVFhZ588klVVFSos7NTkuTz+fSDH/xARUVFmj59uj766CMzlQMAAIS4uwpaM2fO1LZt224bq62tVX5+vg4cOKD8/HzV1tZKkpqbm3X+/HkdOHBA3//+97V69eqgFw0AABAO7ipoTZw4UUlJSbeNHTx4UGVlZZKksrIyNTU13Tbucrn06KOP6sqVK2prawty2QAAAKFvwNdotbe3KyMjQ5KUnp6u9vZ2SZLH41FWVpZ/u6ysLHk8HptlAgAAhJ/oYEzicrnkcrkG/HrLcsntjg9GKSElEtdkV6T1xLKiIm5NdtGTwOhLYPQlMPrSV7j2ZMBBKzU1VW1tbcrIyFBbW5tSUlIkSZmZmbpw4YJ/uwsXLigzM/OOc3m9PnV03BhoKX7p6Ym25wimYKwpGEKpL6HSk2Bxu+Mjbk120ZPA6Etg9CUw+tJXKPXkXj5XB3zqsLCwUPX19ZKk+vp6TZky5bZxn8+nP/zhD0pM
TPSfYgQAABhM7uqIVnV1tU6cOKHLly+roKBAS5YsUWVlpaqqqrR7924NGzZMGzdulCRNnjxZR44cUVFRkeLi4vTyyy8bXQAAAECouqugtWHDhoDj27dv7zPmcrn04osv2qsKAAAgAnBneAAAAEMIWgAAAIYQtAAAAAwhaAEAABhC0AIAADCEoAUAAGAIQQsAAMAQghYAAIAhBC0AAABDCFoAAACGELQAAAAMIWgBAAAYQtACAAAwhKAFAABgCEELAADAEIIWAACAIQQtAAAAQwhaAAAAhhC0AAAADCFoAQAAGELQAgAAMISgBQAAYAhBCwAAwBCCFgAAgCEELQAAAEMIWgAAAIZED/SFf//737Vs2TL/45aWFi1dulRXr17Vrl27lJKSIkmqrq7W5MmT7VcKAAAQZgYctHJzc9XQ0CBJ8nq9KigoUFFRkerq6jRv3jwtWLAgaEUCAACEo6CcOvztb3+r7OxsDR8+PBjTAQAARISgBK3GxkaVlpb6H+/cuVPTp0/XypUr1dnZGYxdAAAAhB2Xz+fz2Zmgu7tbTzzxhBobG5WWlqaLFy8qOTlZLpdLmzZtUltbm9auXXvHOXp7e+X12ipDkhQTY2nEikbb8wTD+XUlunXL63QZkj7ri1YnOV2GtLozZHoSLJYVJa+31+kyQgo9CYy+BEZfAqMvfYVST2JirLvedsDXaP1Lc3OzxowZo7S0NEny/1eSysvLtXDhwn7n8Hp96ui4YbcUpacn2p4jmIKxpmAIpb6ESk+Cxe2Oj7g12UVPAqMvgdGXwOhLX6HUk3v5XLV96rCxsVElJSX+x21tbf6/m5qalJeXZ3cXAAAAYcnWEa0bN27o2LFjqqmp8Y+98sorOnv2rCRp+PDhtz0HAAAwmNgKWvHx8XrvvfduG3vllVdsFQQAABApuDM8AACAIQQtAAAAQwhaAAAAhhC0AAAADCFoAQAAGELQAgAAMISgBQAAYAhBCwAAwBCCFgAAgCEELQAAAEMIWgAAAIYQtAAAAAwhaAEAABhC0AIAADCEoAUAAGAIQQsAAMAQghYAAIAhBC0AAABDCFoAAACGELQAAAAMIWgBAAAYQtACAAAwhKAFAABgCEELAADAEIIWAACAIQQtAAAAQ6LtTlBYWKjPfe5zioqKkmVZqqurU0dHh5YtW6Z//OMfGj58uDZu3KikpKRg1AsAABA2gnJEa/v27WpoaFBdXZ0kqba2Vvn5+Tpw4IDy8/NVW1sbjN0AAACEFSOnDg8ePKiysjJJUllZmZqamkzsBgAAIKTZPnUoSQsWLJDL5dKcOXM0Z84ctbe3KyMjQ5KUnp6u9vb2O77eslxyu+ODUUpIicQ12RVpPbGsqIhbk130JDD6Ehh9CYy+9BWuPbEdtH76058qMzNT7e3tqqioUG5u7m3Pu1wuuVyuO87h9frU0XHDbilKT0+0PUcwBWNNwRBKfQmVngSL2x0fcWuyi54ERl8Coy+B0Ze+Qqkn9/K5avvUYWZmpiQpNTVVRUVFOnXqlFJTU9XW1iZJamtrU0pKit3dAAAAhB1bQevGjRu6du2a/+/f/OY3ysvLU2Fhoerr6yVJ9fX1mjJliv1KAQAAwoytU4ft7e1avHixJMnr9aq0tFQFBQUaN26cqqqqtHv3bg0bNkwbN24MSrEAAADhxFbQys7O1i9+8Ys+48nJydq+fbudqQEAAMIed4YHAAAwhKAFAABgCEELAADAEIIWAACAIQQtAAAAQwhaAAAAhhC0AAAADCFoAQAAGELQAgAAMISgBQAAYAhBCwAAwBBbv3UIhKsEd4ziYh4Iylzp6Ym2Xt9166auddwKSi12JSfEKDrOfl/s9kSSerpu6vK10OgLAAwUQQuDUlzMAxq3fZzTZUiSTn/jtK4pNAJFdNwD+vgLo50uQ5I0+uzHEkELQJjj1CEAAIAhBC0AAABDCFoAAACGELQAAAAMIWgBAAAYQtACAAAwhKAFAABgCEELAADAEIIW
AACAIQQtAAAAQwhaAAAAhhC0AAAADCFoAQAAGBI90Bd+8sknWr58udrb2+VyufT000/rG9/4hjZv3qxdu3YpJSVFklRdXa3JkycHrWAAAIBwMeCgZVmWVqxYoTFjxujatWuaNWuWHn/8cUnSvHnztGDBgqAVCQAAEI4GHLQyMjKUkZEhSUpISFBubq48Hk/QCgMAAAh3Aw5a/7/W1lZ9/PHHGj9+vD744APt3LlT9fX1Gjt2rFasWKGkpKQ7vt6yXHK744NRSkiJxDXZRU8Coy+BRVJfLCsqotYTLPQlMPrSV7j2xHbQun79upYuXarvfOc7SkhI0LPPPqtFixbJ5XJp06ZNWrdundauXXvHObxenzo6btgtRenpibbnCKZgrCkYQqkv9CQw+hJYqPQlGNzu+IhaT7DQl8DoS1+h1JN7+bfS1rcOb926paVLl2r69Ol68sknJUlpaWmyLEtRUVEqLy/X6dOn7ewCAAAgbA04aPl8Pn33u99Vbm6uKioq/ONtbW3+v5uampSXl2evQgAAgDA14FOHv/vd79TQ0KCHH35YM2bMkPTZrRz27duns2fPSpKGDx+umpqa4FQKAAAQZgYctCZMmKA//elPfca5ZxYAAMBnuDM8AACAIQQtAAAAQwhaAAAAhgTlhqUAEMmShsYpdoj9fy6DcZ+y7k971Hmly/Y8AO4PghYA9CN2SLT+z8L/63QZkqTFrxc6XQKAe8CpQwAAAEMIWgAAAIYQtAAAAAwhaAEAABhC0AIAADCEoAUAAGAIQQsAAMAQghYAAIAhBC0AAABDCFoAAACGELQAAAAMIWgBAAAYQtACAAAwhKAFAABgCEELAADAEIIWAACAIQQtAAAAQwhaAAAAhkQ7XQAAIDwlJcYq9oEhtudJT0+09frum5+q82q37TqCIWVonKwhwflotdsX76c9unSlKyi1YOAIWgCAAYl9YIj+e06p02Xo2z/bJ4VI0LKGRKt1xVGny5Ak/de6J5wuATIYtJqbm7VmzRr19vaqvLxclZWVpnYFAABCWFLSEMXGxtqex+5RPknq7u5WZ+entue5W0aCltfrVU1Njd58801lZmZq9uzZKiws1Oc//3kTuwMAACEsNjZWq1evdroMSfrfOu5f0DJyMfypU6f00EMPKTs7W7GxsSopKdHBgwdN7AoAACBkGQlaHo9HWVlZ/seZmZnyeDwmdgUAABCyXD6fzxfsSd955x0dPXpUa9askSTV19fr1KlTWrVqVbB3BQAAELKMHNHKzMzUhQsX/I89Ho8yMzNN7AoAACBkGQla48aN0/nz59XS0qLu7m41NjaqsLDQxK4AAABClpFvHUZHR2vVqlV67rnn5PV6NWvWLOXl5ZnYFQAAQMgyco0WAAAA+K1DAAAAYwhaAAAAhhC0AAAADCFoAQAAGGLsR6XD1fLly/XDH/7Q6TIc9eGHH2rUqFFKSEjQzZs3VVtbqz/+8Y8aNWqUFi5cqMRE+z/qGY527NihoqIiPfjgg06XErJOnjyp06dPKy8vT5MmTXK6HMd0d3dr//79ysjI0Je//GXt3btXv//97zVq1Cg9/fTTiomJcbpEhJCWlhYdOHBAn3zyiSzL0ogRIzR9+nQlJCQ4XRqCYFB/63DhwoV9xt577z099thjkqTXX3/9fpcUEkpKStTQ0KDo6Gh973vf0wMPPKDi4mIdP35cZ8+e1WuvveZ0iY744he/qLi4OOXk5KikpERTp05VSkqK02U5avbs2dq9e7ckadeuXdq5c6eKior061//WoWFhaqsrHS4Qmd8+9vfltfr1c2bN5WYmKgbN26oqKhIx48fl8/n0/r1650uESFix44dOnz4sCZMmKDm5maNHj1aQ4cO1bvvvqsXX3zR/3mE8DWoj2h5PB6NGjVK5eXlcrlc8vl8OnPmjObPn+90aY7q7e1VdPRnb40zZ87o7bffliRNmDBBM2bMcLI0R2VnZ6uurk7Hjh3T/v37tXnzZo0Z
M0alpaUqKioalP/32dPT4//7Zz/7md58802lpKRo/vz5mjNnzqANWn/+85+1d+9e9fT0qKCgQEePHpVlWZoxY4aeeuopp8sLSc8995y2bdvmdBn33c9//nPV19fLsixVVFSosrJSb731lubMmaNFixapvr7e6RIdc/XqVW3dulVNTU26dOmSXC6XUlJSNGXKFFVWVmro0KFOl3hXBnXQ2rNnj3bs2KHXX39dy5cv1+jRozVkyBB96Utfcro0R+Xl5WnPnj2aNWuWvvCFL+j06dMaN26czp075w9gg5HL5VJUVJQmTZqkSZMm6datW2publZjY6PWr1+v48ePO13ifdfb26vOzk719vbK5/P5j/DFx8fLsiyHq3OOz+dTd3e3urq61NXVpatXr8rtdqu7u/u2cDrYfPTRRwHHfT6fzp49e5+rCR1er1eWZam7u1vXr1+XJA0bNmxQv1ckqaqqSo899pjeeustpaenS5L++c9/6u2331ZVVZV+/OMfO1zh3Rm8n5qSoqKiNG/ePH3ta1/Tyy+/rLS0NHm9XqfLctyaNWu0Zs0abdmyRcnJyXrmmWeUlZWlBx980P9D4YPRv59lj4mJ0ZQpUzRlyhR1dXU5VJWzrl27ppkzZ8rn88nlcqmtrU0ZGRm6fv16n34NJrNnz9bUqVPV29urZcuW6Vvf+pays7P14YcfqqSkxOnyHDN79mxNnDgx4HvjypUrDlTkvNmzZ2vWrFkaP368Tp48qW9+85uSpEuXLikpKcnh6pzV2tqqN95447ax9PR0VVZWas+ePQ5Vde8G9TVa/+7w4cP64IMPVF1d7XQpIeHatWtqbW1VT0+PsrKylJaW5nRJjjp37pxGjhzpdBlhoaurSxcvXlR2drbTpTjG4/FIkjIzM3XlyhUdO3ZMw4YN0yOPPOJwZc4pLS3Va6+9phEjRvR5bvLkyTpy5Mj9LyoE/OUvf9Hf/vY35eXladSoUU6XEzLmz5+v/Px8ff3rX/d//ly8eNF/CcdPfvITZwu8SwQtAMB98c477+jhhx9Wbm5un+eampr01a9+1YGqEKo6OztVW1urgwcP6tKlS5Kk1NRU/xdtwuWIH0ELAOC4f10XCtyNcHq/cMNSAIDjNm/e7HQJCCPh9H4Z1BfDAwDun+nTp//H5y5evHgfK0E4iJT3C0ELAHBftLe364033uhz/yOfz6dnnnnGoaoQqiLl/ULQAgDcF1/5yld0/fp1jR49us9z3AEd/y5S3i9cDA8AAGAIF8MDAAAYQtACAAAwhKAFAABgCEELAADAEIIWAACAIf8DkE+pTkvNc6oAAAAASUVORK5CYII=\n",
551 | "text/plain": [
552 | ""
553 | ]
554 | },
555 | "metadata": {},
556 | "output_type": "display_data"
557 | }
558 | ],
559 | "source": [
560 | "plt.figure(figsize=(10,5))\n",
561 | "df.loc[df['TLD'] > 3]['TLD'].value_counts().plot(kind='bar')"
562 | ]
563 | },
564 | {
565 | "cell_type": "code",
566 | "execution_count": 163,
567 | "metadata": {},
568 | "outputs": [
569 | {
570 | "data": {
571 | "text/html": [
572 | "\n",
573 | "\n",
586 | "
\n",
587 | " \n",
588 | " \n",
589 | " | \n",
590 | " NumberOfParts | \n",
591 | " Length | \n",
592 | " LongestPart | \n",
593 | " TLD | \n",
594 | " Randomness | \n",
595 | "
\n",
596 | " \n",
597 | " \n",
598 | " \n",
599 | " | emil.engineering | \n",
600 | " 2 | \n",
601 | " 16 | \n",
602 | " 11 | \n",
603 | " 11 | \n",
604 | " 0.0 | \n",
605 | "
\n",
606 | " \n",
607 | "
\n",
608 | "
"
609 | ],
610 | "text/plain": [
611 | " NumberOfParts Length LongestPart TLD Randomness\n",
612 | "emil.engineering 2 16 11 11 0.0"
613 | ]
614 | },
615 | "execution_count": 163,
616 | "metadata": {},
617 | "output_type": "execute_result"
618 | }
619 | ],
620 | "source": [
621 | "df.loc[df['TLD'] == 11].sample()"
622 | ]
623 | },
624 | {
625 | "cell_type": "code",
626 | "execution_count": null,
627 | "metadata": {},
628 | "outputs": [],
629 | "source": []
630 | }
631 | ],
632 | "metadata": {
633 | "kernelspec": {
634 | "display_name": "Python 3",
635 | "language": "python",
636 | "name": "python3"
637 | },
638 | "language_info": {
639 | "codemirror_mode": {
640 | "name": "ipython",
641 | "version": 3
642 | },
643 | "file_extension": ".py",
644 | "mimetype": "text/x-python",
645 | "name": "python",
646 | "nbconvert_exporter": "python",
647 | "pygments_lexer": "ipython3",
648 | "version": "3.6.7"
649 | }
650 | },
651 | "nbformat": 4,
652 | "nbformat_minor": 2
653 | }
654 |
--------------------------------------------------------------------------------