├── snape
│   ├── __init__.py
│   ├── scratch.py
│   ├── test
│   │   ├── __init__.py
│   │   ├── test_utils.py
│   │   ├── test_score_dataset.py
│   │   ├── test_make_dataset.py
│   │   └── test_make_image_dataset.py
│   ├── utils.py
│   ├── score_dataset.py
│   ├── make_image_dataset.py
│   ├── make_dataset.py
│   └── flicker.py
├── doc
│   ├── make_dataset.md
│   ├── score_dataset.md
│   ├── config_classification.json.md
│   └── config_regression.json.md
├── .coveragerc
├── .gitignore
├── example
│   ├── config_image_classification.json
│   ├── config_regression.json
│   ├── config_regression_starschema.json
│   ├── config_classification.json
│   ├── config_classification_starschema.json
│   ├── config_multiclass_classification.json
│   ├── config_multiclass_classification_starschema.json
│   ├── module_usage_example.py
│   └── student_predictions.csv
├── setup.cfg
├── .travis.yml
├── setup.py
├── Makefile
├── README.md
└── LICENSE
/snape/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /snape/scratch.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /doc/make_dataset.md: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /doc/score_dataset.md: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /snape/test/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [report] 2 | show_missing = True 3 | 4 | [run] 5 | source = snape 6 | omit = 7 | */setup.py 8 | 9 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_* 2 | *.pyc 3 | __pycache__ 4 | .idea 5 | *.egg-info 6 | build/ 7 | dist/ 8 | .pytest_cache 9 | .coverage 10 | .coverage.* 11 | test_images/ -------------------------------------------------------------------------------- /example/config_image_classification.json: -------------------------------------------------------------------------------- 1 | { 2 | "n_classes": 2, 3 | "n_samples": 10, 4 | "out_path": "./", 5 | "weights": [0.8,0.2], 6 | "image_source": "imagenet", 7 | "random_seed":42 8 | } -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [aliases] 2 | test = pytest 3 | 4 | [tool:pytest] 5 | filterwarnings = 6 | # Warnings that we raise: 7 | ignore::UserWarning 8 | 9 | [metadata] 10 | description-file = README.md 11 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: required 2 | language: python 3 | python: 4 | - "3.6" 5 | - "3.7" 6 | 7 | addons: 8 | apt: 9 | packages: 10 | - libblas-dev 11 | - liblapack-dev 12 | 13 | install: 14 | - | 15 | travis_wait travis_retry pip install scipy numpy 16 | make test-dependencies 17 | # eventually get rid of this when on codecov: 18 | pip install coveralls 19 | pip
install -e . 20 | 21 | script: 22 | - make test 23 | 24 | after_success: 25 | - | 26 | # TODO: switch to codecov 27 | coveralls || echo "Coveralls upload failed" -------------------------------------------------------------------------------- /example/config_regression.json: -------------------------------------------------------------------------------- 1 | { 2 | "type":"regression", 3 | "n_samples":10000, 4 | "n_features":100, 5 | "out_path":"./", 6 | "output":"my_dataset", 7 | "n_informative":30, 8 | "n_targets":1, 9 | "effective_rank":1, 10 | "tail_strength":0.5, 11 | "noise":0.0, 12 | "pct_missing":0.01, 13 | "insert_dollar":"Yes", 14 | "insert_percent":"Yes", 15 | "star_schema":"No", 16 | "label_list":[["america","asia", "europe"], ["monday", "tuesday", "wednesday", "thursday", "friday"], ["January","Feb","Mar","Apr","May","Jun","July", "Aug", "sept.","Oct","Nov","Dec"]], 17 | "random_seed":42, 18 | "shuffle": true 19 | } 20 | -------------------------------------------------------------------------------- /example/config_regression_starschema.json: -------------------------------------------------------------------------------- 1 | { 2 | "type":"regression", 3 | "n_samples":10000, 4 | "n_features":100, 5 | "out_path":"./", 6 | "output":"my_dataset", 7 | "n_informative":30, 8 | "n_targets":1, 9 | "effective_rank":1, 10 | "tail_strength":0.5, 11 | "noise":0.0, 12 | "pct_missing":0.01, 13 | "insert_dollar":"Yes", 14 | "insert_percent":"Yes", 15 | "star_schema":"Yes", 16 | "label_list":[["america","asia", "europe"], ["monday", "tuesday", "wednesday", "thursday", "friday"], ["January","Feb","Mar","Apr","May","Jun","July", "Aug", "sept.","Oct","Nov","Dec"]], 17 | "random_seed":42, 18 | "shuffle": true 19 | } 20 | -------------------------------------------------------------------------------- /example/config_classification.json: -------------------------------------------------------------------------------- 1 | { 2 | "type":"classification", 3 | "n_classes":2, 4 | "n_samples":10000, 5 | "n_features":100, 6 | "out_path":"./", 7 | "output":"my_dataset", 8 | "n_informative":30, 9 | "n_duplicate":3, 10 | "n_redundant":5, 11 | "n_clusters":4, 12 | "weights":[0.8,0.2], 13 | "pct_missing":0.01, 14 | "insert_dollar":"Yes", 15 | "insert_percent":"Yes", 16 | "star_schema":"No", 17 | "label_list":[["america","asia", "europe"], ["monday", "tuesday", "wednesday", "thursday", "friday"], ["January","Feb","Mar","Apr","May","Jun","July", "Aug", "sept.","Oct","Nov","Dec"]], 18 | "random_seed":42, 19 | "shuffle": true 20 | } 21 | -------------------------------------------------------------------------------- /example/config_classification_starschema.json: -------------------------------------------------------------------------------- 1 | { 2 | "type":"classification", 3 | "n_classes":2, 4 | "n_samples":10000, 5 | "n_features":100, 6 | "out_path":"./", 7 | "output":"my_dataset", 8 | "n_informative":30, 9 | "n_duplicate":3, 10 | "n_redundant":5, 11 | "n_clusters":4, 12 | "weights":[0.8,0.2], 13 | "pct_missing":0.01, 14 | "insert_dollar":"Yes", 15 | "insert_percent":"Yes", 16 | "star_schema":"Yes", 17 | "label_list":[["america","asia", "europe"], ["monday", "tuesday", "wednesday", "thursday", "friday"], ["January","Feb","Mar","Apr","May","Jun","July", "Aug", "sept.","Oct","Nov","Dec"]], 18 | "random_seed":42, 19 | "shuffle": true 20 | } 21 | -------------------------------------------------------------------------------- /example/config_multiclass_classification.json:
-------------------------------------------------------------------------------- 1 | { 2 | "type":"classification", 3 | "n_classes":5, 4 | "n_samples":10000, 5 | "n_features":100, 6 | "out_path":"./", 7 | "output":"my_dataset", 8 | "n_informative":30, 9 | "n_duplicate":3, 10 | "n_redundant":5, 11 | "n_clusters":4, 12 | "weights":[0.2,0.2,0.2,0.2,0.2], 13 | "pct_missing":0.01, 14 | "insert_dollar":"Yes", 15 | "insert_percent":"Yes", 16 | "star_schema":"No", 17 | "label_list":[["america","asia", "europe"], ["monday", "tuesday", "wednesday", "thursday", "friday"], ["January","Feb","Mar","Apr","May","Jun","July", "Aug", "sept.","Oct","Nov","Dec"]], 18 | "random_seed":42, 19 | "shuffle": true 20 | } 21 | -------------------------------------------------------------------------------- /example/config_multiclass_classification_starschema.json: -------------------------------------------------------------------------------- 1 | { 2 | "type":"classification", 3 | "n_classes":5, 4 | "n_samples":10000, 5 | "n_features":100, 6 | "out_path":"./", 7 | "output":"my_dataset", 8 | "n_informative":30, 9 | "n_duplicate":3, 10 | "n_redundant":5, 11 | "n_clusters":4, 12 | "weights":[0.2,0.2,0.2,0.2,0.2], 13 | "pct_missing":0.01, 14 | "insert_dollar":"Yes", 15 | "insert_percent":"Yes", 16 | "star_schema":"Yes", 17 | "label_list":[["america","asia", "europe"], ["monday", "tuesday", "wednesday", "thursday", "friday"], ["January","Feb","Mar","Apr","May","Jun","July", "Aug", "sept.","Oct","Nov","Dec"]], 18 | "random_seed":42, 19 | "shuffle": true 20 | } 21 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | 2 | from setuptools import setup 3 | 4 | setup(name='snape', 5 | version='0.2', # TODO: this needs to be controlled with a tag 6 | description='Snape Realistic Synthetic Dataset Tool', 7 | url='https://github.com/mbernico/snape', 8 | author='Mike Bernico', 9 | author_email='mike.bernico@gmail.com', 10 | license='Apache 2.0', 11 | packages=['snape'], 12 | install_requires=['scikit-learn>=0.20', 13 | 'pandas', 14 | 'numpy', 15 | 'requests', 16 | 'beautifulsoup4', 17 | 'lxml' 18 | ], 19 | zip_safe=False) -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # simple Makefile to simplify repetitive build env management on posix machines 2 | 3 | PYTHON ?= python 4 | 5 | .PHONY: clean 6 | clean: 7 | $(PYTHON) setup.py clean 8 | rm -rf dist 9 | rm -rf build 10 | rm -rf .pytest_cache 11 | rm -rf snape.egg-info 12 | rm -rf .coverage.* 13 | 14 | .PHONY: install 15 | install: 16 | $(PYTHON) setup.py install 17 | 18 | .PHONY: sdist 19 | sdist: 20 | $(PYTHON) setup.py sdist 21 | 22 | .PHONY: test-dependencies 23 | test-dependencies: 24 | $(PYTHON) -m pip install coverage pytest pytest-cov flake8 25 | 26 | .PHONY: test-unit 27 | test-unit: 28 | $(PYTHON) -m pytest -v --durations=4 --cov snape 29 | 30 | # TODO: add linting 31 | .PHONY: test 32 | test: test-unit -------------------------------------------------------------------------------- /example/module_usage_example.py: -------------------------------------------------------------------------------- 1 | 2 | from snape.make_dataset import make_dataset 3 | from snape.score_dataset import score_dataset 4 | 5 | # example configuration json files live in the example directory; each key is documented in doc 6 | conf = { 7 | "type": "classification", 8 | "n_classes": 2, 9 |
"n_samples": 1000, 10 | "n_features": 10, 11 | "out_path": "./", 12 | "output": "my_dataset", 13 | "n_informative": 3, 14 | "n_duplicate": 0, 15 | "n_redundant": 0, 16 | "n_clusters": 2, 17 | "weights": [0.8, 0.2], 18 | "pct_missing": 0.00, 19 | "insert_dollar": "Yes", 20 | "insert_percent": "Yes", 21 | "star_schema": "No", 22 | "label_list": [], 23 | "random_seed": 42 24 | } 25 | 26 | # make_dataset creates an artificial dataset using the passed dictionary 27 | make_dataset(config=conf) 28 | 29 | # a dataset's testkey can be compared to a prediction file using score_dataset() 30 | results = score_dataset(y_file="student_testkey.csv", y_hat_file="student_predictions.csv") 31 | # results is a tuple of (a_primary_metric, classification_report) 32 | print("AUC = " + str(results[0])) 33 | print(results[1]) 34 | -------------------------------------------------------------------------------- /snape/test/test_utils.py: -------------------------------------------------------------------------------- 1 | 2 | from snape.utils import assert_valid_percent, get_random_state 3 | import pytest 4 | 5 | 6 | @pytest.mark.parametrize( 7 | 'pct,kwargs', [ 8 | 9 | pytest.param( 10 | 0.5, 11 | {}, 12 | ), 13 | 14 | pytest.param( 15 | 0.9, 16 | {}, 17 | ), 18 | 19 | pytest.param( 20 | 0.1, 21 | {}, 22 | ), 23 | 24 | # these only pass with certain kwargs 25 | pytest.param( 26 | 0.0, 27 | {"eq_lower": True}, 28 | ), 29 | 30 | pytest.param( 31 | 1.0, 32 | {"eq_upper": True}, 33 | ), 34 | 35 | ] 36 | ) 37 | def test_valid_percentages(pct, kwargs): 38 | assert_valid_percent(pct, **kwargs) 39 | 40 | 41 | @pytest.mark.parametrize('pct', [0.0, 1.0]) 42 | def test_invalid_percetages(pct): 43 | with pytest.raises(ValueError): 44 | assert_valid_percent(x=pct) 45 | 46 | 47 | @pytest.mark.parametrize( 48 | 'x', [ 49 | 'some random string', 50 | {'an': 'iterable'} 51 | ] 52 | ) 53 | def test_random_state_fails(x): 54 | with pytest.raises(TypeError): 55 | get_random_state(x) 56 | -------------------------------------------------------------------------------- /snape/utils.py: -------------------------------------------------------------------------------- 1 | 2 | from numpy.random import RandomState 3 | import numpy as np 4 | 5 | __all__ = [ 6 | 'assert_is_type', 7 | 'assert_valid_percent', 8 | 'get_random_state' 9 | ] 10 | 11 | def assert_is_type(x, t): 12 | if not isinstance(x, t): 13 | raise TypeError('expected %r but got type=%s' 14 | % (t, type(x))) 15 | return x 16 | 17 | 18 | def assert_valid_percent(x, eq_lower=False, eq_upper=False): 19 | # these are all castable to float 20 | assert_is_type(x, (float, np.float, np.int, int, np.long)) 21 | x = float(x) 22 | 23 | # test lower bound: 24 | if not ((eq_lower and 0. <= x) or ((not eq_lower) and 0. < x)): 25 | raise ValueError('Expected 0. %s x, but got x=%r' 26 | % ('<=' if eq_lower else '<', x)) 27 | if not ((eq_upper and x <= 1.) 
or ((not eq_upper) and x < 1.)): 28 | raise ValueError('Expected x %s 1., but got x=%r' 29 | % ('<=' if eq_upper else '<', x)) 30 | return x 31 | 32 | 33 | def get_random_state(random_state): 34 | # if it's a seed, return a new seeded RandomState 35 | if random_state is None or \ 36 | isinstance(random_state, (int, np.int, np.long)): 37 | return RandomState(random_state) 38 | # if it's a RandomState, it's been initialized 39 | elif isinstance(random_state, RandomState): 40 | return random_state 41 | else: 42 | raise TypeError('cannot seed new RandomState with type=%s' 43 | % type(random_state)) -------------------------------------------------------------------------------- /doc/config_classification.json.md: -------------------------------------------------------------------------------- 1 | **type**: string, (default=classification) 2 | 3 | The type of dataset. 4 | 5 | **n_classes**: int, (default=2) 6 | 7 | The number of classes (or labels) of the classification problem. 8 | 9 | **n_samples**: int, (default=10000) 10 | 11 | The number of observations. 12 | 13 | **n_features**: int, (default=100) 14 | 15 | The number of features. 16 | 17 | **out_path**: string, (default="./") 18 | 19 | The output system path. 20 | 21 | **output**: string, (default="my_dataset") 22 | 23 | The output file name. 24 | 25 | **n_informative**: int, (default=30) 26 | 27 | The number of informative features. 28 | 29 | **n_duplicate**: int, (default=3) 30 | 31 | The number of perfectly collinear (duplicated) features. 32 | 33 | **n_redundant**: int, (default=5) 34 | 35 | The number of multicollinear features. 36 | 37 | **n_clusters**: int, (default=4) 38 | 39 | The number of Gaussian clusters per class. 40 | 41 | **weights**: list of floats, (default=[0.8,0.2]) 42 | 43 | A list of class balances. 44 | 45 | **pct_missing**: float, (default=0.01) 46 | 47 | The percentage of rows that should have a missing value. 48 | 49 | **insert_dollar**: string, (default="Yes") 50 | 51 | Whether to include a dollar sign in a numeric column ("Yes" or "No"). 52 | 53 | **insert_percent**: string, (default="Yes") 54 | 55 | Whether to include a percent symbol in a numeric column ("Yes" or "No"). 56 | 57 | **n_categorical**: int, (default=3) 58 | 59 | The number of categorical variables to create. 60 | 61 | **label_list**: list of lists, (default=[["america","asia", "europe"], ["monday", "tuesday", "wednesday", "thursday", "friday"], ["January","Feb","Mar","Apr","May","Jun","July", "Aug", "sept.","Oct","Nov","Dec"]]) 62 | 63 | A list of lists; each inner list holds the labels for one categorical variable. 64 | 65 | **shuffle**: bool, (default=true) 66 | 67 | Shuffle the samples and the features. 68 | 69 | **star_schema**: string, (default="No") 70 | 71 | Whether to pivot the dataset into a star schema ("Yes" or "No"). If "Yes", one extra csv file is written per dimension alongside the main fact table. 72 | 73 | **random_seed**: int, (default=42) 74 | 75 | The seed for the random number generator, so a dataset can be regenerated exactly. 76 | 77 | -------------------------------------------------------------------------------- /doc/config_regression.json.md: -------------------------------------------------------------------------------- 1 | **type**: string, (default=regression) 2 | 3 | The type of dataset. 4 | 5 | **n_samples**: int, (default=10000) 6 | 7 | The number of observations. 8 | 9 | **n_features**: int, (default=100) 10 | 11 | The number of features. 12 | 13 | **out_path**: string, (default="./") 14 | 15 | The output system path. 16 | 17 | **output**: string, (default="my_dataset") 18 | 19 | The output file name. 20 | 21 | **n_informative**: int, (default=30) 22 | 23 | The number of informative features. 24 | 25 | **n_targets**: int, (default=1) 26 | 27 | The number of regression targets, i.e., the dimension of the y output vector associated with a sample. By default, the output is a scalar.
28 | 29 | **effective_rank**: int, (default=1) 30 | 31 | The approximate number of singular vectors required to explain the data. 32 | 33 | **tail_strength**: float, (default=0.5) 34 | 35 | The relative importance of the fat noisy tail of the singular values profile. 36 | 37 | **noise**: float, (default=0.0) 38 | 39 | The standard deviation of the Gaussian noise applied to the output. 40 | 41 | **pct_missing**: float, (default=0.01) 42 | 43 | The percentage of rows that should have a missing value. 44 | 45 | **insert_dollar**: string, (default="Yes") 46 | 47 | Whether to include a dollar sign in a numeric column ("Yes" or "No"). 48 | 49 | **insert_percent**: string, (default="Yes") 50 | 51 | Whether to include a percent symbol in a numeric column ("Yes" or "No"). 52 | 53 | **n_categorical**: int, (default=3) 54 | 55 | The number of categorical variables to create. 56 | 57 | **label_list**: list of lists, (default=[["america","asia", "europe"], ["monday", "tuesday", "wednesday", "thursday", "friday"], ["January","Feb","Mar","Apr","May","Jun","July", "Aug", "sept.","Oct","Nov","Dec"]]) 58 | 59 | A list of lists; each inner list holds the labels for one categorical variable. 60 | 61 | **shuffle**: bool, (default=true) 62 | 63 | Shuffle the samples and the features. 64 | 65 | **star_schema**: string, (default="No") 66 | 67 | Whether to pivot the dataset into a star schema ("Yes" or "No"). If "Yes", one extra csv file is written per dimension alongside the main fact table. 68 | 69 | **random_seed**: int, (default=42) 70 | 71 | The seed for the random number generator, so a dataset can be regenerated exactly. 72 | 73 | -------------------------------------------------------------------------------- /snape/test/test_score_dataset.py: -------------------------------------------------------------------------------- 1 | 2 | from numpy.testing import assert_almost_equal 3 | from snape.score_dataset import * 4 | from snape.utils import get_random_state, assert_valid_percent 5 | import os 6 | 7 | random_state = get_random_state(42) 8 | y_rand = (random_state.rand(200)) 9 | 10 | r = {'y': y_rand * 10, 11 | 'y_hat': y_rand * 10 - y_rand 12 | } 13 | 14 | regression_df = pd.DataFrame(r) 15 | c = {'y': [1, 1, 1, 1, 0, 0, 0, 0], 16 | 'y_hat': [1, 0.9, 0.4, 0.95, 0, 0.1, 0.6, 0.15] 17 | } 18 | 19 | classification_df = pd.DataFrame(c) 20 | m = {'y': [0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3], 21 | 'y_hat': [0, 1, 0, 1, 1, 3, 1, 2, 2, 3, 2, 3] 22 | } 23 | 24 | multiclass_df = pd.DataFrame(m) 25 | 26 | 27 | def test_guess_problem_type(): 28 | assert guess_problem_type(regression_df['y']) == 'regression' 29 | assert guess_problem_type(classification_df['y']) == 'binary' 30 | assert guess_problem_type(multiclass_df['y']) == 'multiclass' 31 | 32 | 33 | def test_score_binary_classification(): 34 | y = classification_df['y'] 35 | y_hat = classification_df['y_hat'] 36 | assert score_binary_classification(y, y_hat, report=False)[0] == 0.9375 37 | assert "---Binary Classification Score---" in score_binary_classification(y, y_hat, report=False)[1] 38 | 39 | 40 | def test_score_multiclass_classification(): 41 | y = multiclass_df['y'] 42 | y_hat = multiclass_df['y_hat'] 43 | assert round(score_multiclass_classification(y, y_hat, report=False)[0], 2) == 0.67 44 | assert "---Multiclass Classification Score---" in score_multiclass_classification(y, y_hat, report=False)[1] 45 | 46 | 47 | def test_score_regression(): 48 | y = regression_df['y'] 49 | y_hat = regression_df['y_hat'] 50 | assert_almost_equal(round(score_regression(y, y_hat, report=False)[0], 2), 0.48) 51 | assert "---Regression Score---" in score_regression(y, y_hat, report=False)[1] 52 | 53 | 54 | def test_arg_parser(): 55 | args = parse_args(["-ktest_key.csv","-ptest_pred.csv"]) 56 | assert args['pred'] == 'test_pred.csv', "parse_args failed to parse its argument" 57 | assert args['key'] == 'test_key.csv', "parse_args failed to parse its argument" 58 | 59 | 60 | def setup_file_classification(): 61 | y = classification_df['y']
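# read_files() expects the key csv to keep its 'y' column header and the
# prediction csv to be headerless, hence the asymmetric to_csv calls below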
62 | y_hat = classification_df['y_hat'] 63 | y.to_csv("y_test.csv", header=True, index=False) 64 | y_hat.to_csv("y_hat_test.csv", header=False, index=False) 65 | 66 | 67 | def setup_file_regression(): 68 | y = regression_df['y'] 69 | y_hat = regression_df['y_hat'] 70 | y.to_csv("y_test.csv", header=True, index=False) 71 | y_hat.to_csv("y_hat_test.csv", header=False, index=False) 72 | 73 | 74 | def setup_file_multiclass(): 75 | y = multiclass_df['y'] 76 | y_hat = multiclass_df['y_hat'] 77 | y.to_csv("y_test.csv", header=True, index=False) 78 | y_hat.to_csv("y_hat_test.csv", header=False, index=False) 79 | 80 | 81 | def teardown_file_func(): 82 | os.remove("y_test.csv") 83 | os.remove("y_hat_test.csv") 84 | 85 | 86 | def with_setup(setup, teardown): 87 | """Nose is deprecated, so this is a lightweight decorator that does the same thing""" 88 | def _decorator(func): 89 | def _test_wrapper(*args, **kwargs): 90 | setup() 91 | try: 92 | return func(*args, **kwargs) 93 | finally: 94 | teardown() 95 | return _test_wrapper 96 | return _decorator 97 | 98 | 99 | @with_setup(setup=setup_file_classification, teardown=teardown_file_func) 100 | def test_read_files(): 101 | y, y_hat = read_files("y_test.csv", "y_hat_test.csv") 102 | assert y.shape[0] > 1, "y should have more than one row" 103 | assert y_hat.shape[0] > 1, "y hat should have more than one row" 104 | 105 | 106 | @with_setup(setup=setup_file_classification, teardown=teardown_file_func) 107 | def test_score_dataset_classification(): 108 | results = score_dataset(y_file="y_test.csv", y_hat_file="y_hat_test.csv") 109 | assert_valid_percent(results[0])  # raises ValueError if the AUC is not a valid percent 110 | 111 | 112 | @with_setup(setup=setup_file_regression, teardown=teardown_file_func) 113 | def test_score_dataset_regression(): 114 | results = score_dataset(y_file="y_test.csv", y_hat_file="y_hat_test.csv") 115 | assert results[0] > 0, "MAE probably isn't 0" 116 | 117 | 118 | @with_setup(setup=setup_file_multiclass, teardown=teardown_file_func) 119 | def test_score_dataset_multiclass(): 120 | results = score_dataset(y_file="y_test.csv", y_hat_file="y_hat_test.csv") 121 | assert_valid_percent(results[0])  # raises ValueError if the accuracy is not a valid percent -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Build status](https://travis-ci.org/mbernico/snape.svg?branch=master)](https://travis-ci.org/mbernico/snape) 2 | [![Coverage Status](https://coveralls.io/repos/github/mbernico/snape/badge.svg?branch=master)](https://coveralls.io/github/mbernico/snape?branch=master) 3 | 4 | # Snape 5 | 6 | Snape is a convenient artificial dataset generator that wraps sklearn's make_classification and make_regression 7 | and then adds in 'realism' features such as complex formatting, varying scales, categorical variables, 8 | and missing values. 9 | 10 | ## Motivation 11 | 12 | Snape was primarily created for academic and educational settings. It has been used to create datasets that are unique per 13 | student, per assignment, for various homework exercises. It has also been used to create class-wide assessments in 14 | conjunction with 'Kaggle In the Classroom.' 15 | 16 | Other users have suggested non-academic use cases as well, including 'interview screening problems,' model comparison, 17 | etc.
18 | 19 | ## Installation 20 | 21 | 22 | ### Via Github 23 | ```bash 24 | git clone https://github.com/mbernico/snape.git 25 | cd snape 26 | python setup.py install 27 | ``` 28 | ### Via pip 29 | *Coming Soon...* 30 | 31 | ## Quick Start 32 | 33 | Snape can run either as a python module or as a command line application. 34 | 35 | ### Command Line Usage 36 | 37 | #### Creating a Dataset 38 | 39 | From the main directory in the git repo: 40 | ```bash 41 | 42 | python snape/make_dataset.py -c example/config_classification.json 43 | ``` 44 | This will use the configuration file example/config_classification.json to create an artificial dataset called 'my_dataset' 45 | (the name is specified in the json config; more on this later...). 46 | 47 | The dataset will consist of three files: 48 | * my_dataset_train.csv (80% of the artificial dataset with all dependent and independent variables) 49 | * my_dataset_test.csv (20% of the artificial dataset with only the independent variables present) 50 | * my_dataset_testkey.csv (the same 20% as _test, including the dependent variable) 51 | 52 | Note that if a star schema is generated, additional csv files will be written: one extra csv file per dimension. Only the main 'fact table' dataset will be split into test and train files. 53 | 54 | The train and test files can be given to a student. The student can respond with a file of predictions, which can be 55 | scored against the testkey as follows: 56 | 57 | #### Scoring a Dataset 58 | 59 | ```bash 60 | snape/score_dataset.py -p example/student_predictions.csv -k example/student_testkey.csv 61 | ``` 62 | Snape's score_dataset.py will attempt to detect the problem type and then score it, printing some metrics. 63 | 64 | 65 | ``` 66 | Problem Type Detection: binary 67 | ---Binary Classification Score--- 68 | precision recall f1-score support 69 | 70 | 0 0.81 0.99 0.89 1601 71 | 1 0.50 0.06 0.11 399 72 | 73 | avg / total 0.75 0.80 0.73 2000 74 | ``` 75 | 76 | 77 | ### Python Module Usage 78 | 79 | 80 | #### Creating a Dataset 81 | ```python 82 | from snape.make_dataset import make_dataset 83 | 84 | # example configuration json files live in the example directory; each key is documented in doc 85 | conf = { 86 | "type": "classification", 87 | "n_classes": 2, 88 | "n_samples": 1000, 89 | "n_features": 10, 90 | "out_path": "./", 91 | "output": "my_dataset", 92 | "n_informative": 3, 93 | "n_duplicate": 0, 94 | "n_redundant": 0, 95 | "n_clusters": 2, 96 | "weights": [0.8, 0.2], 97 | "pct_missing": 0.00, 98 | "insert_dollar": "Yes", 99 | "insert_percent": "Yes", 100 | "n_categorical": 0, 101 | "star_schema": "No", 102 | "label_list": [] 103 | } 104 | 105 | make_dataset(config=conf) 106 | ``` 107 | 108 | 109 | #### Scoring a Dataset 110 | 111 | ```python 112 | from snape.score_dataset import score_dataset 113 | 114 | # a dataset's testkey can be compared to a prediction file using score_dataset() 115 | results = score_dataset(y_file="student_testkey.csv", y_hat_file="student_predictions.csv") 116 | # results is a tuple of (a_primary_metric, classification_report) 117 | print("AUC = " + str(results[0])) 118 | print(results[1]) 119 | ``` 120 | 121 | 122 | ## Dataset Generation Config 123 | 124 | 1. [Classification JSON](doc/config_classification.json.md) 125 | 2. [Regression JSON](doc/config_regression.json.md) 126 | 127 | 128 | ## Why Snape? 129 | Snape is primarily used for creating complex datasets that *challenge* students and teach defense against the dark 130 | arts of machine learning.
:) 131 | 132 | 133 | -------------------------------------------------------------------------------- /snape/score_dataset.py: -------------------------------------------------------------------------------- 1 | 2 | ######################################################################## 3 | # Scores a Machine Learning Problem Dataset 4 | # Suitable for assigning to a student for a homework assignment 5 | # 6 | # 7 | ######################################################################## 8 | 9 | import argparse 10 | from math import sqrt 11 | 12 | import pandas as pd 13 | from sklearn.metrics import classification_report 14 | from sklearn.metrics import roc_auc_score 15 | from sklearn.metrics import accuracy_score 16 | from sklearn.metrics import mean_absolute_error 17 | from sklearn.metrics import mean_squared_error 18 | from sklearn.metrics import r2_score 19 | import sys 20 | 21 | 22 | def parse_args(args): 23 | """ 24 | Returns arguments passed at the command line as a dict 25 | :return: configuration dictionary 26 | 27 | """ 28 | parser = argparse.ArgumentParser(description='Scores a ML dataset solution.') 29 | parser.add_argument('-p', help="Predictions File", required=True, 30 | dest='pred') 31 | parser.add_argument('-k', help="Key File", required=True, 32 | dest='key') 33 | return vars(parser.parse_args(args)) 34 | 35 | def read_files(y_file_name, yhat_file_name): 36 | """ 37 | Opens file names and returns dataframes 38 | 39 | :return: y, y_hat as a tuple of dataframes 40 | """ 41 | y_hat = pd.read_csv(yhat_file_name, header=None) # i'm expecting no header for now. This might be a problem. 42 | # our test key has the features and the answer in it, we only need the answer to score 43 | y_df = pd.read_csv(y_file_name) 44 | y = y_df['y'] 45 | return y, y_hat 46 | 47 | 48 | def guess_problem_type(key): 49 | """ 50 | Infers the problem type, using the key dataframe 51 | :param key: the answer dataframe 52 | :return: Inferred Problem Type 53 | """ 54 | num_values = len(key.unique()) 55 | if num_values == 2: 56 | return "binary" 57 | elif (num_values > 2) and (num_values < 100): # assumptions that will burn me later probably 58 | return "multiclass" 59 | else: 60 | return "regression" 61 | 62 | 63 | def score_binary_classification(y, y_hat, report=True): 64 | """ 65 | Create binary classification output 66 | :param y: true value 67 | :param y_hat: class 1 probabilities 68 | :param report: if True, print the report string 69 | :return: (auc_score, report_string) 70 | """ 71 | y_hat_class = [1 if x >= 0.5 else 0 for x in y_hat] # convert probability to class for classification report 72 | 73 | report_string = "---Binary Classification Score--- \n" 74 | report_string += classification_report(y, y_hat_class) 75 | score = roc_auc_score(y, y_hat) 76 | report_string += "\nAUC = " + str(score) 77 | 78 | if report: 79 | print(report_string) 80 | 81 | return score, report_string 82 | 83 | 84 | def score_multiclass_classification(y, y_hat, report=True): 85 | """ 86 | Create multiclass classification score 87 | :param y: true labels 88 | :param y_hat: predicted labels 89 | :return: (accuracy, report_string) 90 | """ 91 | report_string = "---Multiclass Classification Score--- \n" 92 | report_string += classification_report(y, y_hat) 93 | score = accuracy_score(y, y_hat) 94 | report_string += "\nAccuracy = " + str(score) 95 | 96 | if report: 97 | print(report_string) 98 | 99 | return score, report_string 100 | 101 | 102 | def score_regression(y, y_hat, report=True): 103 | """ 104 | Create regression score 105 | :param y: true values 106 | :param y_hat: predicted values 107 | :return: (mae, report_string) 108 | """ 109 | r2 = r2_score(y, y_hat) 110 | rmse =
sqrt(mean_squared_error(y, y_hat)) 111 | mae = mean_absolute_error(y, y_hat) 112 | 113 | report_string = "---Regression Score--- \n" 114 | report_string += "R2 = " + str(r2) + "\n" 115 | report_string += "RMSE = " + str(rmse) + "\n" 116 | report_string += "MAE = " + str(mae) + "\n" 117 | 118 | if report: 119 | print(report_string) 120 | 121 | return mae, report_string 122 | 123 | 124 | def score_dataset(y_file=None, y_hat_file=None): 125 | """ 126 | 1. Reads in the key file and the prediction file (the student's predictions) 127 | 2. Guesses the problem type 128 | 3. Scores the problem 129 | 130 | :return: (score, report_string) when called with file arguments; None when run from the command line 131 | """ 132 | 133 | report_output = True 134 | if y_file is None and y_hat_file is None: 135 | # called from the command line so parse configuration 136 | args = parse_args(sys.argv[1:]) 137 | y, y_hat = read_files(args['key'], args['pred']) 138 | else: 139 | y, y_hat = read_files(y_file, y_hat_file) 140 | report_output = False 141 | 142 | problem_type = guess_problem_type(y) 143 | print("Problem Type Detection: " + problem_type) 144 | print("y shape: " + str(y.shape) + " y hat shape: " + str(y_hat.shape)) 145 | if problem_type == 'binary': 146 | results = score_binary_classification(y, y_hat[0], report=report_output) 147 | elif problem_type == 'multiclass': 148 | results = score_multiclass_classification(y, y_hat[0], report=report_output) 149 | else: 150 | results = score_regression(y, y_hat[0], report=report_output)  # pass the prediction column and honor the report flag, matching the other branches 151 | 152 | if not report_output: 153 | return results 154 | 155 | 156 | if __name__ == "__main__": 157 | score_dataset() -------------------------------------------------------------------------------- /snape/test/test_make_dataset.py: -------------------------------------------------------------------------------- 1 | 2 | from snape.make_dataset import * 3 | from snape.utils import get_random_state 4 | 5 | import pytest 6 | import glob 7 | import os 8 | 9 | 10 | random_state = get_random_state(42) 11 | 12 | 13 | def test_create_classification_dataset(): 14 | df = create_classification_dataset(n_samples=100, n_features=10, n_informative=3, n_redundant=0, 15 | n_repeated=0, n_clusters_per_class=2, weights=[0.5, 0.5], n_classes=2, 16 | random_state=random_state, shuffle=False) 17 | 18 | assert df.shape[0] == 100, "Sample Size Doesn't Match" 19 | assert df.shape[1] == 11, "Feature Count" 20 | assert df['y'].value_counts().shape[0] == 2, "Expected Shape of Classes Do Not Match" 21 | 22 | 23 | def test_create_regression_dataset(): 24 | df = create_regression_dataset(n_samples=100, n_features=10, n_informative=3, effective_rank=1, 25 | tail_strength=0.5, noise=0.0, random_state=random_state, 26 | shuffle=False) 27 | 28 | assert df.shape[0] == 100, "Sample Size Doesn't Match" 29 | assert df.shape[1] == 11, "Feature Count" 30 | 31 | 32 | def test_create_categorical_features(): 33 | rs = get_random_state(42) 34 | df = pd.DataFrame(random_state.randn(100, 4), columns=list('ABCD')) 35 | cat_df = create_categorical_features(df, [['a', 'b'], ['red', 'blue']], random_state=rs) 36 | assert (cat_df.dtypes == "category").sum() == 2, \ 37 | "Actual: %s" % str(cat_df.dtypes.value_counts()) 38 | 39 | 40 | def test_insert_special_char(): 41 | df = pd.DataFrame(random_state.randn(100, 1), columns=list('A')) 42 | df_spec = insert_special_char("$", df, random_state=random_state) 43 | assert df_spec['A'].str.contains('$', regex=False).all()  # regex=False so '$' is matched literally, not as an end-of-string anchor 44 | 45 | df_spec = insert_special_char("%", df, random_state=random_state) 46 | assert df_spec['A'].str.contains('%', regex=False).all() 47 | 48 | # using a non $ or % should raise a value error 49 | with
pytest.raises(ValueError): 50 | insert_special_char("!", df, random_state=random_state) 51 | 52 | 53 | def test_insert_missing_values(): 54 | df = pd.DataFrame(random_state.randn(100, 4), columns=list('ABCD')) 55 | df_result = insert_missing_values(df, 1, random_state=random_state) 56 | assert df_result.isnull().any().any() 57 | 58 | df_result = insert_missing_values(df, 0, random_state=random_state) 59 | assert not df_result.isnull().any().any() 60 | 61 | 62 | def test_star_schema(): 63 | df = create_classification_dataset(n_samples=100, n_features=10, n_informative=3, n_redundant=0, 64 | n_repeated=0, n_clusters_per_class=2, weights=[0.5, 0.5], n_classes=2, 65 | random_state=random_state) 66 | 67 | df = create_categorical_features(df, [['a', 'b'], ['red', 'blue']], random_state=random_state) 68 | df = insert_special_char('$', df, random_state=random_state) 69 | df = insert_special_char('%', df, random_state=random_state) 70 | df = insert_missing_values(df, .8, random_state=random_state) 71 | fact_df = make_star_schema(df) 72 | 73 | # Assert file generation 74 | file_list = glob.glob('./*_dim.csv') 75 | diff_list = list(filter(lambda x: x.endswith('_dim.csv'), file_list)) 76 | assert len(diff_list) == 2 77 | 78 | # Delete the tester files 79 | for file_path in file_list: 80 | os.remove(file_path) 81 | 82 | # Assert key column creation 83 | columns = fact_df.columns 84 | key_cols = list(filter(lambda x: x.endswith('_key'), columns)) 85 | assert len(key_cols) == 3 86 | 87 | # Assert key columns don't contain any nulls 88 | key_df = fact_df[key_cols] 89 | na_df = key_df.dropna() 90 | assert len(na_df) == len(key_df), "Nulls exist in the dimension key columns in the star schema." 91 | 92 | # Assert that an index named 'primary_key' was added. 93 | assert 'primary_key' in fact_df.columns, "Index named pk was not added to the fact table" 94 | assert len(fact_df.primary_key.value_counts()) == len(fact_df), "Primary key isn't unique." 
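# the assertions above spell out make_star_schema's expected contract: one
# '*_dim.csv' file per dimension, matching '*_key' columns (with no nulls) on
# the fact table, and a unique 'primary_key' column added to the fact table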
95 | 96 | 97 | def test_load_json(): 98 | jf = os.path.join(os.path.dirname(__file__), '../../example/config_classification.json') 99 | c = load_config(jf) 100 | assert c['type'] == 'classification', "JSON load not sane" 101 | 102 | 103 | def test_arg_parser(): 104 | args = parse_args(["-ctest.json"]) 105 | assert args['config'] == 'test.json', "parse_args failed to parse its argument" 106 | 107 | 108 | def with_dataset_teardown(func): 109 | def _closure(*args, **kwargs): 110 | try: 111 | return func(*args, **kwargs) 112 | finally: 113 | for p in ('test_test.csv', 'test_testkey.csv', 'test_train.csv'): 114 | if os.path.exists(p): 115 | os.unlink(p) 116 | return _closure 117 | 118 | 119 | @with_dataset_teardown 120 | def test_write_dataset(): 121 | df = pd.DataFrame(random_state.randn(100, 5), columns=list('ABCDy')) 122 | write_dataset(df, "test") 123 | 124 | 125 | @with_dataset_teardown 126 | def test_main(): 127 | # example configuration json files live in the example directory; each key is documented in doc 128 | conf = { 129 | "type": "classification", 130 | "n_classes": 2, 131 | "n_samples": 1000, 132 | "n_features": 10, 133 | "out_path": "./", 134 | "output": "test", 135 | "n_informative": 3, 136 | "n_duplicate": 0, 137 | "n_redundant": 0, 138 | "n_clusters": 2, 139 | "weights": [0.8, 0.2], 140 | "pct_missing": 0.00, 141 | "insert_dollar": "Yes", 142 | "insert_percent": "Yes", 143 | "star_schema": "No", 144 | "label_list": [], 145 | "random_seed": 42 146 | } 147 | 148 | # make_dataset creates an artificial dataset using the passed dictionary 149 | make_dataset(config=conf) 150 | 151 | assert os.path.isfile("test_train.csv"), "Dataset not created" -------------------------------------------------------------------------------- /snape/test/test_make_image_dataset.py: -------------------------------------------------------------------------------- 1 | 2 | import shutil 3 | from snape.make_image_dataset import * 4 | from snape.make_image_dataset import _ImageNet, _ImageGrabber 5 | from snape.utils import get_random_state 6 | import glob 7 | import pytest 8 | 9 | conf = { 10 | "n_classes": 2, 11 | "n_samples": 11, 12 | "out_path": "./test_images/", 13 | "weights": [.8, .2], 14 | "image_source": "imagenet", 15 | "random_seed": 42 16 | } 17 | 18 | random_state = get_random_state(conf["random_seed"]) 19 | 20 | 21 | def test_make_image_dataset(): 22 | os.mkdir(conf["out_path"]) 23 | try: 24 | make_image_dataset(conf) 25 | 26 | subdirs = glob.glob(f'{conf["out_path"]}/*') 27 | print(f"Subdirs: {subdirs}") 28 | 29 | assert len(subdirs) == conf["n_classes"], \ 30 | f'Expected {conf["n_classes"]} classes, but got {len(subdirs)}' 31 | 32 | n_samples = conf["n_samples"] 33 | weights = conf["weights"] 34 | exp_class_counts = sorted([int(w * n_samples) for w in weights]) 35 | 36 | # sort subdir names by present images under each 37 | subdir_counts = sorted([len(glob.glob(f'{s}/*')) for s in subdirs]) 38 | assert exp_class_counts == subdir_counts, \ 39 | f"\nExpected class counts: {exp_class_counts}" \ 40 | f"\nActual class counts: {subdir_counts}" 41 | 42 | finally: 43 | shutil.rmtree(conf["out_path"]) 44 | 45 | 46 | @pytest.mark.parametrize( 47 | 'cfg', [ 48 | 49 | # missing an arg 50 | { 51 | "n_samples": 11, 52 | "out_path": "./test_images/", 53 | "weights": [.8, .2], 54 | "image_source": "imagenet", 55 | "random_seed": 42 56 | }, 57 | 58 | # wrong arg 59 | { 60 | "nclasses": 2, 61 | "n_samples": 11, 62 | "out_path": "./test_images/", 63 | "weights": [.8, .2], 64 | "image_source": "imagenet", 65 | "random_seed": 42 66 | } 67 | ] 68 | )
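# both parametrized configs should make check_configuration raise: the first
# omits the required 'n_classes' key, the second misspells it as 'nclasses'
# (an unexpected key, with 'n_classes' still missing)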
69 | def test_check_configuration(cfg): 70 | with pytest.raises(AssertionError): 71 | check_configuration(cfg) 72 | 73 | 74 | class TestImageNet: 75 | def setup_method(self, method):  # pytest skips test classes that define __init__; setup_method builds the fixture instead 76 | self.image_net = _ImageNet(n_classes=conf["n_classes"], 77 | weights=conf["weights"], 78 | n_samples=conf["n_samples"], 79 | output_dir=conf["out_path"], 80 | random_state=random_state) 81 | 82 | def test_get_images(self): 83 | os.mkdir(conf["out_path"]) 84 | try: 85 | self.image_net.get_images() 86 | sub_dir1 = conf["out_path"] + os.listdir(conf["out_path"])[0] 87 | sub_dir2 = conf["out_path"] + os.listdir(conf["out_path"])[1] 88 | n_images1 = len(os.listdir(sub_dir1)) 89 | n_images2 = len(os.listdir(sub_dir2)) 90 | class1_size = int(conf["n_samples"] * conf["weights"][0]) 91 | assert (class1_size == n_images1) or (class1_size == n_images2), "Did not download n images" 92 | except: 93 | raise 94 | finally: 95 | shutil.rmtree(conf["out_path"]) 96 | 97 | def test_sample_synset_links(self): 98 | n = 5 99 | wnid = 'n02114855' 100 | os.mkdir(conf["out_path"]) 101 | try: 102 | self.image_net.sample_synset_links(wnid, n, conf["out_path"]) 103 | n_images = len(os.listdir(conf["out_path"] + '/' + wnid)) 104 | assert n == n_images, "Did not download n images" 105 | assert wnid in os.listdir(conf["out_path"]), "Did not get the requested synset" 106 | except: 107 | raise 108 | finally: 109 | shutil.rmtree(conf["out_path"]) 110 | 111 | def test_get_ilsvrc_1000_synsets(self): 112 | synsets = self.image_net.get_ilsvrc_1000_synsets() 113 | assert len(synsets) == 1000, "ILSVRC page parsed incorrectly" 114 | 115 | def test_get_synset_image_links(self): 116 | wnid = 'n02114855' 117 | links = self.image_net.get_synset_image_links(wnid) 118 | assert len(links) > 0, "Did not return any image links" 119 | 120 | def test_retrieve_class_counts(self): 121 | class_counts = self.image_net.retrieve_class_counts() 122 | assert isinstance(class_counts, pd.core.frame.DataFrame), "Class counts not returned in a dataframe" 123 | 124 | 125 | class TestImageGrabber: 126 | 127 | def test_download_image(self): 128 | good_url = "http://farm4.static.flickr.com/3290/2998414960_01dd35d094.jpg" 129 | good_im_path = "ducky.jpg" 130 | _ImageGrabber().download_image(good_url, good_im_path) 131 | good_im_type = imghdr.what(good_im_path) 132 | os.remove(good_im_path) 133 | assert good_im_type is not None 134 | bad_url = "https://mckinleyleather.com/image/130963084.jpg" 135 | bad_im_path = "no_ducky.jpg" 136 | _ImageGrabber().download_image(bad_url, bad_im_path) 137 | is_file = os.path.isfile(bad_im_path) 138 | assert not is_file 139 | 140 | def test_catch_unavailable_image(self): 141 | good_url = "http://farm4.static.flickr.com/3290/2998414960_01dd35d094.jpg" 142 | good_img_data = requests.get(good_url) 143 | assert not _ImageGrabber.catch_unavailable_image(good_img_data), "The good image tested was found to be bad" 144 | stale_url = "https://mckinleyleather.com/image/130963084.jpg" 145 | stale_img_data = requests.get(stale_url) 146 | assert _ImageGrabber.catch_unavailable_image(stale_img_data), "The stale image tested was found to be good" 147 | junk_url = "http://farm4.static.flickr.com/3225/2806850016_9bf939037e.jpg" 148 | junk_img_data = requests.get(junk_url) 149 | assert _ImageGrabber.catch_unavailable_image(junk_img_data), "The junk image tested was found to be good" 150 | 151 | 152 | class TestOpenImages: 153 | pass 154 | 155 | 156 | class TestGoogleSearch: 157 | pass --------------------------------------------------------------------------------
/snape/make_image_dataset.py: -------------------------------------------------------------------------------- 1 | ############################################################################# 2 | # 3 | # 4 | # The snape.make_image_dataset module provides functionality for downloading 5 | # a unique image dataset. It adopts the same interface as snape.make_dataset: 6 | # the user supplies the desired configuration either as a dictionary (when 7 | # calling from python) or as json (when calling via the command line). 8 | # 9 | # 10 | ############################################################################# 11 | 12 | import imghdr 13 | import os 14 | import requests 15 | import pandas as pd 16 | import sys 17 | from bs4 import BeautifulSoup 18 | from snape import flicker 19 | from snape.make_dataset import parse_args, load_config 20 | from snape.utils import get_random_state 21 | 22 | 23 | def make_image_dataset(config=None): 24 | if config is None: 25 | # called from the command line so parse configuration 26 | args = parse_args(sys.argv[1:]) 27 | config = load_config(args['config']) 28 | 29 | random_state = get_random_state(config["random_seed"]) 30 | 31 | if config["image_source"] == "imagenet": 32 | _ImageNet(n_classes=config["n_classes"], 33 | weights=config["weights"], 34 | n_samples=config["n_samples"], 35 | output_dir=config["out_path"], 36 | random_state=random_state).get_images() 37 | 38 | elif config["image_source"] == "openimages": 39 | print("Not yet supported. The only image_source currently supported is 'imagenet'") 40 | 41 | elif config["image_source"] == "googlesearch": 42 | print("Not yet supported. The only image_source currently supported is 'imagenet'") 43 | 44 | else: 45 | print(config["image_source"], "is not a supported image_source") 46 | print("The only image_source currently supported is 'imagenet'") 47 | 48 | 49 | def check_configuration(conf): 50 | # todo: check values assigned to each config key 51 | expected_conf_args = ["n_classes", "n_samples", "out_path", "weights", "image_source", "random_seed"] 52 | for key in conf.keys(): 53 | assert key in expected_conf_args, key + " is not an allowed configuration argument" 54 | for key in expected_conf_args: 55 | assert key in conf.keys(), key + " was not specified in the configuration" 56 | 57 | 58 | class _ImageNet: 59 | 60 | # todo: prescreen image links for junk 61 | # todo: precompute available synsets 62 | # todo: return class labels 63 | def __init__(self, n_classes, weights, n_samples, output_dir, random_state=None): 64 | self.ilsvrc_synsets = self.get_ilsvrc_1000_synsets() 65 | self.random_state = get_random_state(random_state) 66 | self.chosen_synsets = self.random_state.choice(self.ilsvrc_synsets, n_classes, replace=False) 67 | self.n_samples = n_samples 68 | self.output_dir = output_dir 69 | self.weights = weights 70 | 71 | def get_images(self): 72 | for i, syn in enumerate(self.chosen_synsets): 73 | print(syn) 74 | n = int(self.n_samples * self.weights[i]) 75 | self.sample_synset_links(syn, n, self.output_dir) 76 | 77 | def sample_synset_links(self, wnid, n, img_dir): 78 | img_links = self.get_synset_image_links(wnid) 79 | i = 0 80 | sub_dir = img_dir + wnid 81 | os.mkdir(sub_dir) 82 | while i < n: 83 | pop_ix = self.random_state.choice(len(img_links), 1)[0] 84 | sam = img_links.pop(pop_ix) 85 | file_name = img_dir + wnid + '/' + str(i) + '.jpg' 86 | try: 87 | _ImageGrabber().download_image(sam, file_name) 88 | except: 89 | pass 90 | i = len(os.listdir(sub_dir)) 91 | if len(img_links) == 0:
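# every candidate link for this synset has been tried; stop early rather than
# spin in the while loop forever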
92 | break 93 | # todo: add more functionality for exiting if stuck in while loop 94 | 95 | @staticmethod 96 | def get_ilsvrc_1000_synsets(): 97 | request = requests.get("http://image-net.org/challenges/LSVRC/2014/browse-synsets") 98 | soup = BeautifulSoup(request.text, "html.parser") 99 | html_list = soup.findAll('a') 100 | wnid_list = [] 101 | for h in html_list: 102 | url = h.attrs['href'] 103 | if 'wnid=' in url: 104 | wnid_list.append(url[-9:]) 105 | return wnid_list 106 | 107 | @staticmethod 108 | def get_synset_image_links(wnid): 109 | url = "http://www.image-net.org/api/text/imagenet.synset.geturls?wnid=" 110 | url += wnid 111 | request = requests.get(url) 112 | link_list = request.text.split('\r\n') 113 | return link_list 114 | 115 | @staticmethod 116 | def retrieve_class_counts(): 117 | request = requests.get("http://www.image-net.org/api/xml/ReleaseStatus.xml") 118 | soup = BeautifulSoup(request.text, "xml") 119 | row_list = [] 120 | for synset in soup.findAll('synset'): 121 | row_list.append(synset.attrs) 122 | df = pd.DataFrame(row_list) 123 | df['numImages'] = pd.to_numeric(df['numImages']) 124 | return df 125 | 126 | 127 | class _ImageGrabber: 128 | 129 | def download_image(self, image_url, file_out): 130 | img_data = requests.get(image_url) 131 | if self.catch_unavailable_image(img_data): 132 | pass 133 | else: 134 | with open(file_out, 'wb') as handler: 135 | handler.write(img_data.content) 136 | file_type = imghdr.what(file_out) 137 | if file_type is None: 138 | os.remove(file_out) 139 | else: 140 | print(image_url) 141 | 142 | @staticmethod 143 | def catch_unavailable_image(img_data): 144 | not_an_image = 'image' not in img_data.headers['Content-Type'] 145 | im1_check = img_data.content == flicker.junk_image1 146 | im2_check = img_data.content == flicker.junk_image2 147 | is_it_junk = not_an_image or im1_check or im2_check 148 | return is_it_junk 149 | 150 | 151 | class _OpenImages: 152 | # todo: build this class for scraping the OpenImages dataset 153 | pass 154 | 155 | 156 | class _GoogleSearch: 157 | # todo: build this class for scraping the google api 158 | pass 159 | 160 | # todo: embed each set of class labels in word vectors & pre-compute similarity matrix 161 | 162 | if __name__ == "__main__": 163 | make_image_dataset() 164 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 
25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. 
Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | -------------------------------------------------------------------------------- /snape/make_dataset.py: -------------------------------------------------------------------------------- 1 | 2 | ######################################################################## 3 | # Creates a Machine Learning Problem Dataset 4 | # Suitable for assigning to a student for a homework assignment 5 | # Mostly convenience code for sklearn's make_classification and make_regression routines 6 | # 7 | # 8 | ######################################################################## 9 | 10 | from sklearn.datasets import make_classification, make_regression 11 | from snape.utils import assert_is_type, get_random_state, assert_valid_percent 12 | 13 | import pandas as pd 14 | import numpy as np 15 | import argparse 16 | import json 17 | import re 18 | import os 19 | import sys 20 | 21 | from sklearn.model_selection import train_test_split 22 | 23 | 24 | def parse_args(args): 25 | """ 26 | Returns arguments passed at the command line as a dict 27 | """ 28 | parser = argparse.ArgumentParser(description='Generates a machine learning dataset.') 29 | parser.add_argument('-c', help="Config File Location", required=True, 30 | dest='config') 31 | return vars(parser.parse_args(args)) 32 | 33 | 34 | def load_config(config_name): 35 | """ 36 | Loads a json config file and returns a config dictionary. 37 | :param config_name: the path to the config json 38 | """ 39 | with open(config_name) as config_file: 40 | config = json.load(config_file) 41 | return config 42 | 43 | 44 | def rename_columns(df, prefix='x'): 45 | """ 46 | Rename the columns of a dataframe to have the given prefix (default 'x') in front of them 47 | 48 | :param df: data frame we're operating on 49 | :param prefix: the prefix string 50 | """ 51 | # the prefix needs to be a string 52 | assert_is_type(prefix, str) 53 | 54 | df = df.copy() 55 | df.columns = [prefix + str(i) for i in df.columns] 56 | return df 57 | 58 | 59 | def insert_missing_values(df, percent_rows, random_state=None): 60 | """ 61 | Inserts missing values into a data frame. 62 | 63 | :param df: data frame we're operating on 64 | :param percent_rows: the percentage of rows that should have a missing value. 65 | :param random_state: the numpy RandomState 66 | :return: a df with missing values 67 | """ 68 | # get the initialized random_state (if not already initialized) 69 | random_state = get_random_state(random_state) 70 | df = df.copy() 71 | 72 | def _insert_random_null(x): 73 | """ 74 | Choose a random column in a df row to null. This 75 | operates in-place. But it's on the copy, so it should be OK. 76 | 77 | :param x: a single row of the data frame 78 | """ 79 | # -1 because last col will always be y 80 | x[random_state.randint(0, len(x) - 1)] = np.nan 81 | return x 82 | 83 | # this is a "truthy" check. If it's zero or False, this will work. 84 | if not percent_rows: 85 | return df 86 | else: 87 | # otherwise validate that it's a float 88 | percent_rows = assert_valid_percent(percent_rows, eq_upper=True) # eq_lower not necessary because != 0.
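# Illustrative sketch (an added note, not original code): with percent_rows=0.25 on a
# 1000-row frame, the df.sample(frac=...) call below picks ~250 rows, and each picked
# row gets exactly one NaN in a random non-y position, e.g.:
#
#   df = insert_missing_values(df, 0.25, random_state=42)
#   df.isnull().sum().sum()  # -> roughly 250 missing cells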
89 | sample_index = df.sample(frac=percent_rows, random_state=random_state).index # random sample of rows to null 90 | df.loc[sample_index] = df.loc[sample_index].apply(_insert_random_null, axis=1) 91 | return df 92 | 93 | 94 | def insert_special_char(character, df, random_state=None): 95 | """ 96 | Chooses a column to reformat as currency or percentage, including a $ or % string, to make cleaning harder 97 | 98 | :param character: either $ or % 99 | :param df: the dataframe we're operating on 100 | :param random_state: the numpy RandomState 101 | :return: A dataframe with a single column chosen at random converted to a % or $ format 102 | """ 103 | # get the initialized random_state (if not already initialized) 104 | random_state = get_random_state(random_state) 105 | df = df.copy() 106 | 107 | # choose a column at random, that isn't Y. Only choose from numeric columns (no other eviled up columns) 108 | chosen_col = random_state.choice([col for col in df.select_dtypes(include=['number']).columns if col != 'y']) 109 | 110 | # assert that character is a string and that it's in ('$', '%') 111 | assert_is_type(character, str) 112 | if character not in ('$', '%'): 113 | raise ValueError('expected `character` to be in ("$", "%"), but got {0}'.format(character)) 114 | 115 | # do scaling first: 116 | df[chosen_col] = (df[chosen_col] - df[chosen_col].mean()) / df[chosen_col].std() 117 | 118 | # do the specific div/mul operations 119 | if character == "$": 120 | # multiply by 1000, finally add a $ 121 | df[chosen_col] = (df[chosen_col] * 1000).round(decimals=2).map(lambda x: "$" + str(x)) 122 | else: # elif character == "%": 123 | # divide by 100, finally add a % 124 | df[chosen_col] = (df[chosen_col] / 100).round(decimals=2).map(lambda x: str(x) + "%") 125 | 126 | return df 127 | 128 | 129 | def create_categorical_features(df, label_list, random_state=None, label_name='y'): 130 | """ 131 | Creates random categorical variables 132 | 133 | :param df: data frame we're operating on 134 | :param label_list: A list of lists, each list is the labels for one categorical variable 135 | :param random_state: the numpy RandomState 136 | :param label_name: the column name of the label, if any. Default is 'y' 137 | :return: A modified dataframe 138 | 139 | Example: 140 | 141 | create_categorical_features(df, [['a','b'], ['red','blue']]) 142 | 143 | """ 144 | random_state = get_random_state(random_state) 145 | 146 | df = df.copy() 147 | n_categorical = len(label_list) 148 | 149 | # get numeric columns ONCE so we don't have to do it every time we loop: 150 | numer_cols = [col for col in df.select_dtypes(include=['number']).columns if col != label_name] 151 | 152 | for i in range(0, n_categorical): 153 | # we might be out of numerical columns!
154 | if not numer_cols: 155 | break 156 | 157 | # choose a random numeric column that isn't y 158 | chosen_col = random_state.choice(numer_cols) 159 | # pop the chosen_col out of the numer_cols 160 | numer_cols.pop(numer_cols.index(chosen_col)) 161 | 162 | # use cut to convert that column to categorical 163 | df[chosen_col] = pd.cut(df[chosen_col], bins=len(label_list[i]), labels=label_list[i]) 164 | 165 | return df 166 | 167 | 168 | def create_classification_dataset(n_samples, n_features, n_informative, n_redundant, n_repeated, 169 | n_clusters_per_class, weights, n_classes, random_state=None, 170 | shuffle=True): 171 | """ 172 | Creates a classification dataset (binary or multiclass, depending on n_classes) 173 | 174 | :param n_samples: number of observations 175 | :param n_features: number of features 176 | :param n_informative: number of informative features 177 | :param n_redundant: number of multicollinear features 178 | :param n_repeated: number of perfectly collinear (repeated) features 179 | :param n_clusters_per_class: gaussian clusters per class 180 | :param weights: list of class balances, e.g. [.5, .5] 181 | :param n_classes: the number of class levels 182 | :param random_state: the numpy RandomState 183 | :param shuffle: shuffle the samples and the features. 184 | :return: the requested dataframe 185 | """ 186 | random_state = get_random_state(random_state) 187 | X, y = make_classification(n_samples=n_samples, n_features=n_features, n_informative=n_informative, 188 | n_redundant=n_redundant, n_repeated=n_repeated, 189 | n_clusters_per_class=n_clusters_per_class, weights=weights, 190 | scale=(random_state.rand(n_features) * 10), n_classes=n_classes, 191 | random_state=random_state, shuffle=shuffle) 192 | # cast to a data frame 193 | df = pd.DataFrame(X) 194 | # rename X columns 195 | df = rename_columns(df) 196 | # and add the Y 197 | df['y'] = y 198 | return df 199 | 200 | 201 | def create_regression_dataset(n_samples, n_features, n_informative, effective_rank, tail_strength, 202 | noise, random_state=None, shuffle=True): 203 | """ 204 | Creates a regression dataset 205 | 206 | :param n_samples: number of observations 207 | :param n_features: number of features 208 | :param n_informative: number of informative features 209 | (note: n_targets is not a parameter here; it is fixed at 1 below, so y is a single column) 210 | :param effective_rank: approximate number of singular vectors required to explain data 211 | :param tail_strength: relative importance of the fat noisy tail of the singular values profile 212 | :param noise: standard deviation of the gaussian noise applied to the output 213 | :param random_state: the numpy RandomState 214 | :param shuffle: shuffle the samples and the features. 215 | :return: the requested dataframe 216 | """ 217 | random_state = get_random_state(random_state) 218 | X, y = make_regression(n_samples=n_samples, n_features=n_features, n_informative=n_informative, 219 | n_targets=1, effective_rank=effective_rank, tail_strength=tail_strength, 220 | noise=noise, random_state=random_state, shuffle=shuffle) 221 | 222 | # cast to a data frame 223 | df = pd.DataFrame(X) 224 | # rename X columns 225 | df = rename_columns(df) 226 | # and add the Y 227 | df['y'] = y 228 | return df 229 | 230 | 231 | def make_star_schema(df, out_path="." + os.path.sep): 232 | """ 233 | Converts dataset to star-schema fact and dimension tables.
Dimension tables are written out to CSV files, 234 | and the dataframe passed to the function is converted into a 'fact' table and returned as a dataframe (this 235 | file is NOT written out at this point because the fact table would be subject to test/train split functions, 236 | and dimension tables would not be). 237 | 238 | :param df: Source dataframe 239 | :param out_path: path to write the dimension files to 240 | :return: the 'fact' table dataframe (the dimension tables are written to CSV as a side effect) 241 | """ 242 | def _get_categorical_columns(x): # don't shadow df from outer scope 243 | return x.select_dtypes(include=['category', 'object']).columns 244 | 245 | def _find_dollars(text): 246 | return 1 if re.match(r'^\$-?\d+\.?\d+', str(text)) else 0 247 | 248 | def _find_percentages(text): 249 | return 1 if re.search(r'^-?\d+\.?\d+[%]$', str(text)) else 0 250 | 251 | def _is_special_char(list_object): 252 | if list_object.dtype != 'O': 253 | return False 254 | else: 255 | percent_sum = sum(list_object.apply(_find_percentages)) 256 | dollars_sum = sum(list_object.apply(_find_dollars)) 257 | 258 | return (percent_sum / list_object.count() == 1) or (dollars_sum / list_object.count() == 1) 259 | 260 | # Get the categorical columns 261 | cols = _get_categorical_columns(df) 262 | assert len(cols) > 0, "No categorical variables exist in this dataset; star schema cannot be developed." 263 | 264 | # Iterate through the categorical columns 265 | for cat_column in cols: 266 | 267 | # Determine whether the column already carries requested entropy ($/% formatting) and skip it if so (NOTE: Decided not to make dimension 268 | # tables before this command so dimension keys CAN'T be selected for entropy) 269 | if not _is_special_char(df[cat_column]): # previously was "is not True" but not very pythonic 270 | 271 | # Turn the value counts into a dataframe 272 | vals = pd.DataFrame(df[cat_column].value_counts()) 273 | 274 | # todo: Sara, the following seems hacky... is there a better way to do this? 275 | # Reset the index to add index as the key 276 | vals.reset_index(inplace=True) # Puts the field names into the dataframe 277 | vals.reset_index(inplace=True) # Puts the index numbers in as integers 278 | 279 | # Rename: level_0 -> 'primary_key', index -> 'item', and the counts column -> 'value_count' 280 | vals.rename(index=str, 281 | columns={'level_0': 'primary_key', 282 | 'index': 'item', 283 | cat_column: 'value_count' 284 | }, 285 | inplace=True) 286 | 287 | # Make a df out of just the value and the mapping 288 | val_df = vals[['primary_key', 'item']] 289 | 290 | # todo: Sara, this is hacky (but really cool!) Could you please write a comment block 291 | # todo: ... explaining exactly what you're achieving here? 292 | # Make a dimension df by appending a NaN placeholder 293 | val_df.item.cat.add_categories('Not specified', inplace=True) 294 | val_df = val_df.append({'primary_key': -1, 'item': 'Not specified'}, ignore_index=True) 295 | 296 | # todo: Sara, should we take another param in this function that can either 297 | # todo: ... permit or prevent accidentally overwriting an existing file?
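# Illustrative sketch (an added note, not original code): for a hypothetical
# categorical column x3 with labels 'a'/'b', the val_df built above looks roughly like
#
#   primary_key,item      <- written out below as x3_dim.csv (key order follows value counts)
#   0,b
#   1,a
#   -1,Not specified      <- placeholder row appended for NaNs
#
# and the mapping steps below then replace x3 with an integer x3_key in the fact table.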
298 | # Write the new dimension table out to CSV 299 | dim_file_name = cat_column + '_dim.csv' 300 | val_df.to_csv(out_path + dim_file_name, index=False) 301 | 302 | # Set the index up for mapping 303 | val_df.set_index('item', inplace=True) 304 | 305 | # Convert to dict for mapping 306 | mapper = val_df.to_dict().get('primary_key') 307 | 308 | # Fill the NaNs in the dataframe's categorical column with 'Not specified' 309 | df[cat_column].cat.add_categories('Not specified', inplace=True) 310 | df[cat_column].fillna('Not specified', inplace=True) 311 | 312 | # Insert new column into the dataframe 313 | df.insert(df.shape[1], cat_column + '_key', df[cat_column].map(mapper)) 314 | 315 | # Drop cat column from the dataframe 316 | df.drop(cat_column, axis=1, inplace=True) 317 | 318 | # Now, reset the dataframe's index and rename the index column as 'primary_key' 319 | df.reset_index(inplace=True) 320 | df_cols = df.columns 321 | df_cols = df_cols.delete(0) 322 | df_cols = df_cols.insert(0, 'primary_key') 323 | df.columns = df_cols 324 | 325 | # Return the main dataframe as a 'fact' table, which will then be split into test/train splits 326 | # dimension tables are immune to this 327 | return df.copy() 328 | 329 | 330 | def write_dataset(df, file_name, out_path="." + os.path.sep): 331 | """ 332 | Writes generated dataset to file 333 | 334 | :param df: dataframe to write 335 | :param file_name: beginning of filename 336 | :param out_path: the path to write the dataset 337 | :return: None 338 | """ 339 | # todo: Mike, do we want to take a param for overwriting existing files? 340 | df_train, df_testkey = train_test_split(df, test_size=.2) # note: this split is unseeded, so it is not reproducible 341 | 342 | df_train.to_csv(out_path + file_name + "_train.csv", index=False) 343 | df_test = df_testkey.drop(['y'], axis=1) 344 | df_test.to_csv(out_path + file_name + "_test.csv", index=False) 345 | df_testkey.to_csv(out_path + file_name + "_testkey.csv", index=False) 346 | 347 | 348 | def make_dataset(config=None): 349 | """ 350 | Creates a machine learning dataset based on a configuration dictionary, or on command line arguments when run as a script 351 | 352 | :param config: a configuration dictionary, or None if called from the command line 353 | :return: None 354 | """ 355 | 356 | if config is None: 357 | # called from the command line so parse configuration 358 | args = parse_args(sys.argv[1:]) 359 | config = load_config(args['config']) 360 | 361 | print('-' * 80) 362 | c_type = config['type'] # avoid multiple lookups - fails with KeyError if not present 363 | if c_type not in ('regression', 'classification'): 364 | raise ValueError('type must be in ("regression", "classification"), but got %s' % c_type) 365 | reg = c_type == 'regression' 366 | 367 | # get defaults - these are the defaults from sklearn.
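# Illustrative sketch (an added note, not original code): a minimal config read by the
# lookups below might look like the following -- any key left out falls back to the
# stated default:
#
#   {"type": "classification", "n_samples": 1000, "n_classes": 2,
#    "label_list": [["a", "b"]], "insert_dollar": "Yes",
#    "pct_missing": 0.1, "out_path": "./", "output": "my_dataset"}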
368 | def _safe_get_with_default(cfg, key, default): 369 | if key not in cfg: 370 | print("Warning: %s not in configuration, defaulting to %r" % (key, default)) 371 | return default 372 | return cfg[key] 373 | 374 | n_samples = _safe_get_with_default(config, 'n_samples', 100) 375 | n_features = _safe_get_with_default(config, 'n_features', 20 if not reg else 100) # diff defaults in sklearn 376 | n_informative = _safe_get_with_default(config, 'n_informative', 2 if not reg else 10) # diff defaults in sklearn 377 | n_redundant = _safe_get_with_default(config, 'n_redundant', 2) 378 | n_repeated = _safe_get_with_default(config, 'n_repeated', 0) 379 | n_clusters_per_class = _safe_get_with_default(config, 'n_clusters_per_class', 2) 380 | weights = _safe_get_with_default(config, 'weights', None) 381 | n_classes = _safe_get_with_default(config, 'n_classes', 2) 382 | effective_rank = _safe_get_with_default(config, 'effective_rank', None) 383 | tail_strength = _safe_get_with_default(config, 'tail_strength', 0.5) 384 | noise = _safe_get_with_default(config, 'noise', 0.) 385 | seed = _safe_get_with_default(config, 'random_seed', 42) 386 | shuffle = _safe_get_with_default(config, 'shuffle', True) 387 | 388 | # get the random state 389 | random_state = get_random_state(seed) 390 | 391 | # create the base dataset 392 | if not reg: 393 | print('Creating Classification Dataset...') 394 | df = create_classification_dataset(n_samples=n_samples, n_features=n_features, 395 | n_informative=n_informative, n_redundant=n_redundant, 396 | n_repeated=n_repeated, n_clusters_per_class=n_clusters_per_class, 397 | weights=weights, n_classes=n_classes, random_state=random_state, 398 | shuffle=shuffle) 399 | 400 | else: # elif c_type == 'regression': 401 | print('Creating Regression Dataset...') 402 | df = create_regression_dataset(n_samples=n_samples, n_features=n_features, 403 | n_informative=n_informative, effective_rank=effective_rank, 404 | tail_strength=tail_strength, noise=noise, random_state=random_state, 405 | shuffle=shuffle) 406 | 407 | # make sure to use safe lookups to avoid KeyErrors!!! 408 | label_list = _safe_get_with_default(config, 'label_list', None) 409 | do_categorical = label_list is not None and len(label_list) > 0 410 | 411 | if do_categorical: 412 | print("Creating Categorical Features...") 413 | 414 | df = create_categorical_features(df, label_list, random_state=random_state) 415 | 416 | # insert entropy 417 | insert_dollar = _safe_get_with_default(config, 'insert_dollar', "No") 418 | insert_percent = _safe_get_with_default(config, 'insert_percent', "No") 419 | 420 | if any(entropy == "Yes" for entropy in (insert_dollar, insert_percent)): 421 | print("Inserting Requested Entropy...") 422 | 423 | # add $ or % column if requested 424 | if insert_dollar == "Yes": 425 | df = insert_special_char('$', df, random_state=random_state) 426 | if insert_percent == "Yes": 427 | df = insert_special_char('%', df, random_state=random_state) 428 | 429 | # insert missing values 430 | pct_missing = _safe_get_with_default(config, 'pct_missing', None) 431 | df = insert_missing_values(df, pct_missing, random_state=random_state) 432 | 433 | # Convert dataset to star schema if requested 434 | star_schema = _safe_get_with_default(config, 'star_schema', "No") 435 | outpath = _safe_get_with_default(config, 'out_path', "." 
+ os.path.sep) 436 | if star_schema == "Yes": 437 | # Check the number of categorical variables 438 | if do_categorical: 439 | df = make_star_schema(df, outpath) 440 | else: 441 | print("No categorical variables added. Dataset cannot be transformed into a star schema. " 442 | "Dataset will be generated as a single-table dataset...") 443 | 444 | print("Writing Train/Test Datasets") 445 | write_dataset(df, _safe_get_with_default(config, 'output', 'my_dataset'), outpath) 446 | 447 | 448 | if __name__ == "__main__": 449 | make_dataset() 450 | -------------------------------------------------------------------------------- /example/student_predictions.csv: -------------------------------------------------------------------------------- 1 | 0 2 | 0 3 | 0 4 | 0 5 | 0 6 | 0 7 | 0 8 | 0 9 | 0 10 | 0 11 | 0 12 | 0 13 | 0 14 | 0 15 | 0 16 | 0 17 | 0 18 | 0 19 | 0 20 | 0 21 | 0 22 | 0 23 | 0 24 | 0 25 | 0 26 | 0 27 | 0 28 | 0 29 | 0 30 | 0 31 | 0 32 | 1 33 | 0 34 | 0 35 | 0 36 | 0 37 | 0 38 | 0 39 | 0 40 | 0 41 | 0 42 | 0 43 | 0 44 | 0 45 | 0 46 | 0 47 | 0 48 | 0 49 | 0 50 | 0 51 | 0 52 | 0 53 | 0 54 | 0 55 | 0 56 | 0 57 | 0 58 | 0 59 | 0 60 | 0 61 | 0 62 | 0 63 | 0 64 | 0 65 | 0 66 | 0 67 | 0 68 | 0 69 | 0 70 | 0 71 | 0 72 | 0 73 | 0 74 | 0 75 | 0 76 | 0 77 | 0 78 | 0 79 | 0 80 | 0 81 | 0 82 | 1 83 | 0 84 | 0 85 | 0 86 | 0 87 | 0 88 | 0 89 | 0 90 | 0 91 | 0 92 | 0 93 | 0 94 | 0 95 | 0 96 | 0 97 | 0 98 | 0 99 | 0 100 | 0 101 | 0 102 | 0 103 | 0 104 | 0 105 | 0 106 | 0 107 | 0 108 | 0 109 | 0 110 | 0 111 | 0 112 | 0 113 | 0 114 | 0 115 | 0 116 | 0 117 | 0 118 | 0 119 | 0 120 | 0 121 | 0 122 | 1 123 | 0 124 | 0 125 | 0 126 | 1 127 | 0 128 | 0 129 | 0 130 | 0 131 | 0 132 | 0 133 | 0 134 | 0 135 | 0 136 | 0 137 | 0 138 | 0 139 | 0 140 | 0 141 | 0 142 | 0 143 | 0 144 | 0 145 | 0 146 | 0 147 | 1 148 | 0 149 | 0 150 | 1 151 | 0 152 | 0 153 | 0 154 | 0 155 | 0 156 | 0 157 | 0 158 | 0 159 | 0 160 | 0 161 | 0 162 | 0 163 | 0 164 | 0 165 | 0 166 | 0 167 | 0 168 | 0 169 | 0 170 | 0 171 | 0 172 | 0 173 | 0 174 | 0 175 | 0 176 | 0 177 | 0 178 | 0 179 | 0 180 | 0 181 | 0 182 | 0 183 | 0 184 | 0 185 | 0 186 | 0 187 | 0 188 | 0 189 | 0 190 | 0 191 | 0 192 | 0 193 | 0 194 | 0 195 | 0 196 | 0 197 | 0 198 | 0 199 | 1 200 | 0 201 | 0 202 | 0 203 | 0 204 | 0 205 | 0 206 | 0 207 | 0 208 | 0 209 | 0 210 | 0 211 | 0 212 | 0 213 | 0 214 | 0 215 | 0 216 | 0 217 | 0 218 | 0 219 | 0 220 | 0 221 | 0 222 | 0 223 | 0 224 | 0 225 | 0 226 | 0 227 | 0 228 | 0 229 | 0 230 | 0 231 | 0 232 | 0 233 | 0 234 | 0 235 | 0 236 | 0 237 | 0 238 | 0 239 | 0 240 | 0 241 | 0 242 | 0 243 | 0 244 | 0 245 | 0 246 | 0 247 | 0 248 | 0 249 | 0 250 | 0 251 | 0 252 | 0 253 | 0 254 | 0 255 | 0 256 | 0 257 | 0 258 | 0 259 | 0 260 | 0 261 | 0 262 | 0 263 | 0 264 | 1 265 | 0 266 | 0 267 | 0 268 | 0 269 | 0 270 | 0 271 | 0 272 | 0 273 | 0 274 | 0 275 | 0 276 | 0 277 | 0 278 | 0 279 | 0 280 | 0 281 | 0 282 | 0 283 | 0 284 | 0 285 | 0 286 | 0 287 | 1 288 | 0 289 | 0 290 | 0 291 | 0 292 | 0 293 | 0 294 | 0 295 | 0 296 | 0 297 | 0 298 | 0 299 | 0 300 | 0 301 | 0 302 | 0 303 | 0 304 | 0 305 | 0 306 | 0 307 | 0 308 | 0 309 | 0 310 | 0 311 | 0 312 | 0 313 | 0 314 | 0 315 | 0 316 | 0 317 | 0 318 | 0 319 | 0 320 | 0 321 | 0 322 | 0 323 | 0 324 | 0 325 | 0 326 | 0 327 | 0 328 | 0 329 | 0 330 | 0 331 | 0 332 | 0 333 | 0 334 | 0 335 | 0 336 | 0 337 | 0 338 | 0 339 | 0 340 | 0 341 | 0 342 | 0 343 | 1 344 | 0 345 | 0 346 | 0 347 | 0 348 | 0 349 | 0 350 | 0 351 | 0 352 | 0 353 | 0 354 | 0 355 | 0 356 | 0 357 | 0 358 | 0 359 | 0 360 | 0 361 | 0 362 | 0 363 | 0 364 | 0 365 | 0 
366 | 0 367 | 0 368 | 0 369 | 1 370 | 0 371 | 0 372 | 0 373 | 0 374 | 0 375 | 0 376 | 0 377 | 0 378 | 0 379 | 0 380 | 1 381 | 1 382 | 0 383 | 0 384 | 0 385 | 0 386 | 0 387 | 0 388 | 0 389 | 0 390 | 0 391 | 0 392 | 0 393 | 0 394 | 0 395 | 0 396 | 0 397 | 0 398 | 0 399 | 0 400 | 0 401 | 0 402 | 0 403 | 0 404 | 0 405 | 0 406 | 0 407 | 0 408 | 0 409 | 0 410 | 0 411 | 0 412 | 0 413 | 0 414 | 0 415 | 0 416 | 0 417 | 0 418 | 0 419 | 0 420 | 0 421 | 0 422 | 0 423 | 0 424 | 0 425 | 1 426 | 0 427 | 1 428 | 0 429 | 0 430 | 0 431 | 0 432 | 0 433 | 0 434 | 0 435 | 0 436 | 0 437 | 0 438 | 0 439 | 0 440 | 0 441 | 0 442 | 0 443 | 0 444 | 0 445 | 0 446 | 0 447 | 0 448 | 0 449 | 0 450 | 0 451 | 0 452 | 0 453 | 1 454 | 0 455 | 0 456 | 0 457 | 0 458 | 0 459 | 0 460 | 0 461 | 0 462 | 0 463 | 0 464 | 0 465 | 0 466 | 0 467 | 0 468 | 0 469 | 0 470 | 0 471 | 0 472 | 0 473 | 0 474 | 0 475 | 0 476 | 0 477 | 0 478 | 0 479 | 0 480 | 0 481 | 0 482 | 0 483 | 0 484 | 0 485 | 0 486 | 0 487 | 0 488 | 0 489 | 0 490 | 0 491 | 0 492 | 0 493 | 0 494 | 0 495 | 0 496 | 0 497 | 0 498 | 0 499 | 0 500 | 0 501 | 0 502 | 0 503 | 0 504 | 0 505 | 0 506 | 0 507 | 1 508 | 1 509 | 0 510 | 0 511 | 0 512 | 0 513 | 0 514 | 0 515 | 0 516 | 0 517 | 0 518 | 0 519 | 0 520 | 0 521 | 0 522 | 0 523 | 0 524 | 0 525 | 0 526 | 0 527 | 0 528 | 0 529 | 0 530 | 0 531 | 0 532 | 0 533 | 0 534 | 0 535 | 0 536 | 0 537 | 0 538 | 0 539 | 0 540 | 0 541 | 0 542 | 0 543 | 0 544 | 0 545 | 0 546 | 0 547 | 0 548 | 0 549 | 0 550 | 0 551 | 0 552 | 0 553 | 0 554 | 0 555 | 0 556 | 0 557 | 0 558 | 0 559 | 0 560 | 0 561 | 0 562 | 0 563 | 0 564 | 0 565 | 0 566 | 0 567 | 0 568 | 0 569 | 0 570 | 0 571 | 0 572 | 0 573 | 0 574 | 0 575 | 0 576 | 0 577 | 0 578 | 0 579 | 0 580 | 0 581 | 0 582 | 0 583 | 0 584 | 0 585 | 0 586 | 0 587 | 0 588 | 0 589 | 0 590 | 0 591 | 0 592 | 0 593 | 0 594 | 0 595 | 0 596 | 0 597 | 0 598 | 0 599 | 0 600 | 0 601 | 0 602 | 0 603 | 0 604 | 0 605 | 0 606 | 0 607 | 0 608 | 0 609 | 0 610 | 0 611 | 0 612 | 0 613 | 0 614 | 0 615 | 0 616 | 0 617 | 0 618 | 0 619 | 0 620 | 0 621 | 0 622 | 0 623 | 0 624 | 1 625 | 0 626 | 0 627 | 0 628 | 0 629 | 0 630 | 0 631 | 0 632 | 0 633 | 0 634 | 0 635 | 0 636 | 0 637 | 0 638 | 0 639 | 0 640 | 0 641 | 0 642 | 0 643 | 0 644 | 0 645 | 0 646 | 0 647 | 0 648 | 0 649 | 0 650 | 0 651 | 0 652 | 0 653 | 0 654 | 0 655 | 0 656 | 0 657 | 0 658 | 0 659 | 0 660 | 0 661 | 0 662 | 0 663 | 0 664 | 0 665 | 0 666 | 0 667 | 0 668 | 0 669 | 0 670 | 0 671 | 0 672 | 0 673 | 0 674 | 0 675 | 0 676 | 0 677 | 0 678 | 0 679 | 0 680 | 0 681 | 0 682 | 0 683 | 0 684 | 0 685 | 0 686 | 0 687 | 0 688 | 0 689 | 0 690 | 0 691 | 0 692 | 0 693 | 0 694 | 0 695 | 0 696 | 0 697 | 0 698 | 0 699 | 0 700 | 0 701 | 0 702 | 0 703 | 0 704 | 0 705 | 0 706 | 0 707 | 0 708 | 0 709 | 0 710 | 0 711 | 0 712 | 0 713 | 0 714 | 0 715 | 0 716 | 0 717 | 0 718 | 0 719 | 0 720 | 0 721 | 0 722 | 0 723 | 0 724 | 0 725 | 0 726 | 0 727 | 0 728 | 0 729 | 0 730 | 0 731 | 0 732 | 0 733 | 0 734 | 0 735 | 0 736 | 0 737 | 0 738 | 0 739 | 0 740 | 0 741 | 0 742 | 0 743 | 0 744 | 0 745 | 0 746 | 0 747 | 0 748 | 0 749 | 0 750 | 0 751 | 0 752 | 0 753 | 0 754 | 0 755 | 0 756 | 0 757 | 0 758 | 0 759 | 0 760 | 0 761 | 0 762 | 0 763 | 0 764 | 0 765 | 0 766 | 0 767 | 0 768 | 0 769 | 0 770 | 0 771 | 0 772 | 0 773 | 0 774 | 0 775 | 0 776 | 0 777 | 0 778 | 0 779 | 0 780 | 0 781 | 0 782 | 0 783 | 0 784 | 1 785 | 0 786 | 0 787 | 0 788 | 0 789 | 0 790 | 0 791 | 0 792 | 0 793 | 0 794 | 0 795 | 0 796 | 0 797 | 0 798 | 0 799 | 0 800 | 0 801 | 0 802 | 0 803 | 0 804 | 0 805 | 0 806 | 0 807 | 0 808 | 0 809 | 0 
810 | 0 811 | 0 812 | 0 813 | 0 814 | 0 815 | 0 816 | 0 817 | 0 818 | 0 819 | 0 820 | 0 821 | 0 822 | 0 823 | 0 824 | 0 825 | 0 826 | 0 827 | 0 828 | 0 829 | 0 830 | 0 831 | 0 832 | 0 833 | 0 834 | 0 835 | 0 836 | 0 837 | 0 838 | 0 839 | 0 840 | 0 841 | 0 842 | 0 843 | 0 844 | 0 845 | 0 846 | 0 847 | 0 848 | 0 849 | 0 850 | 0 851 | 0 852 | 0 853 | 0 854 | 0 855 | 0 856 | 0 857 | 0 858 | 0 859 | 0 860 | 0 861 | 0 862 | 0 863 | 0 864 | 0 865 | 0 866 | 0 867 | 0 868 | 0 869 | 0 870 | 0 871 | 0 872 | 0 873 | 0 874 | 0 875 | 0 876 | 0 877 | 0 878 | 0 879 | 0 880 | 0 881 | 0 882 | 0 883 | 0 884 | 0 885 | 0 886 | 0 887 | 0 888 | 0 889 | 0 890 | 0 891 | 0 892 | 0 893 | 0 894 | 0 895 | 1 896 | 0 897 | 0 898 | 0 899 | 0 900 | 0 901 | 0 902 | 0 903 | 0 904 | 0 905 | 0 906 | 0 907 | 0 908 | 0 909 | 0 910 | 0 911 | 0 912 | 0 913 | 0 914 | 0 915 | 0 916 | 0 917 | 0 918 | 0 919 | 0 920 | 0 921 | 0 922 | 0 923 | 0 924 | 0 925 | 0 926 | 0 927 | 0 928 | 0 929 | 0 930 | 0 931 | 0 932 | 0 933 | 0 934 | 0 935 | 0 936 | 0 937 | 0 938 | 0 939 | 0 940 | 0 941 | 0 942 | 0 943 | 0 944 | 0 945 | 0 946 | 0 947 | 0 948 | 0 949 | 0 950 | 0 951 | 0 952 | 0 953 | 0 954 | 0 955 | 0 956 | 0 957 | 0 958 | 0 959 | 0 960 | 0 961 | 0 962 | 0 963 | 0 964 | 0 965 | 0 966 | 0 967 | 0 968 | 0 969 | 0 970 | 0 971 | 0 972 | 0 973 | 0 974 | 0 975 | 0 976 | 0 977 | 0 978 | 0 979 | 0 980 | 0 981 | 0 982 | 0 983 | 0 984 | 0 985 | 0 986 | 0 987 | 0 988 | 0 989 | 1 990 | 1 991 | 0 992 | 0 993 | 0 994 | 0 995 | 0 996 | 0 997 | 0 998 | 0 999 | 0 1000 | 0 1001 | 0 1002 | 0 1003 | 0 1004 | 0 1005 | 0 1006 | 0 1007 | 0 1008 | 0 1009 | 0 1010 | 1 1011 | 0 1012 | 0 1013 | 0 1014 | 0 1015 | 0 1016 | 0 1017 | 0 1018 | 0 1019 | 0 1020 | 0 1021 | 0 1022 | 0 1023 | 0 1024 | 0 1025 | 0 1026 | 0 1027 | 0 1028 | 0 1029 | 0 1030 | 0 1031 | 0 1032 | 0 1033 | 0 1034 | 0 1035 | 0 1036 | 0 1037 | 0 1038 | 0 1039 | 0 1040 | 0 1041 | 0 1042 | 0 1043 | 0 1044 | 0 1045 | 0 1046 | 0 1047 | 0 1048 | 0 1049 | 0 1050 | 0 1051 | 0 1052 | 0 1053 | 0 1054 | 0 1055 | 0 1056 | 0 1057 | 0 1058 | 0 1059 | 0 1060 | 0 1061 | 0 1062 | 0 1063 | 1 1064 | 0 1065 | 0 1066 | 0 1067 | 0 1068 | 0 1069 | 0 1070 | 0 1071 | 0 1072 | 0 1073 | 0 1074 | 0 1075 | 0 1076 | 0 1077 | 0 1078 | 0 1079 | 0 1080 | 0 1081 | 0 1082 | 0 1083 | 0 1084 | 0 1085 | 0 1086 | 1 1087 | 0 1088 | 0 1089 | 0 1090 | 0 1091 | 0 1092 | 0 1093 | 0 1094 | 0 1095 | 0 1096 | 0 1097 | 0 1098 | 0 1099 | 0 1100 | 0 1101 | 0 1102 | 0 1103 | 0 1104 | 0 1105 | 0 1106 | 0 1107 | 0 1108 | 0 1109 | 0 1110 | 0 1111 | 0 1112 | 0 1113 | 0 1114 | 0 1115 | 0 1116 | 0 1117 | 0 1118 | 0 1119 | 0 1120 | 0 1121 | 0 1122 | 0 1123 | 0 1124 | 0 1125 | 0 1126 | 0 1127 | 0 1128 | 0 1129 | 0 1130 | 0 1131 | 0 1132 | 0 1133 | 0 1134 | 0 1135 | 0 1136 | 1 1137 | 0 1138 | 1 1139 | 0 1140 | 0 1141 | 0 1142 | 0 1143 | 0 1144 | 0 1145 | 0 1146 | 0 1147 | 0 1148 | 0 1149 | 0 1150 | 0 1151 | 0 1152 | 0 1153 | 0 1154 | 0 1155 | 0 1156 | 0 1157 | 0 1158 | 0 1159 | 1 1160 | 0 1161 | 0 1162 | 0 1163 | 0 1164 | 0 1165 | 0 1166 | 0 1167 | 0 1168 | 0 1169 | 0 1170 | 0 1171 | 0 1172 | 0 1173 | 0 1174 | 0 1175 | 0 1176 | 0 1177 | 0 1178 | 0 1179 | 0 1180 | 0 1181 | 0 1182 | 0 1183 | 0 1184 | 0 1185 | 0 1186 | 0 1187 | 0 1188 | 0 1189 | 0 1190 | 0 1191 | 0 1192 | 0 1193 | 0 1194 | 0 1195 | 0 1196 | 0 1197 | 0 1198 | 0 1199 | 0 1200 | 0 1201 | 0 1202 | 0 1203 | 0 1204 | 0 1205 | 0 1206 | 0 1207 | 0 1208 | 0 1209 | 0 1210 | 0 1211 | 0 1212 | 0 1213 | 0 1214 | 0 1215 | 0 1216 | 0 1217 | 0 1218 | 0 1219 | 0 1220 | 0 1221 | 0 1222 | 0 1223 | 0 1224 | 1 1225 | 0 
1226 | 0 1227 | 0 1228 | 0 1229 | 0 1230 | 0 1231 | 0 1232 | 0 1233 | 0 1234 | 0 1235 | 0 1236 | 0 1237 | 0 1238 | 0 1239 | 0 1240 | 0 1241 | 0 1242 | 0 1243 | 0 1244 | 0 1245 | 0 1246 | 0 1247 | 0 1248 | 0 1249 | 0 1250 | 0 1251 | 0 1252 | 0 1253 | 0 1254 | 0 1255 | 0 1256 | 0 1257 | 0 1258 | 0 1259 | 0 1260 | 0 1261 | 0 1262 | 0 1263 | 0 1264 | 0 1265 | 0 1266 | 0 1267 | 0 1268 | 0 1269 | 0 1270 | 0 1271 | 0 1272 | 0 1273 | 0 1274 | 0 1275 | 0 1276 | 0 1277 | 0 1278 | 0 1279 | 0 1280 | 0 1281 | 0 1282 | 0 1283 | 0 1284 | 0 1285 | 0 1286 | 0 1287 | 0 1288 | 0 1289 | 0 1290 | 0 1291 | 0 1292 | 0 1293 | 0 1294 | 0 1295 | 0 1296 | 0 1297 | 0 1298 | 0 1299 | 0 1300 | 0 1301 | 0 1302 | 0 1303 | 0 1304 | 0 1305 | 0 1306 | 0 1307 | 0 1308 | 0 1309 | 0 1310 | 0 1311 | 0 1312 | 0 1313 | 0 1314 | 0 1315 | 0 1316 | 0 1317 | 0 1318 | 0 1319 | 0 1320 | 0 1321 | 0 1322 | 0 1323 | 0 1324 | 0 1325 | 0 1326 | 0 1327 | 0 1328 | 0 1329 | 0 1330 | 0 1331 | 0 1332 | 0 1333 | 0 1334 | 0 1335 | 0 1336 | 0 1337 | 0 1338 | 0 1339 | 0 1340 | 0 1341 | 1 1342 | 0 1343 | 0 1344 | 0 1345 | 0 1346 | 0 1347 | 0 1348 | 0 1349 | 0 1350 | 0 1351 | 0 1352 | 0 1353 | 0 1354 | 0 1355 | 0 1356 | 0 1357 | 0 1358 | 0 1359 | 0 1360 | 0 1361 | 0 1362 | 0 1363 | 0 1364 | 0 1365 | 0 1366 | 0 1367 | 0 1368 | 0 1369 | 0 1370 | 0 1371 | 0 1372 | 0 1373 | 0 1374 | 0 1375 | 0 1376 | 1 1377 | 0 1378 | 0 1379 | 0 1380 | 0 1381 | 0 1382 | 0 1383 | 0 1384 | 0 1385 | 0 1386 | 0 1387 | 0 1388 | 0 1389 | 0 1390 | 0 1391 | 0 1392 | 0 1393 | 0 1394 | 0 1395 | 0 1396 | 0 1397 | 0 1398 | 0 1399 | 0 1400 | 0 1401 | 0 1402 | 1 1403 | 0 1404 | 0 1405 | 0 1406 | 0 1407 | 0 1408 | 0 1409 | 0 1410 | 0 1411 | 0 1412 | 0 1413 | 0 1414 | 0 1415 | 0 1416 | 0 1417 | 0 1418 | 0 1419 | 0 1420 | 0 1421 | 0 1422 | 0 1423 | 0 1424 | 0 1425 | 0 1426 | 0 1427 | 0 1428 | 0 1429 | 0 1430 | 0 1431 | 0 1432 | 0 1433 | 0 1434 | 0 1435 | 0 1436 | 0 1437 | 0 1438 | 0 1439 | 0 1440 | 0 1441 | 0 1442 | 0 1443 | 0 1444 | 0 1445 | 0 1446 | 0 1447 | 0 1448 | 0 1449 | 0 1450 | 0 1451 | 0 1452 | 0 1453 | 0 1454 | 0 1455 | 0 1456 | 0 1457 | 0 1458 | 0 1459 | 0 1460 | 0 1461 | 0 1462 | 0 1463 | 0 1464 | 0 1465 | 0 1466 | 0 1467 | 0 1468 | 0 1469 | 0 1470 | 0 1471 | 0 1472 | 0 1473 | 0 1474 | 0 1475 | 0 1476 | 0 1477 | 0 1478 | 0 1479 | 0 1480 | 0 1481 | 0 1482 | 0 1483 | 0 1484 | 0 1485 | 0 1486 | 0 1487 | 0 1488 | 0 1489 | 0 1490 | 0 1491 | 0 1492 | 0 1493 | 0 1494 | 0 1495 | 0 1496 | 0 1497 | 0 1498 | 0 1499 | 0 1500 | 0 1501 | 0 1502 | 0 1503 | 1 1504 | 0 1505 | 0 1506 | 0 1507 | 0 1508 | 0 1509 | 0 1510 | 0 1511 | 0 1512 | 0 1513 | 0 1514 | 0 1515 | 0 1516 | 0 1517 | 0 1518 | 0 1519 | 0 1520 | 0 1521 | 0 1522 | 0 1523 | 0 1524 | 0 1525 | 0 1526 | 0 1527 | 0 1528 | 0 1529 | 0 1530 | 0 1531 | 0 1532 | 0 1533 | 0 1534 | 0 1535 | 0 1536 | 0 1537 | 0 1538 | 0 1539 | 0 1540 | 0 1541 | 0 1542 | 0 1543 | 1 1544 | 0 1545 | 1 1546 | 0 1547 | 0 1548 | 0 1549 | 0 1550 | 0 1551 | 0 1552 | 0 1553 | 0 1554 | 0 1555 | 0 1556 | 0 1557 | 1 1558 | 0 1559 | 0 1560 | 0 1561 | 0 1562 | 0 1563 | 0 1564 | 0 1565 | 0 1566 | 0 1567 | 0 1568 | 0 1569 | 0 1570 | 0 1571 | 0 1572 | 0 1573 | 0 1574 | 0 1575 | 0 1576 | 0 1577 | 0 1578 | 0 1579 | 0 1580 | 0 1581 | 0 1582 | 0 1583 | 0 1584 | 0 1585 | 0 1586 | 0 1587 | 0 1588 | 0 1589 | 0 1590 | 0 1591 | 0 1592 | 0 1593 | 0 1594 | 0 1595 | 0 1596 | 0 1597 | 0 1598 | 0 1599 | 0 1600 | 0 1601 | 0 1602 | 0 1603 | 0 1604 | 0 1605 | 0 1606 | 0 1607 | 0 1608 | 0 1609 | 0 1610 | 0 1611 | 1 1612 | 0 1613 | 0 1614 | 0 1615 | 0 1616 | 0 1617 | 0 1618 | 1 1619 | 0 1620 | 0 
1621 | 0 1622 | 0 1623 | 0 1624 | 0 1625 | 0 1626 | 0 1627 | 0 1628 | 0 1629 | 0 1630 | 0 1631 | 0 1632 | 0 1633 | 0 1634 | 0 1635 | 0 1636 | 0 1637 | 0 1638 | 0 1639 | 0 1640 | 0 1641 | 0 1642 | 0 1643 | 0 1644 | 0 1645 | 0 1646 | 0 1647 | 0 1648 | 0 1649 | 0 1650 | 0 1651 | 0 1652 | 0 1653 | 0 1654 | 0 1655 | 0 1656 | 0 1657 | 0 1658 | 0 1659 | 0 1660 | 0 1661 | 0 1662 | 0 1663 | 0 1664 | 0 1665 | 0 1666 | 0 1667 | 0 1668 | 0 1669 | 0 1670 | 0 1671 | 0 1672 | 0 1673 | 1 1674 | 0 1675 | 0 1676 | 0 1677 | 0 1678 | 0 1679 | 0 1680 | 0 1681 | 0 1682 | 0 1683 | 0 1684 | 0 1685 | 0 1686 | 0 1687 | 0 1688 | 0 1689 | 0 1690 | 0 1691 | 0 1692 | 0 1693 | 0 1694 | 0 1695 | 0 1696 | 0 1697 | 0 1698 | 0 1699 | 0 1700 | 0 1701 | 0 1702 | 0 1703 | 1 1704 | 0 1705 | 0 1706 | 0 1707 | 0 1708 | 0 1709 | 0 1710 | 0 1711 | 0 1712 | 0 1713 | 0 1714 | 0 1715 | 0 1716 | 0 1717 | 0 1718 | 0 1719 | 1 1720 | 0 1721 | 0 1722 | 0 1723 | 0 1724 | 0 1725 | 0 1726 | 0 1727 | 0 1728 | 0 1729 | 0 1730 | 0 1731 | 0 1732 | 0 1733 | 0 1734 | 0 1735 | 0 1736 | 0 1737 | 0 1738 | 0 1739 | 0 1740 | 0 1741 | 0 1742 | 0 1743 | 0 1744 | 0 1745 | 0 1746 | 0 1747 | 0 1748 | 0 1749 | 0 1750 | 0 1751 | 0 1752 | 0 1753 | 0 1754 | 0 1755 | 0 1756 | 0 1757 | 0 1758 | 0 1759 | 0 1760 | 0 1761 | 0 1762 | 0 1763 | 0 1764 | 0 1765 | 0 1766 | 0 1767 | 0 1768 | 0 1769 | 0 1770 | 0 1771 | 0 1772 | 0 1773 | 0 1774 | 0 1775 | 0 1776 | 0 1777 | 0 1778 | 0 1779 | 0 1780 | 0 1781 | 0 1782 | 0 1783 | 0 1784 | 0 1785 | 0 1786 | 0 1787 | 0 1788 | 0 1789 | 0 1790 | 0 1791 | 0 1792 | 0 1793 | 0 1794 | 0 1795 | 0 1796 | 0 1797 | 0 1798 | 0 1799 | 0 1800 | 0 1801 | 0 1802 | 0 1803 | 0 1804 | 0 1805 | 0 1806 | 0 1807 | 0 1808 | 0 1809 | 0 1810 | 0 1811 | 0 1812 | 0 1813 | 0 1814 | 0 1815 | 0 1816 | 0 1817 | 0 1818 | 1 1819 | 0 1820 | 0 1821 | 0 1822 | 0 1823 | 0 1824 | 0 1825 | 0 1826 | 0 1827 | 0 1828 | 0 1829 | 0 1830 | 0 1831 | 0 1832 | 0 1833 | 0 1834 | 0 1835 | 0 1836 | 0 1837 | 0 1838 | 0 1839 | 0 1840 | 0 1841 | 0 1842 | 0 1843 | 0 1844 | 0 1845 | 0 1846 | 0 1847 | 0 1848 | 0 1849 | 0 1850 | 0 1851 | 0 1852 | 0 1853 | 0 1854 | 0 1855 | 0 1856 | 0 1857 | 0 1858 | 0 1859 | 0 1860 | 0 1861 | 0 1862 | 0 1863 | 0 1864 | 0 1865 | 0 1866 | 0 1867 | 0 1868 | 1 1869 | 0 1870 | 0 1871 | 0 1872 | 0 1873 | 0 1874 | 0 1875 | 0 1876 | 0 1877 | 0 1878 | 0 1879 | 0 1880 | 0 1881 | 0 1882 | 0 1883 | 0 1884 | 0 1885 | 0 1886 | 0 1887 | 0 1888 | 0 1889 | 0 1890 | 0 1891 | 0 1892 | 0 1893 | 0 1894 | 0 1895 | 0 1896 | 0 1897 | 0 1898 | 0 1899 | 0 1900 | 0 1901 | 0 1902 | 0 1903 | 0 1904 | 0 1905 | 0 1906 | 0 1907 | 0 1908 | 0 1909 | 0 1910 | 0 1911 | 0 1912 | 0 1913 | 0 1914 | 0 1915 | 0 1916 | 0 1917 | 0 1918 | 0 1919 | 0 1920 | 0 1921 | 0 1922 | 0 1923 | 0 1924 | 0 1925 | 0 1926 | 0 1927 | 0 1928 | 0 1929 | 0 1930 | 0 1931 | 0 1932 | 0 1933 | 0 1934 | 0 1935 | 0 1936 | 0 1937 | 0 1938 | 0 1939 | 1 1940 | 0 1941 | 0 1942 | 0 1943 | 0 1944 | 0 1945 | 0 1946 | 0 1947 | 0 1948 | 0 1949 | 0 1950 | 0 1951 | 0 1952 | 0 1953 | 0 1954 | 0 1955 | 0 1956 | 0 1957 | 1 1958 | 0 1959 | 0 1960 | 0 1961 | 0 1962 | 0 1963 | 1 1964 | 0 1965 | 0 1966 | 0 1967 | 0 1968 | 0 1969 | 0 1970 | 0 1971 | 0 1972 | 0 1973 | 0 1974 | 0 1975 | 0 1976 | 0 1977 | 0 1978 | 0 1979 | 0 1980 | 0 1981 | 0 1982 | 0 1983 | 0 1984 | 0 1985 | 1 1986 | 0 1987 | 0 1988 | 0 1989 | 0 1990 | 0 1991 | 0 1992 | 0 1993 | 0 1994 | 0 1995 | 0 1996 | 0 1997 | 0 1998 | 0 1999 | 0 2000 | 0 2001 | -------------------------------------------------------------------------------- /snape/flicker.py: 
-------------------------------------------------------------------------------- 1 | 2 | junk_image1 = b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x01\xf4\x00\x00\x01v\x08\x00\x00\x00\x00\xbd\xc55\x85\x00\x00\x07\xcaIDATx\x01\xec\xd9m\xeb\xd2P\x1c\xc6\xf1\xff\xfb\x7f)g\xe1)oB\x0ef\x92\xe2\xcd\n\x94\xac\xa8\x19Ef\xa93l\xd8\xb4:n\xfe\xba\xda\xa6\xab\xe7\xff\'\x1e\xaf/x\xcdm\x0f?0\x06\xbb\xdb\xb2\x9b\xebn+\xec\xc6":\xd1\x19\xd1\x19\xd1\x19\xd1\x19\xd1\x19\xd1\x19\xd1\xd9}\x17\xf5\xb4\xf2F\xf2\xcc\x98\xb6D\xe3\xf1x)e_\x8c1\x9f\x88\xee\\{\xad\xd0P\xeaJiY\xe0\xef+)\xfb\x88\xf3\xc09t6\x85km0#\xfa-\xd5\x85\xebN$G\xff\x15\x86a\xec8:\xb3\xb6\rWk\x8f9zj\xadM\xe4o\xbb\xd5zwAOp\xf9\x98\xc6q\xfcS\xa2\xc5\x86\xe8W\x9e*\xea\xfd\xf7xO\xdfT\x15j\x14\xe8I\x13\x87\xc5wL\xab\x8bq\x11\x9d\xe8\xb6\xa5\xb2\xaa\x05\xfa\x04;\x11\xa0gi\xa2_y\xbe\xff\x10\x8e\xbe\x1f\xfc\x8b\xfe\x1c\xeb\xf5_\x0e;9z\x88y\x92^\xd0\xebD\xbf\xfaL\xf1\xc0.\xd1\x7f{\xd8%.\x9d2\xf4\xd7\xb8\xf3\xe8 9\xba\x9e\xad\xbf:\x88N\xf4\x15\xa6S\xbe\xbdW\xf0[I\x81\xfe\xc1\xcd\xb7w\xa2\xcf1~\x89\xae\xf1\x9b\x9f\xd1\x0fn\xa2\x13}\x83i\x9e.\xe8S\x8d;\xe7\xc7\xbb%\xba\x9b\xe8\xa9\xc6\xbe8\x8a\xec3\xf4\xe0-\xa6\xe36:\xd1%P\xa8\xd2z\xac\x92\x0c=m`\xdf\xb9\x8dNt\x99\xa8<\x9b\xa1\xcbg\xec\x83\xc8\x1dt60\xc6\xe0\x90\x7fZ]\xe3\xe4=N\xbe\x8dj\xca\xab\xf6\x92\xe2\xd3j\x1f\x87\xd1\x8f\xa7\xe8\x0f;w\xb3\xda6\x10\x05`\xf4\xfd_\xe5\x92(\xf4\x87&V\xf0\xc2\tz\to\xda\x85\x1f ?\xc5\x91l\x8c\x1ar\x17\x85\xc9N\x85\x82f\xce\xb7\x1a\x06\xb4: \x86\xe12\xe7\xa6\xd1\x05\x1d\xba\xa0\x0b\xba\xa0\x0b\xba\xa0\x0b\xba\xa0\x0b\xba\xa0\x0b\xba\xa0\x0b\xba\xa0\x0b\xba\xa0\x0b:tA\x17tA\x17tA\x17tA\x17tA\x17tA\x17tA\x17tA\x17t\xe8\x82.\xe8\x82.\xe8\x82.\xe8\x82.\xe8\x82.\xe8\x82.\xe8\x82.\xe8\x82.\xe8\xd0\x05]\xd0\x05]\xd0\x05}\x95A\x17tA\x17tA\x17tA\x17tA\x17tA\x17tA\x17tA\x17tA\x87.\xe8\x82.\xe8\x82.\xe8\x82.\xe8\x82.\xe8\x82.\xe8\x82.\xe8\x82.\xe8\x82\x0e]\xd0\x05]\xd0\x05]\xd0\x05]\xd0\x05]\xd0\x05]\xd0\x05]\xd0\x05]\xd0\x05\x1d\xba\xa0\x0b\xba\xa0\x0b\xba\xa0\x0b\xba\xa0\x0b\xba\xa0\x0b\xba\xa0\x0b\xba\xa0\x0b\xba\xa0\x0b\xfae\xbf\xed\xba\xed\xfe\x02\xbd\x9d\x9e\xbe\xc5G\xdd3\xf4V:^\xc7\x97\xc3\xe9t\xb8\x8e\xeb7\xe8m4~\x8d\xab\xd7\xf9\xbd\xdfW\xf1}\x82\xdeB\xe3]\xc4\xcf\\\xfe\x8a\xb8\x9f\xa0\xb7a\x1e\xa7\\\x9f#\xd5\xa1Wo~w\xf5\x17\xbd\xebS\x1dz\xe5\xe6\xe36\x0e\xf3G\x87\xd8N\xa9\x0e\xbdr\xf3y\x1f\xb7\xa7\xf9\xbd\xd3m\xec\xe7T\x87^\xb9\xf9|\xe9\xe3\xc7\xe1|>\xdc\xc6\xe62\xa7:\xf4\xba\xcd\xdf;\xf6\xf1Q\x7f\x9c\xe7*\xd5\xa1\x17\xe6y\r{\xdfu\xf7y\r[\xb1:\xf44/jN=\xd1\x99W\xa7\x0e\xbd0\xa7\x9e\xe8\xccS}\x07}\xfd\xbd\x0e7\xb1\xb0\x9b\xe1\x15\xfa\x1a{I\xf2\xa5\xec/\xd0W\xd8\x10\x0fo\xf3\xc2\xde\x1eb\x80\xbe\xc2n"\xcd\x97\xa9G\x07}\x85E\xfc\xb7\xaf\xa1C\x87\x0e\x1d:t\xe8\xd0\xa1C\x87\x0e\x1d:\xf4\x05A\x7f\xde\x8d\xf3\xa7\xc6\xddS\xc5\xe8\xd0w\xd1\x7fR\x1f\xfb\xd8U\x8c\x0e}\xecS\xbd\xdc\xaa\x18\x1d\xfag\xe2)7*F\x87\x9e\xea\xa5y\xd5\xe8\xd0\x0b\xf54\xaf\x1a\x1dz\xa1\x9e\xe6\xf5\xa3CO\xf5\xc2\xbcrt\xe8\xa9^\x98W\x8e\x0e=\xd5\xd3\xbc\rt\xe8\xa9\xbe\xd9\xa4y+\xe8\xd0\xe7q\x13\xb1\x19\xe7\x96\xd0\xa1O\xcd\xa1C\x9fZ\xfb\xbdC\xcfs\xfb\xd8\xd2A\x0e\xfa\x94\xdc\xa9^3:\xf4\xd2\xbcP\xaf\x10\x1dzi^\xa8W\x88\x0e\xbd4/\xd4+D\x87^\x9a\x17\xea\x15\xa2C/\xcd\x0b\xf5\x9a\xd1\xa1O\r\x8cKA7\x18\t\xfd\xa9\xfa\x11h\xe8\xcb\x82\xfe\xefA\x87\x0e\x1d:t\xe8\xd0\xa1C\x87\x0e\x1d:t\xe8\x1e\x0f\x84>\xc4\xe3b\xf5\xe3c\x0c\xd0=\x08\x0c}\x1d\xeaC\x17\x0b\xeb\x86\x17O\x7f\x0b\xba\xa0\x0b\xba\xa0\x0b\xba\xa0\x0b\xba\xa0\x0b\xba\xa0\x0b\xba\xa0\x0b\xba\xa0C\x17tA\x17tA\x17tA\x17tA\x17\xf4?\xec\x98\xa1\xaf\xf48\x0c\xc4\xff\xec\
xe1\xc6\xc1\xc5\xa6\xa1\x81\xa1f\x85\x81\x85f\x81afs\xdf\x97\xb6\xbb\x01OO\xba\xb7\xf7\xa4\x03\x1d\xb0\x91\x93\x19\xbb\xd2O\xa9\xb4}\xc9O1 \x00,\x00\x00\x00\x00\xf4\x01v\x01\x00\x05\xff`%\x8edi\x9eh\xaa\xael\xeb\xbep,\xcftm\xdfx\xae\xef|\xef\xff%\x82pH,\x1a\x8f\xc8\xa4r\xc9l:\x9f\xd0\xa8tJ\xadZ\xaf\xd8\xacv\xcb\xedz\xbfB\x11xL.\x9b\xcf\xe8\xb4z\xcdn\xbb\x89\xe2\xb7|N\xaf\xdb\xef\xf8\xbc>\\\xd9\xfb\xff\x80\x81\x82\x83\x84^q\x85\x88\x89\x8a\x8b\x8c\x8dd\x87\x8e\x91\x92\x93\x94\x95\x82\x90\x96\x99\x9a\x9b\x9c\x9dW\x98\x9e\xa1\xa2\xa3\xa4\x93\xa0\xa5\xa8\xa9\xaa\xabs\xa7\xac\xaf\xb0\xb1\xb2U\xae\xb3\xb6\xb7\xb8\xb2\xb5\xb9\xbc\xbd\xbe\x9b\xbb\xbf\xc2\xc3\xc4\x84\xc1\xc5\xc8\xc9\xcao\xc7\xcb\xce\xcf\xd0\\\xcd\xd1\xd4\xd5\xd6K\xd3\xd7\xda\xdb\xd4\xd9\xdc\xdf\xe0\xc4\xde\xe1\xe4\xe5\xb6\xe3\xe6\xe9\xea\xa8\xe8\xeb\xee\xef\x9a\xed\xf0\xf3\xf4\x8c\xf2\xf5\xf8\xf9\x80\xf7\xfa\xfd\xfet\xfc\xfe\t\x1c\x88& \xc1\x83\x08\xa5\xf5I\xc8\xb0\xe1\x1a\x83\x0e#JL\x02q\xa2E\x8b\x15/jl\x98q\xa3G\x82\x1d?\x8a\xec\x17r\xa4Iz%O\xff\xaa\\\x97r\xa5Kr-_\xca\xdc\x16s\xa6\xcdn\x0bo\xea\xfcWs\xa7\xcfb=om\xb0@\xb4(\x01\x0f9\x85XX\xb2TOS"O\x9fD}\x98\x81\xc0\x86\rm\x96V\xf0\x80d\xea\xd6|Ame\xb8pa\x00\x06\xb2\x04,\\(\xb2VI\xdb\xdaL\x8e~\x0cd4n&\xc8B\x86\x82\x90\x86Ld<\x00\xa9\xc8F\xfa\x02\x91\x8e\x8cd# )\xc9J"\x82\x92\x96\xccd 0\xa9\xc9N\xea\x81\x93\x9e\x0ce\x1d@)\xcaR\xba\x81\x94\xa6Le\x1aP\xa9\xcaV>\x82\x91\xae\x8c\xe5"X)\xcbZf\x81\x96\xb6\xcc%\x15p\xa9\xcb^>\x81\x97\xbe\x0c\xa6\x12\x80)\xccb\x16\x81\x98\xc6L&2\x93Y\xcce23\x98\xce|f/\xa3iI6\xb2Q\x9aG\xa0\xe6:8\xd0\x9b\x8a\xe9\xe5\x8e~\xaa\xd2\x06\xbey"\xba\xf5\x85\x9c(\x02\xe7\x85\x8c@\x96v\x92\xa50\xa5\x1a\xcf\x10\xae\xe3\xcezB\x8b\x9c\xf8\xac\x12x\xf2\xa9\x17\x14IR\x9b\xeb\x10\rx\xea\x89\x1a\xb2\x08\xe9H\xef4\'\xbd\xd0\xa6\xd0b%\x0cW\xf9l\n=\xcb2\x1a\x0b\x8cfQn$\xa7\x83\xbeI\xa1z\xb6\xffsh\xd4\x11\xd2\x85D\xda?G\x02T\x1d\x1c\xa8\\QV\x9aN\xbf\xb0t3\x16\x80\xd0b\xde\xf2\x14u>4-PlJF\xf9\xa9\xcf\x8e9\xe1)\xebt\xe8\x882yRwxs_I\x8biI\x91\x951\r|\xb3\xa6+[i\xf3 \x8a\xcf\xa7\x0c\x85(\\\xc1J\x05\xfc\xf9\xb9\xd1\x18A\xaaJ\xb9TY\x88"S\xa3X\xb2\xa8\xeb\x18\x8b;Q\xb3\x14\xa6I\xb57\x15\xc2\xdd\xbe\x18\xaa%\xbb\x14ah\xf5\xaci[\x96\xb2\x94\xa0\x86\x81\x85C\xe2\xeaS\x9a\x02T|]\xc8HD\x85%\x1c\x87\x82\x9b\xab\x16\xa57x-\x8b=\xf3\xd2\xb7\xca\xd1\xb4\xa1]\xf9*\xdd\xa2\xcaW\xa1\n\xc7\x8dkq*T\x86\x9a\x16\xc1X,K\x7fi$Z\xcd\xd1W|\x8e\x0bC)\x83\x92\xd5\xe4w\x9699U/O\xd9MQF\xa7.\xa9N5/{-\xed:\xa3c\x81\xf7\x8ckhaM\xaeU\xa3Z\xd6\xdf\xfeS\xb1x|\xd2Y\xaa\x930\xbej\x883\xffX9\xcb\x1d\xe7\xca/Ni\xc8.\x7f\x81\xaaQZ\xeb\x9b,\x81s3\x0bYi\xa2\xf4\x9a\x1b\xd4\ng\x81\x86\\-9\x98\xe4F\xce\xd2n\xa8Ka\x12c\x8c\x0b\\\xd2f\x00z\x98M\xeeh\xeb\x1a\xd6\xfc\xdc\xf4\xbcD\x98\x1bi\xabWP\xf7\xfe\x97\xc1\xaa\x85\xaeO\x1e\xa3\x9b\xe9\xba\x13\xae|u\r\xde\x14\xaa\xab\xa7\n\xa1o\xfd\xb2\x8b\x01|\xabW\xe4\xe4\xd7\xb3!^\x82\x88\xda\xea-\x87\xae\x18<\xd0\x83M\x84\x1d\xb9(\xc7\xe43\xb4\x16U\xd3\x92\xc0cQ\x02s\xb7Z\\\xeco\x82\x8d0\xb7\r\xc8\x86?\x19 
\n\xb8\x8c\xbc\x04\x97\xf6\xa6\x03\xb0\xe2\xce\x05\xee\xd7\xa7\xc7\xc0\xb7\x90\xf2e\x08\x85\xb0y\x87,sY\x94^\xfe\xb2\'\xc3,fM\x92\xb9\xccg\x950\x9a\xd7<\x853\xb3\xd9\xa4j~\xb3\x9c\x99\xe0\xe69\xc77\xcev\xce\xf31\xf1\xac\xe7>\xd7\xb9\xcf}\xfc3\xa0\xf5(\xe8A\xc3\xb1\xd0\x86\xf6\t\xa2\x13\xad\x93E3\xda&\x8e~\xff\xb4L"-i\x97P\xba\xd2*\xb94\xa6M\xa2\xe9M\x8b\xa4\xd3\x9e\xf6\x08\xa8C\xad\x91Q\x93\x1a#|>\xf53M\xfd\x86\x95\n)i\xa9N\x82T\x15L\x84+k\xf6\x08\xb6fC_\x8cP\x81\xaa\\%\n_1\xc2\xaf\xe1\xe04)\x18gQ\x15\xe0\x00\x1a\x91\x82\x10V\xbb\xe1w\x17\xf0`d\x92\xed\x05hO\x19\t5C\x82]yD\x87\xce\x16\x01/\xcc\x89\x82\xb2\x8f\x90CEUA\xb2\xcaF\n\xad\xa1\xb2m\x92\xc4\x1a\x19\xb1\xfa\xabU\xaa\xf2\x1b\xd4<\xa6\xc7\xe0\x91\x0er0s\x1cB}p\x9e\xa6\xc9\x00\xec@W\xef\xe0\xc8\x072\x03\x7f\xe8c\xb4\x86\x95\x82o\x86<\x16\xdbMo\xc4C\x9e\xe4D[9\xe2\x00I\xc8`.\xbaP\x86\x0e\x9f\xebp\x0c}\r\xf1\x96\x95ZZ\\\xa6UE\xc6\xd7\xb7\n\xd6\x7f=@!6\xd6\xa7`\xae\xd2\x14\xac\x0c\xc4)#\xaa\x8aU\x0c,B\xa9\x86\xc2\xf9\xab\xdc\x8aY\xddra\xa4\xd4b\xbe\x95U@7\x03\x95\xd6\xe1\x90\x057oI\x00\xb9\xeb\xdb=CKu\x017\xeejZ\x00\xc2\x1ar\xe4$*\\\xb9\xf1j\xbc\x8a\x0f\xa0f\xcf\xd7\xde\x10\xbeQ\xcc\xb2\xda<.\xbf\x86\xa2\xfb\x85,4\xfb\x93\x03\xdd31{MYe\xf05\x00\x13\xb9\xe60r\x19\x0es\xdc^?r\xb2}\x98\xf3\xa7_`sA0\xf6\xc2"\xff\xb2\xb2*I\x86%X\xb290C\x80\xe6\xb6/\xec\xa2(\xa43/\x15dnt\xe31\x148\x04}\x13~4S1\\R,\x8a\xd7+m\xf16\x1d(21\xa2!7\xf5G\xea\x90y\xcc\xa2\x16K\xd34\xdc!6V\xc38\xac\x91#\xfevL\xad\xf7\'\xb8A6\xda\xe23\xd4\xc33\xab\xf3o\xd5\xa3\x19\x8f\xd1\x82=\x92\x17WS!\x1e\xb25M\x13oxa\x16/\xa2:UA8\x86\xf3A\x18\xa3\x83\xc8\x17{\x10\x03v\x9a1\x16\xc4\x03 \xc2\xa1+k\xe3\x82\xc4\xa3}\xb8\xc1s\x07W<\x8e\xd17\xa3\xa37\xe5\xa6\x0e\xe4\xa7\x06\xf3\xb2*7\xb2\x16\xb4c%\xd1\x86;\xd5S%\x06c:L\xa2U@\x98m\xa5\xd3 U\x814\xbb\x93-<29yX D\xf4*\xb8\xc3$\xb3#:\xcdg\x00kw;\xb2\xe3\x90\x92R\xdf\xd4\x81\xf8\xd6[\x85s \xff\xc2S\x1a\x168#U\x81>\xb0\xd3\x88+\x93\x16\xb8c%s\x02=\x981\x17m\x88\x82\xe9pz\xe7r_j\xa1?\xf7\x924\xe9\xe3\x18\xf0g?*\x13\x17\x99\xa7B\xfb\xd3(\xf3\x93\x01\x11\xb4@\xe7\x93\x8c\x1eb\x88\x1b\x84.0\x84?\xca\x11#\x04\x94\x177\xc4\x84D\xc3;\xa5\xc3\x8cq"C\xd4\xb3A\xe3\xb4\x17\xd3\xa3T`C\x85t51\xec\xe71\xec({\x9c\x82A\x9f\xd8\x81\xdb\xe8A\x87B\x8f|\xe5@{QB\x03\x94 \xc4\xe3\x86\xef\x06ob\xd4\x04S$FK\x94E_\xf4D`#E\x11\x19E>D:_\xd4|\xca\x11t\x03\xd54\x03\xc01Nd\x91\x12\xd9Ed4\x91\xafr)\x90\xa2EJ\x90E&9\x92QT\x91f\xe4C\x1ai+\x97\xd2l\t\xa9H&\x82>\xdbV!Xr_hp\x1e\xa0\x91Zp!7\xa3\x98\x10o\xa8\r\x15\xa0\x17\xc5F\x04\x19\x85wi\xc0M\xe54\x08I\xd9S\x0cq\x94\xaa\x06\x12\xff7y\x95\xbed\x95Z\xc9\x13Y\xd9\x95\xb9\xc4\x95`\xa9\x0fb9\x96\xf8P\x96f\x89\x12_\x99\x96\xae\x84\x96l\xc9Go\xc9fn\x19\x97\x08I\x97k6\x97vY\x0ex\x99\x97\x96\xb7\x96|9f~\xf9\x97f\x16\x98\x82\x99f\x85\x89M{y\x98\xd7\x90\x98\x8aY\r\x8c\xd9\x98\x83D\x98\x90\xa9H\x8f9\x99\xceP\x99\x96\xa9\x0c\x98\x99\x99\xc8\xb0\x99U`\x1c+\xe58\xbd\xa6b\xbbqM\xbe\xd1nc0\x9a\xa3R8H\xf0\x15B\xd9fY%Xi\xc0lmp\x1d\x84\xf18\x16Vc_\x85\x9aZ\x02"\xe4v\x82d\x00 9\x97\x04m\xb2\x05\xe3\x96C\xcf\x18\x9c\xbcI\x06\xbf8>\xaceW$\x07\x1f\x1e\xf7.\x8b\xe1\x1dD\x81\x151\x07-D\xb1\x16\xd9\xc9\x94\x12\xe2s\xb9!q\xec$\x9e\x0fW\x9d\x81Gs\xd7\xc4\x1e*us\xc3A\x19\xd6\x11\x9e}Bq%\x97]\x1e \x1d\xdc\xd6\x9dC\xe0\x9e\xfdV,\xe4\xf9\x9d\t\xb3\x1f\xbd\xe1\x85\xff5\xa2\x18G\x96.\xd7\xe2\x9b\x941\x14\x97\x91\x19\xdc\xa1o\xc5r!\xc7\x91dx\x01\xa0pY\x0e^\x08QeQ#\x1e\xb7!0\x82"N\x16y\xbb!\xa1\x8bb!\xbb\xe1\x1d\x034$\x94\xd2 \x12\xe2\x93\x9f\xa3\xa1\x7f\xd2$\x11\xc2rn\xa2&&\ne#\x06(Z7\x16hG$*\xda3\xf5\xa1w\xbc\x92#\x0f\x82\x83-\xd2$D\xd2u 
\xc2\xa34\xd23\xb8\xf8\x1d-\x8a\'^\x08\x19\x18\x92\xa4k\x82!6\x8a~\x0f\x12!Kg\xa4\xd3\x02%,\xb7\x9d=\xca\xa2B\'\x99{B3$\xb3$\xca\xd72\xa8!Z\x15H\x93(\x94Qn\xfa/\x02\x97\x1b\x9c\xd29\xbda%\xa4\x93{\xf5\x81\xa6\xc4\xd7#m1\x14D\xc0}E\xe2y\x83\xd2 C\x00)z\x9a t1\x81`\xb8"\x92\xd3=o\x1a\x18\x86\xfa\xa6z\xba\x8a\xaa\xc7\x1d\x19\x12\xa5\xbd\xa1#\x81#\xa7\x1bY+\xe2\xa7(\x89\x11\x18\xb9\x02\xa9\x83\xb1%z\xfa-\xe6\x00\x82\xff\x0b9A\xe8\x81\x17\xe3\xa12\t\xb3%\xc4\x02\x82\xb4\xca\x18 (\x18M\x01(\x87\x11o\xbdB3V\x11#\xafJ\x1a)\x13\'Q\xd1\x7f#\xa4\x7f\xda\xa7\x16\xbd*/gs\xaa\xb8*/\xdb\xe8~\xbaz)\xcc\xda\x16\xbc\xea\x8c\x02x.\x17hn\xb1b,\xba\xda>\x8c\xc1{\x1bH<[\xd2\xac\xcei\xa1v%p\xe3\x01=MX%\x12\x90C\xb4:7\xc22\xaf\x87\xca\x9a\x1e\x08\x18f\xc8,\x18\xa8\x84=\xf2\xaf\xedjn\x84\xe3\x81\xa4c\x86\x8f\xa10<3S\x9c",\xfbz4 s\xaa\x89b6\xbe\x08*K(\x18\x0b;\x1e[\x88\x1b\x17\xba8K\x18\xa5mQ.\x83\x81u&#\xae\xb2\x12\x83Kh\xae{\xd1\xb0\xe9J\x0e\x17z 8\xe4\xae\x91!\x89\xa63\xabqC4\x18b\x84\xe0a0\x1a\xc2)\xf9\x1a{\xb1\x83\x19\\\x14>\xa0\xe1\xb2\x94\x85\x86\xdas\x8bC\x00#\x14\xb22\xa1\x82;\x1er\x164\x92\x17\x94\xf2\xb3\x7fC\x8e\xff\xa7*\x89\x98\x01*\xb8\xe1\x8a|\xd8u\xd95\xb5\xa7\xe89\x17*\xb4|\x15\x17x\xf1\xad\xdc\xe2\x1eVR#\x123\xae\x91\xf8\x8a\xe6\x9aUa\xcb\x91}\xf98\xe9\'\x8a#\xd664\xe2Aq\x02u\x98#@\x07\xd42\x0f\x04\xb8\xf8\xda\xb3\nC\x8f\xd741\x07\x84\xb7\x16\xa0\xb7F\xa622\xd4/&\xd4\x89\xeb\xe7=\xf0\xb2\xb8\xe2\xc80\x96\xc8P|\x13#\xe9R.\xfa\xe8\x1d+3b\x9f\x91>&fW\x8a\xdb\x07\x1e\xcbm.\xc39\xa0C\xb2\xe6\x16\xbac\xf8@\x88\xdb.c\x1a\x8ca\x84\x040yE\x0b9\x91e\xd4\x92V\xa0D\xb9{\x04t\xf1E?\xc4+,\xe9C\xbcK\x91\x17\tE\xc8;\x060\x99\xbc\xc4\xfb\xbb\x02\xe1\x99\xe4\xb0Tc\x80\xbd\xaad\xbd\xe1`sg\xe0\xbd\xb2\xc4\xbd\x9c\xf9\x0b\xe2;\xbe\xbdP\xbe\xe6\x9b\x0b\xe8\x9b\xbe\xb7\xb0\xbe\xec;\x0b\xee\xfb\xbe\xb1\x10\xbf\xf2\xfb\n\xf4[\xbf\xabp\xbf\xf8\x9b\n\xfa\xbb\xbf\xa5\xd0\xbf\xff\xfe;\n\x00\x1c\xc0\x87D\xa6\x04<\x13\x03|\xc0\x9c\x90\xc0\n\xbcH\r\x0cf\x06\xfc\xc0\x99\x16\xc1\x12\xcci\x14\\\xc1\x9fv\xc1\x18,j\x1a\xbc\xc1\xa5\xd6\xc1\x1e\x8cj!\\I\x0c<\xc2\xf6\x00\xc2&\xec\x10%<\x1e\xe7\x82\x1eH \x84S\xf0\x9a\xf3\xb4\x9cG \xc3\xa9A\xc3FPyD\xa6\xba`\xa0\xc3V\x80\x14\xc3\x96\x05\xcc\xe6\x15\xeb\x06\x8c\xdf\xb0\xb6LC\'G\x10\x1f\x80\xd8\x04M,\x19\xc0\xa9\x04Ol\x04\xcdI\x9c6\xccn\xce\xc3\x05\xda{\x05\x9b\xb7\x94\xb4\xb0\x7fA\xb4\xc5u\x0b\x0el\xaa%\x9f\xa7od\x12!U\'-\xbeI\x045Rq\xf5\xa1\x1bfH\xa1"\xca\x9f}"\x18\x10\xea\x1e\xbd\x02\x9fp\xa5\x18j\xbb\x9d\xa9\x01\x9aBH\x1ei\x9c]\xafR\x15\x14\n\x81\xd4\x93\xc8\x03z@\xf7!"C\xd1\xc6\xf2\xb95)\xe3Oo\x8co\xc51\x1c\xef\xe2\xc2\xf3d\x1c\xa0\xd1N \x97.\x97\xac\x19\x05W\x1d\xfd\xb6\x1c\xf5q\x1f\xff\xf9\xd1\x14\x91\x8c\xc38\x11\x0e\x868O\\\xaa-j\xdc!V: f\xab\xa5{\x18\'%\xb2\xa4f!\xa2a:\xab0\xba&U1#.\xeatCr\x1a\x80<"B\xe2A:\xfa#\x06S\xc9\x0c\xc8\x82f7\x87\x83Z\xcdiaQS\x9a\xccR\x0b\xcdk\x81v`"\xcd#\xa2\xcbk\x11\x1a\xd1V2C3&A\xa2\x1b\x06\x83\xaaO\xb7&1\xf5x\xe5\x14\xe0@\xe2\x89\xb8\x05\x1e\xac\x14\x1427\xa4\xc8\x05D\xba\xde\xea\xd9\xd1\xc30\xe4\xd8\xe1\n~\x8c\x19\xde\xc2\xbcA\x90\x0f\x95@Je>+T\xce\x0bn\x8c\x194\xdbDS\x905t\xe2\xe9r\xa1H\xa9\xd7\xa8@\x16\x93\xed\xbb\x14Y\xbcG\xfemv\xb1\xbb\x87rER@\x91L\xb4\x92\xd1{\x91/i\x04P\x14\x91\x10\x99\xe5\xcc\xeb\x92@N\xbd\xd3\xdbEQ\xde\xad\xbe{D\xbd;\xe6\xff-\x11@\xfe\ri\xde\x05k.\xc2\x16!r\xe6\x00\xe7d \xe7\x1fQ\xc2)\x9c\x08v~\xe7\x85\x90\xe7z.\x95B\xde\xe7\x03\xc1\xe7\x80\xbe\x0f\x7f>\xe8\xfe 
\xe8\x86\xbe\x07\x88\x9e\xe8y\xb0\xe8\x8c\xdee\x85\xfe\xe8\xf5\xe0\xe8\x92\x0e\x10\x91^\xe9\xf0@\xe9\x98\xce\x0c\x97\xbe\xe9\xaa\xea\xe9\x8a\xd6\xe9\xa0~\xe6\xa3\x8e\xc0\xa2^\xea\xff@\x8d\xea\x93v\xea\xaa\xce\r\x9a\xde\xeaf\xf0\xea\xb0\xfeJ\xb3\xfe\x12\xb2^\xeb_p\xebX@\x9bKP\xc4n\x9c\x14\x8c\xf2Mr\x04\xcaI\x93\x04\x91\xc1d\xd8\xc0US\xa9\x1d\xec4\x93I\xa9\x04\x19\xe0\xebJ\xc0\x91t\xae\x04\xd2n\x15\xac\x0e\x14\xd9\xfe\x0b\xb0\xb8#k\x0e`\x02\xe6XFq^\xf7S#3\x07\x00\xd2.\xd7\xa5\x850\x1c\xf5$\xdb\xf6\xec\xed\x15\xa8\xa2A\x16\xae\x11dH\x85bG\xf0!h\xb4*1\xe5^\xc4\xb1:M \xc3\x08&\x11\xba\xee\x04E\xba\xa2+\xb6m\xc12`XL`\x0e\xbf>G\xd0\x8e\xe4\xb4\xee\x0f\x8aW\xcc#dl\xa1>~E\xf1\x0b\xe6[5\x02\x9c\x98\xb1$w|WluQS\x91R\xae\xf1P@>X/5\xf0\x11Q\xf0N\x10\x83\xf2\xe1\xb6G\xc0\xaf\x026*kM\xeeMr\xee\x90\x9aM\x1aI\xd6(4T.o\x15\xe0\xbe\xf1\xbarQ\x1d\x90\x80}\xad\x17\xd4qS\x9c\xd1\xc6R\x0b\x00\xff\xd7\xc4)\xe4tA)s\\\x80\xe7.\xdf\xb6`\x01F\xf0\xdb\xde\x0b|\xa2\xf5\x92q\x85\xa3=\x84\x0f\xef\x1bt$\x1eI\xd07#\x85[\xa9!\xb3\xa55\xf4\xaa#"\x1b\x8f)z\x81\x87\xd5\x9d(sb\xb9\x85a\x9aH\x908\xe1C\xf1\xd7\xc6\x16\t\x7fk\x1c\x7f\xf6^\x8f\xe6\x97%\xe6\xbe14\x0f8b\r\x07b\xe5.d$\xf6XF\xc0)\x13H74\x9a\xcd\xbce\x9dn\xe3{\x13\x85W\x91\x85\x1cI\x96&\x91qXl\xe1Q\xed\x04.\x15\xb3\x88G\xd1^\xa45OXcoe\xb9\\,*\x03MH\\\x0e.+\x0f\xcc\x83\xb9\xd0\x8c*\x1d\x06\x03\xddL\x83\xde/8,\x1e\x93\xcb\xe63\x1a\\\xa9\xa4\xdb\xee7<._a\x06\x16\x8f\x92\x97\tZUY\xde\xc5\x9eJFV\xa1F\x1f\xe0\xce\xca\x86J\x85\x93J\x15A\x97\n\x05\x07\x07\xd0\n\xca\n\x85N\x12A\xc7@\xcd\xc9\x12\x8a\xa2\x17"_\x96\xd1\xc5\x00\xa3PaVS\x98\x15\x05)\x9b\xe4\xe3\x9c\xee.o/\xda\x9ao\xb0\xf0\xf0\xf0\xd0\x18\xe2\xdfW\x11\x99&\x99\xd1$X\xf3\xd1-d(\x9d\x06i\xe0\xec\xb6\x0f\x87\x8575\xaa1\x18*\xcf\xe42q\xba\xfa\xfa\x170\xfb;|\xbcO.y\xcd\r\x0e\x0e^\xa3L\xe1\x8bW\x05>\x1c\xaez\xb0\x81\xa6\xccT\x05\x00vb\x00\xa0F\xa1N\xc0r<$\xd6\x80r\xc7\x03\x9b\n\x8a,\xc0\xaaC\x8f\x0f\x18\x83\xf2F\xff\x92\x8c\xe3\xae$\xca\x94*W\xbe\xa1\x80I\x1d\xb5F\x9eX\xd2\xac)\xe7\xa4\xcd\x9c:w\xf2\xec\xe9\xf3\'1\x9c@\x87\x12-j\xf4(RvB\x932m\xea\xf4)T\xa2K\xa3R\xadj\xf5*V]S\xb3r\xed\xea\xf5+\xd5\xad`\xc7\x92-kv\xa5\xd8\xb3j\xd7\xb2mk2\xa6\xdb\xb8r\xe7\xd2\xad\x91\xb6.\xde\xbcz\xad\xde\xdd\xeb\xf7/\xe0\x9f}\x03\x13.lx\xe4\xe0\xc3\x8a\x173\xd6\n\xb71\xe4\xc8\x92\xd3$\x9el\xf92\xe4\xca\x987s\x06\xac\xb93\xe8\xd0r?\x8b.m\x9a,\xe9\xd3\xaaW\xf3}\xcc\xfa5\xec\xac\xa9c\xd3\xae\xdds\xb6\xed\xdc\xbaU\xe2\xde\xed\xfb\xb7R\xd7\xc0\x87\x13G,\xbc8\xf2\xe4\xc3z+o\xee<\x0c\xf3\xe7\xd2\xa7\x13\x88\xce\x19c\x99\x1b\xa6VtP\x07@\xc3\x98\n\x82\xba;\xbb)(<\x1e\x8a\x94\xf1l\x18\xa8\x02c\x05}\xd2\xado\x1e7\xc6@\x87\xf35\x0c\xbc\x0c\xc6\x01\xbc\x18\xc6\xa0s_\x7fi\xff\xd8\'\xc62\x1f\xbd\xc1AA\xdbU\xc0\xc1\x80\xcf\xd1\x87\xd9\x81\xd5\x1d\xd2\xd0&\x18\x18\x80A\x06-X\xa0A\x06\x14l\xb0!\x0b\x18|\xa8\x9f$\tY@^\x06\x87\xcc@\x00\x11&j0\x13\x8c/\xb8\xa7B\x07/\xcc\xc8\n\x88\x1a\xb4\x92\xc1@;\xe4\xf8\xe1%\x06\x90\xd0\xde\x1d\xff\xad8\x88\x8b4\xc4H$\x8f{\\`c\x0f9\xca\xf0\x0c\x8e0t\x07\xa4\x8cILi\x81+\x1bT\xe0\xe2\x91\x92x@\xa6\r\'X\xf1\xe5\x8d\xc3Mx\xd9\x81\x1cl\x88\xc2\x00\x98h\xc8a\x0c( 1"\x06\x14\xe4\xa9\xe1L\x06\x00\xa0g\x87s\xfaS\xc7\x14\x03\x00\xd8\x88\x01\x1b\xb0b\xca\x05\xf8M\x89\x8d\xa2-\xb62 \x7f\x91v0)\x01\x99\xce9@+\x1d\x18\xd0\xa1\x0e\x17\x1c"I\xa9\x8aFYA\xa3\x8f\xd6\xa0\xe9\x88F\xf0\x97\x01~\xa2\x9e 
\xa8\xa9\xab4\nJ\n\x16\x0c\xd0\x81\xa8\x8e>\xf3i\xafD\x18\xc1\xaa\xa3:$\xf7\xa6eqvG\x01\xad5\xffP0\xab5\x04h\xb0\xc1\xb4\xd0V{\xed&\xa3\x12\xe0\xe7\x05\x1b\x00\xc8\t!\x00F\xc8-\t\xd5\x92\xf0\xc8\r\xe0\xd6\xf2\x03:\xd3\xae\xabB\xbbF\xb2\xf2\xad\x16*\x0c!.%&``\xee\x0f\xc6\xa0[B\r\xe8\xd6[\xdd\xaf$\xc0h\xe4\xb7&\x0c\x1c\n\x98~L\xf1\x0c\x80\x04\x0f\xb1\xcc\xc3\xcb\x1eW[\x85&\x1a0\x00\x7f\xfb\xbd\x0b\xc4\x1f\x99\xfe \xb1\xc8\xfa\x96\xa2D2\x11Z`\x00\xcc0c\xf2\x1f\xc8v\x80\x0b\xe3*}\xf0G\xf3\xc7?\x98\xe4$g\xe2b\nz\x9f\xe0\x08x\xb4\xd0\xb8\x87`\x19g{63\x90\x1f\x112\xa7%\xff!\xa0\x1d<\xf2\xe2\x06\xfd\xbb?X\x8e\x04ic\xcdY\xbfR!wJb\xca\xdd1\x80\x98ID\x87\xe6\x15M\xb4\xa2\xdb\x9d\xa9S\xdcd\xe29\xafv\x16\xbd\xdd\x15\xfaT\x82p\r\x80\x03\x12\x90\xc1\xb4\x18Tk\x0b\xd2CY#z\xdf\xf9(\xa8\x11P\xff\x07\x0f\xb6SS\x931\xd5eK\x07\x0e\t\xfe\xfc\xdd\xb1\x13M\xe3\x04ZK\xac\x972\xb2\xca\xa6\x17\xa9\xeaQ;_e3\xdf\xa20\xad[\xc6\xf0\x16f\xca-\xa1\xe3\x1a\x85BE\x85;\x19\x05<`-Z\xf8n/\x7fyk\x0e\x1c\xc0\x06\x03h\xd3\xeaxm\x07#\x1e\xc1\xff2\xc0B\x1ba\x82\x7f\xbez\x9f7\xcc\xddi\x90\xcdP\xdf\xf56Y\x99\xabU\xc0\x84=\xdbz\xfc\xee\xdfD\x14]\xe0\xff(\xfc\xd1\x04\x04aS\x8f\x16\xa9@G\x8d\x06\x0f\x15M\xa6\xbdbE/4\x11\x0cP\x1cc\xcb\xa9\xb8\xf2*j\xb1\x07F\x94\xa2\x18\xc3X\x83,l\xb1\xe51\x97V\x16\xc7\x10\xf4.*\xb1\x0c\xffA\x94\xd0\xb8\xa7i\x86\x93+\x1d\x0e)\x9f%\x11\x9a\x1e\xc7\xa4K\xbd\xea\xc8\xa4\xba\xd5\xb3\xceL\xack\xbd\xeb\xd0\xe4\xba\xd7\xc3NM\xb0\x8b\xbd\xecnx\xba\xd9\xa5\x8e\xf6\xb4+}\xedl\x97\xa6\xdb\xdf\xee\xcc\xb8\xcb]\x99t\xaf\xbb1\xef\x8ewa\xea}\xef\xbe\xec\xbb\xdfu\t\xf8\xc0\xdbr\xf0\x84\x97\xa5\xe1\x0f\xef\xca\xc4+^\x95\x8co\xbc)\x1f\x0fyQJ~\xf2\x9e\xac\xbc\xe55\x89\xf9\xcc\xaf\x85\x02\x07H\xc0\x07>\x90\x80\x048\x80\x00\x0fp\x00\xeaQ\xbf\x809H \xf5\xaaG\xf0\xd49\xbf\xc6\x03|@\x00\xb6\xbf}\x02\x08\xa0\x80\xdb\xdb>\xf7\x04\x00}\xe8C\x7f\x8038\x80\xf7\x02\xf8\xc0\xd5eO\x9d\x03\x18\xff\xf6\x05\xa0@\x01\x8c\x9f\x80\x11\xd4\x9e\xf7\n8\x03\xf3y\x8f\xfcwl^\xf9`\x91@\xf5\x9b\x7f\xfd\xe8\xf3\xde\xf7\xe1\xb7\xfd\xf5\xcd\x90\xfd\xdbo?8\xde\x7f\xce\xfa\xd9/|\xe8K_\x02\x048\xbf\x00\x14\x00\xfb\x1a\xc4\xff\xf8\xc9\x7f\xff\xbfr\xd0\x9f\xf6E\x80\x04H\xc0\x08\x90\x1f\xee\x99^\x01\x04\x9f\xf0a\x9f\xf1\xb5\xdf:t\x1f\x00rE\x02H\xdfK `\xef\xd9\x9f\x01\xbeA\xffA L\x90\xdd\x04r\x86\x00\xde^\xfa\xad\x00\x06\n\x80\xef\x19]\x04,\x00\x01\xf2\xdf\x03\xbe\xc1\x04L@\x03\x8c\x00\x05\xc8 t\x80`\x08Z\x06\x03>`\xf0\x1d\xc0\x08f \x01, \x03:\xc0KH\xc0\x02>`\x02\xac^\x07\xd6\x00\xf0\x05_\x01H\x00\x04 \x00\x02\x04\x80\x142@\x030@\x00da\x000\x00\x040\x80\x14\xde`\x0e\x0eG\xf3\x8da\x01\x04\xa1\xf4\x99\x1e\xfe\x95\xa0\xee\x8d!\t\x12\xc0\x12~\xcb\xee\x19\xdf\xf0M\x80\x16f!\x02`a\x1d"@\x03T!\x18\x86\xe1o\xb0\xa1\xf1\x15@\x04\x9c`\x02\xd8_\x1a\xae@\x05\x02\xa2\x00\x0c\xdf\x1b.@\xf3=!\x054@\x1dR\xe1$n!\x01x!\x02\xf4\xa1\x1f\xee\x86"& !\xaa\xc0!\xfe^\'\n@\xe9\xbd!\xfe}\x00&T\xffb%2\xc0\x08L\x80\x18H\xe0&:\xc5\xe8\x81\x1e\x12\x8e^\xe9\x11\xa2!\x1a\xdf\xf5\x15_\xf3}@\x01(\x80\x10\x12\xe1\x12\x9e\xe0\x07D\xc0\nH\xa2*\xd6\xe1\x04@\x00\x19\xc0b,>E"\xb6a\r|\xe2\xfd\xe9\xa2(\x06b\rH\xc0\x02\xd8\xdf\x126_\xe9\xd5\x80*\xde\xe11\x96\x813>cSD\xa3\xed\x95\xe14\xd6_5Z_;\xe2\xde\x03\x80M\x07\xa2\xa3\x00\x94!& \xe3$B\x80+\xfe\x02\x0e\x9acc\xd0\xa3:\x9a\xe0\x19\xbe#\xfa=\x807~A\xff5_\n\x82\xe3$2\xc0\xd9\xf5\xa3?.\x06@\xf2\x00.\x12d\xfeE@\xf3E\xc0\x15%d >\x00\x0f\xe0\xa3\x16N\xc0\xfe\x05A9F\xe4QL\xe4:\x96\x1f(\xea\xa2A\x1a\xdf\xeayA\x07\x1e$\x0fTb\x03<\xa4I\xe6\x06J\nd\xf9\xe5\xa2;\x8a\x1fB>`\x1c\xb2\x9f1\x1ec%\xbeAI\xde$Q\xe4\xa4\nP\xe3!\xe2\x9f\x00\xbcd\x0ft 
\xf8\r\xff\xe4\nT"3\xb6\xc1Q"%P(\xa5\x19\xee\xa4E\x8e_/\xea\x9f\nH\xc0\x01\x08\xe3\x0b\x9e\xe0"\x02\x01Hf\xa1QB\xa4V\x06\x06W2\xa55.\x80S\x1e_\xe8\xa5\xa3\x1b\xbe\xe0T\x1a\x9fG\xaa\x80U\xda\xe4[\xc2F\\\xb2c(rd \x92\xe5\x0b\xe6\xa5\x05\x12\xc0Z\x06@[\x06\xa6`bcJ&\xe0W\xae\x00\xed)b\xee-!\xf5\xf9$\x01\xfc%V\xba%d\xee\xc5`ze(\xaa@\x04\xd0#\xef=\xdf\x1b\x12\x00/\xf2%\x04\x14%`\x86\xa6j\x1c@\x01\xd4fmB\xe5\xb7,\x80m\xd6\xe6\xf0y\xden\x16\x00n\x8ee\x014\xe1\xe8)\x801\x0e\xe2n\x0e\xdf\n\x00\xa3m\x02\xe3\xeay!t:dl\xca&l\xf4\xdc\x1c\x8c\xa4\xcd\xe9\xdctR\xa7\xd7e%w\xd2\xd3w\x8a\x9dw\x86gn\x8c\'yB\x14h\x9e\'\xe5\xa5\xa7z^\x1e{\xb6\xa7\xe6\xbd\'|^\x92y\xce\xe7j\xd4\xa7}6R~6\x1d~\xee3\'\xca\xad\x01\x80\x06\xa8\x80\x0e(\x81\x16\xa8\x81\x1e(\x82&\xa8\x82.(\x836\xa8\x83>(\x84F\xa8\x84N(\x85V\xa8\x85^(\x86f\xa8\x86n(\x87\nh\x08\x00\x00;' 4 | --------------------------------------------------------------------------------
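A minimal usage sketch for snape/make_dataset.py above (illustrative only: it assumes the package is importable and uses the config keys the module reads; it is not one of the repo's shipped examples):

    from snape.make_dataset import make_dataset

    # binary classification set with one categorical column, a $-formatted column,
    # and ~10% of rows carrying a missing value
    config = {
        "type": "classification",
        "n_samples": 1000,
        "n_classes": 2,
        "label_list": [["low", "high"]],
        "insert_dollar": "Yes",
        "pct_missing": 0.1,
        "random_seed": 42,
        "out_path": "./",
        "output": "my_dataset"
    }
    make_dataset(config=config)  # writes my_dataset_train/_test/_testkey.csv to ./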