├── .gitignore ├── README.md ├── data ├── test.zip └── train.zip ├── exercises ├── __init__.py ├── test_exercise1.py ├── test_exercise2.py ├── test_exercise3.py └── test_exercise4.py ├── notebooks └── tutorial.ipynb ├── requirements.txt ├── run.py ├── src ├── __init__.py ├── model.py └── transformers.py └── tests ├── __init__.py ├── test_country_transformer_pytest.py ├── test_country_transformer_unittest.py ├── test_transformers.py ├── test_transformers_hypothesis.py ├── test_transformers_mocking.py └── test_transformers_parameterised.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Ignore local virtualenvs 2 | lib/ 3 | bin/ 4 | include/ 5 | .Python/ 6 | *.pickle 7 | *.joblib 8 | .ipynb_checkpoints/ 9 | *.pyc 10 | *.pyo 11 | *.swp 12 | *.class 13 | *.orig 14 | *~ 15 | .hypothesis/ 16 | 17 | # autogenerated 18 | src/_pytest/_version.py 19 | # setuptools 20 | .eggs/ 21 | 22 | doc/*/_build 23 | doc/*/.doctrees 24 | build/ 25 | dist/ 26 | *.egg-info 27 | issue/ 28 | env/ 29 | .env/ 30 | 3rdparty/ 31 | .tox 32 | .cache 33 | .pytest_cache 34 | .coverage 35 | .coverage.* 36 | coverage.xml 37 | .ropeproject 38 | .idea 39 | .hypothesis 40 | .pydevproject 41 | .project 42 | .settings 43 | .vscode 44 | 45 | # generated by pip 46 | pip-wheel-metadata/ 47 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Testing for Data Scientists 2 | 3 | ## Packages to install 4 | ``` 5 | pip install -r requirements.txt 6 | ``` 7 | 8 | ## Commands available 9 | ``` 10 | python run.py train 11 | python run.py test 12 | python run.py unittest 13 | python run.py coverage 14 | python run.py exercises 15 | ```` 16 | -------------------------------------------------------------------------------- /data/test.zip: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/cambridgespark/pydata-testing-for-data-science/5669d582659e54c6b54463b94fb1feb7a6b301aa/data/test.zip -------------------------------------------------------------------------------- /data/train.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cambridgespark/pydata-testing-for-data-science/5669d582659e54c6b54463b94fb1feb7a6b301aa/data/train.zip -------------------------------------------------------------------------------- /exercises/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cambridgespark/pydata-testing-for-data-science/5669d582659e54c6b54463b94fb1feb7a6b301aa/exercises/__init__.py -------------------------------------------------------------------------------- /exercises/test_exercise1.py: -------------------------------------------------------------------------------- 1 | """ 2 | This exercise is about writing a unit test using pytest 3 | """ 4 | 5 | from src.transformers import CategoriesExtractor 6 | 7 | def test_extract_categories(): 8 | """ 9 | Write a unit test for CategoriesExtractor.extract_categories(json_string, False) 10 | :return: 11 | """ 12 | pass -------------------------------------------------------------------------------- /exercises/test_exercise2.py: -------------------------------------------------------------------------------- 1 | """ 2 | This exercise is about writing a parameterised unit test using pytest 3 | """ 4 | 5 | import pytest 6 | from src.transformers import TimeTransformer 7 | 8 | def test_time_transformer(sample_df, expected_df): 9 | """ 10 | Write a parameterised unit test for TimeTransformer 11 | :param sample_df: sample df to test with three columns: deadline, created_at, launched_at 12 | :param expected_df: result with two columns: launched_to_deadline, created_to_launched 13 | :return: 14 | """ 15 | pass 
-------------------------------------------------------------------------------- /exercises/test_exercise3.py: -------------------------------------------------------------------------------- 1 | """ 2 | This exercise is about refactoring a unit test to improve its readability and maintenance 3 | """ 4 | import pandas as pd 5 | from src.transformers import CountryTransformer 6 | 7 | import pytest 8 | def test_correct_country_returned_with_simple_df(): 9 | """ 10 | Refactor this unit test to apply the Given/When/Then pattern 11 | :return: 12 | """ 13 | df = pd.DataFrame({'country': ["CA", "GB"]}) 14 | country_transformer = CountryTransformer() 15 | assert len(country_transformer.transform(df).index) == 2 16 | assert country_transformer.transform(df)["country"][0] == "Canada" 17 | assert country_transformer.transform(df)["country"][1] == "UK & Ireland" -------------------------------------------------------------------------------- /exercises/test_exercise4.py: -------------------------------------------------------------------------------- 1 | """ 2 | This exercise is about writing a property-based unit test using hypothesis 3 | """ 4 | 5 | from hypothesis import given 6 | from src.transformers import CategoriesExtractor 7 | 8 | import pytest 9 | def test_extract_categories(json_string): 10 | """ 11 | Use hypothesis to generate test cases for CategoriesExtractor.extract_categories. 12 | Think about an appropriate property to test against. 
13 | You should be able to find a bug and fix the implementation accordingly 14 | :param json_string: 15 | :return: 16 | """ 17 | 18 | pass 19 | 20 | -------------------------------------------------------------------------------- /notebooks/tutorial.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 41, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd\n", 10 | "pd.set_option('display.max_columns', 100)" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 42, 16 | "metadata": {}, 17 | "outputs": [ 18 | { 19 | "name": "stderr", 20 | "output_type": "stream", 21 | "text": [ 22 | "/Users/raoul/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py:2698: DtypeWarning: Columns (21,22,23,24) have mixed types. Specify dtype option on import or set low_memory=False.\n", 23 | " interactivity=interactivity, compiler=compiler, result=result)\n" 24 | ] 25 | } 26 | ], 27 | "source": [ 28 | "PATH = \"../data/train.zip\"\n", 29 | "df = pd.read_csv(PATH)" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 43, 35 | "metadata": { 36 | "scrolled": true 37 | }, 38 | "outputs": [ 39 | { 40 | "data": { 41 | "text/html": [ 42 | "
\n", 60 | " | id | \n", 61 | "photo | \n", 62 | "name | \n", 63 | "blurb | \n", 64 | "goal | \n", 65 | "slug | \n", 66 | "disable_communication | \n", 67 | "country | \n", 68 | "currency | \n", 69 | "currency_symbol | \n", 70 | "currency_trailing_code | \n", 71 | "deadline | \n", 72 | "created_at | \n", 73 | "launched_at | \n", 74 | "static_usd_rate | \n", 75 | "creator | \n", 76 | "location | \n", 77 | "category | \n", 78 | "profile | \n", 79 | "urls | \n", 80 | "source_url | \n", 81 | "friends | \n", 82 | "is_starred | \n", 83 | "is_backing | \n", 84 | "permissions | \n", 85 | "state | \n", 86 | "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", 91 | "663816109 | \n", 92 | "{\"small\":\"https://ksr-ugc.imgix.net/assets/012... | \n", 93 | "Angular - Where Modern Art meets Cards | \n", 94 | "Angular is a minimalist card design for simpli... | \n", 95 | "17380.0 | \n", 96 | "angular-where-modern-art-meets-cards | \n", 97 | "False | \n", 98 | "US | \n", 99 | "USD | \n", 100 | "$ | \n", 101 | "True | \n", 102 | "1459283229 | \n", 103 | "1455845363 | \n", 104 | "1456694829 | \n", 105 | "1.000000 | \n", 106 | "{\"urls\":{\"web\":{\"user\":\"https://www.kickstarte... | \n", 107 | "{\"country\":\"US\",\"urls\":{\"web\":{\"discover\":\"htt... | \n", 108 | "{\"urls\":{\"web\":{\"discover\":\"http://www.kicksta... | \n", 109 | "{\"background_image_opacity\":0.8,\"should_show_f... | \n", 110 | "{\"web\":{\"project\":\"https://www.kickstarter.com... | \n", 111 | "https://www.kickstarter.com/discover/categorie... | \n", 112 | "NaN | \n", 113 | "NaN | \n", 114 | "NaN | \n", 115 | "NaN | \n", 116 | "failed | \n", 117 | "
1 | \n", 120 | "1462931821 | \n", 121 | "{\"small\":\"https://ksr-ugc.imgix.net/assets/014... | \n", 122 | "Ladybeard is KAWAII-CORE | \n", 123 | "Original songs and music videos to jump start ... | \n", 124 | "24000.0 | \n", 125 | "ladybeard-is-kawaii-core | \n", 126 | "False | \n", 127 | "US | \n", 128 | "USD | \n", 129 | "$ | \n", 130 | "True | \n", 131 | "1484110800 | \n", 132 | "1475568868 | \n", 133 | "1480946454 | \n", 134 | "1.000000 | \n", 135 | "{\"urls\":{\"web\":{\"user\":\"https://www.kickstarte... | \n", 136 | "{\"country\":\"JP\",\"urls\":{\"web\":{\"discover\":\"htt... | \n", 137 | "{\"urls\":{\"web\":{\"discover\":\"http://www.kicksta... | \n", 138 | "{\"background_image_opacity\":0.8,\"should_show_f... | \n", 139 | "{\"web\":{\"project\":\"https://www.kickstarter.com... | \n", 140 | "https://www.kickstarter.com/discover/categorie... | \n", 141 | "NaN | \n", 142 | "NaN | \n", 143 | "NaN | \n", 144 | "NaN | \n", 145 | "failed | \n", 146 | "
2 | \n", 149 | "1724358498 | \n", 150 | "{\"small\":\"https://ksr-ugc.imgix.net/assets/011... | \n", 151 | "Vegan Cafe Delivery Service in Vancouver BC | \n", 152 | "Our project is to launch a vegan lunch deliver... | \n", 153 | "40000.0 | \n", 154 | "vegancafeca | \n", 155 | "False | \n", 156 | "CA | \n", 157 | "CAD | \n", 158 | "$ | \n", 159 | "True | \n", 160 | "1408549628 | \n", 161 | "1405218883 | \n", 162 | "1405957628 | \n", 163 | "0.926746 | \n", 164 | "{\"urls\":{\"web\":{\"user\":\"https://www.kickstarte... | \n", 165 | "{\"country\":\"CA\",\"urls\":{\"web\":{\"discover\":\"htt... | \n", 166 | "{\"urls\":{\"web\":{\"discover\":\"http://www.kicksta... | \n", 167 | "{\"background_image_opacity\":0.8,\"should_show_f... | \n", 168 | "{\"web\":{\"project\":\"https://www.kickstarter.com... | \n", 169 | "https://www.kickstarter.com/discover/categorie... | \n", 170 | "NaN | \n", 171 | "NaN | \n", 172 | "NaN | \n", 173 | "NaN | \n", 174 | "failed | \n", 175 | "
3 | \n", 178 | "314918941 | \n", 179 | "{\"small\":\"https://ksr-ugc.imgix.net/assets/011... | \n", 180 | "Photoetched Rail Yard Exposition | \n", 181 | "I have developed a process of my own which tra... | \n", 182 | "1000.0 | \n", 183 | "photoetched-rail-yard-exposition | \n", 184 | "False | \n", 185 | "US | \n", 186 | "USD | \n", 187 | "$ | \n", 188 | "True | \n", 189 | "1364084914 | \n", 190 | "1360627778 | \n", 191 | "1361496514 | \n", 192 | "1.000000 | \n", 193 | "{\"urls\":{\"web\":{\"user\":\"https://www.kickstarte... | \n", 194 | "{\"country\":\"US\",\"urls\":{\"web\":{\"discover\":\"htt... | \n", 195 | "{\"urls\":{\"web\":{\"discover\":\"http://www.kicksta... | \n", 196 | "{\"background_image_opacity\":0.8,\"should_show_f... | \n", 197 | "{\"web\":{\"project\":\"https://www.kickstarter.com... | \n", 198 | "https://www.kickstarter.com/discover/categorie... | \n", 199 | "NaN | \n", 200 | "NaN | \n", 201 | "NaN | \n", 202 | "NaN | \n", 203 | "successful | \n", 204 | "
4 | \n", 207 | "1766165140 | \n", 208 | "{\"small\":\"https://ksr-ugc.imgix.net/assets/011... | \n", 209 | "Cinnamon Fletcher needs to be brought to life! | \n", 210 | "Need to pay an illustrator to bring my childre... | \n", 211 | "700.0 | \n", 212 | "cinnamon-fletcher-needs-to-be-brought-to-life | \n", 213 | "False | \n", 214 | "GB | \n", 215 | "GBP | \n", 216 | "£ | \n", 217 | "False | \n", 218 | "1382600001 | \n", 219 | "1379704502 | \n", 220 | "1380008001 | \n", 221 | "1.602384 | \n", 222 | "{\"urls\":{\"web\":{\"user\":\"https://www.kickstarte... | \n", 223 | "{\"country\":\"GB\",\"urls\":{\"web\":{\"discover\":\"htt... | \n", 224 | "{\"urls\":{\"web\":{\"discover\":\"http://www.kicksta... | \n", 225 | "{\"background_image_opacity\":0.8,\"should_show_f... | \n", 226 | "{\"web\":{\"project\":\"https://www.kickstarter.com... | \n", 227 | "https://www.kickstarter.com/discover/categorie... | \n", 228 | "NaN | \n", 229 | "NaN | \n", 230 | "NaN | \n", 231 | "NaN | \n", 232 | "failed | \n", 233 | "
\n", 523 | " | goal | \n", 524 | "static_usd_rate | \n", 525 | "
---|---|---|
0 | \n", 530 | "NaN | \n", 531 | "inf | \n", 532 | "
1 | \n", 535 | "NaN | \n", 536 | "inf | \n", 537 | "
2 | \n", 540 | "NaN | \n", 541 | "inf | \n", 542 | "
3 | \n", 545 | "NaN | \n", 546 | "inf | \n", 547 | "
4 | \n", 550 | "NaN | \n", 551 | "inf | \n", 552 | "
5 | \n", 555 | "NaN | \n", 556 | "inf | \n", 557 | "
6 | \n", 560 | "NaN | \n", 561 | "inf | \n", 562 | "