├── tests ├── __init__.py └── test_numbers.py ├── swe_barebones ├── __init__.py └── numbers.py ├── github_exercise_solution ├── github │ ├── __init__.py │ ├── .repos.py.swp │ └── repos.py └── tests │ ├── __init__.py │ └── test_github_repositories.py ├── run_tests.sh ├── requirements.txt ├── README.md ├── .gitignore └── unit_testing.ipynb /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /swe_barebones/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /github_exercise_solution/github/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /github_exercise_solution/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /run_tests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | py.test tests 4 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | scipy 3 | pytest 4 | pylint 5 | pandas 6 | scikit-learn 7 | requests 8 | -------------------------------------------------------------------------------- /github_exercise_solution/github/.repos.py.swp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InsightDataScience/swe-barebones/master/github_exercise_solution/github/.repos.py.swp -------------------------------------------------------------------------------- /swe_barebones/numbers.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | class Numbers: 4 | 5 | def __init__(self): 6 | pass 7 | 8 | def perfect_number(self,n): 9 | factors = [] 10 | factors.append(1) 11 | factors.append(n) 12 | for i in range(2,n): 13 | if (n % i == 0): 14 | factors.append(i) 15 | # sum factors 16 | sum = 0 17 | for f in factors: 18 | sum += f 19 | 20 | # decide if it's perfect 21 | return (sum - n == n) 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | #Introduction 3 | 4 | This is a barebones framework to see how a repo with testing can be set up. 5 | 6 | # How to get it 7 | 8 | git clone https://github.com/InsightDataScience/swe-barebones.git 9 | 10 | pip3 install -r -U requirements.txt 11 | 12 | # Run it 13 | 14 | jupyter notebook unit_testing.ipynb 15 | 16 | Run through the cells and make sure everything works (tough!) 17 | 18 | # Run tests 19 | 20 | From the top level directory, run: 21 | 22 | py.test tests 23 | 24 | # Go Develop 25 | 26 | Please ask us if you have any questions about why things are set up the way they are, if it seems dumb, try to think about why it might be that way. 27 | 28 | -------------------------------------------------------------------------------- /tests/test_numbers.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | """ 3 | This script tests methods in a number theory class called Numbers 4 | """ 5 | 6 | @pytest.fixture 7 | def num(): 8 | from swe_barebones.numbers import Numbers 9 | return Numbers() 10 | 11 | test_perfect_number_data = [(6,True),(28,True),(496,True),(8,False)] 12 | @pytest.mark.parametrize('x,expected',test_perfect_number_data) 13 | def test_perfect_number(x,expected, num): 14 | #num = Numbers() 15 | assert num.perfect_number(x) == expected 16 | 17 | perfect_numbers = [6,28,496,8128,33550336] 18 | def test_perfect_number_large_p(num): 19 | #num = Numbers() 20 | for x in range(2,1000): 21 | if x in perfect_numbers: 22 | assert num.perfect_number(x) == True 23 | else: 24 | assert num.perfect_number(x) == False 25 | -------------------------------------------------------------------------------- /github_exercise_solution/github/repos.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import itertools 3 | ''' 4 | A set of tools to analyze Github repositories. 5 | ''' 6 | 7 | def get_user_json(username): 8 | return requests.get('https://api.github.com/users/%s/repos' % (username)).json() 9 | 10 | def get_contributor_json(username, repository): 11 | return requests.get('https://api.github.com/repos/%s/%s/contributors' % (username, repository)).json() 12 | 13 | def get_repository_list(username): 14 | r = get_user_json(username) 15 | user_repos = [entry['name'] for entry in r] 16 | return user_repos 17 | 18 | def get_contributor_list(username, repository): 19 | r = get_contributor_json(username, repository) 20 | contributors = [entry['login'] for entry in r] 21 | return contributors 22 | 23 | def get_total_contributors(username): 24 | all_contrib = [get_contributor_list(username, repo) for repo in get_repository_list(username)] 25 | all_contrib = list(itertools.chain.from_iterable(all_contrib)) 26 | unique_contrib = set(all_contrib) 27 | return len(unique_contrib) 28 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | local_settings.py 55 | 56 | # Flask stuff: 57 | instance/ 58 | .webassets-cache 59 | 60 | # Scrapy stuff: 61 | .scrapy 62 | 63 | # Sphinx documentation 64 | docs/_build/ 65 | 66 | # PyBuilder 67 | target/ 68 | 69 | # IPython Notebook 70 | .ipynb_checkpoints 71 | 72 | # pyenv 73 | .python-version 74 | 75 | # celery beat schedule file 76 | celerybeat-schedule 77 | 78 | # dotenv 79 | .env 80 | 81 | # virtualenv 82 | venv/ 83 | ENV/ 84 | 85 | # Spyder project settings 86 | .spyderproject 87 | 88 | # Rope project settings 89 | .ropeproject 90 | -------------------------------------------------------------------------------- /github_exercise_solution/tests/test_github_repositories.py: -------------------------------------------------------------------------------- 1 | from github import repos 2 | import pytest 3 | from voluptuous import REMOVE_EXTRA, Schema, Url 4 | from datetime import datetime 5 | 6 | """ 7 | This script tests methods in a Github repository class 8 | """ 9 | 10 | def test_repo_schema(): 11 | js = repos.get_user_json('bmregner') 12 | def date_format(fmt='%Y-%m-%dT%XZ'): 13 | return lambda v: datetime.strptime(v, fmt) 14 | user_schema = Schema([{'full_name': str, 15 | 'updated_at': date_format(), 16 | 'url': Url(), 17 | 'size': int, 18 | 'private': bool}], 19 | extra=REMOVE_EXTRA) 20 | assert user_schema(js) 21 | 22 | def test_contrib_schema(): 23 | js = repos.get_contributor_json('bmregner','finance_prediction') 24 | contrib_schema = Schema([{'login': str, 25 | 'html_url': Url(), 26 | 'contributions': int}], 27 | extra=REMOVE_EXTRA) 28 | assert contrib_schema(js) 29 | 30 | # Uses monkeypatch to return results without sending a request to github 31 | def test_total_contrib(monkeypatch): 32 | def repo_list(username): 33 | return ['me.github.io', 'myproject', 'mydemo'] 34 | def contrib_list(username,repo): 35 | if repo == 'me.github.io': 36 | return ['me'] 37 | elif repo == 'myproject': 38 | return ['John', 'Bob', 'Gary', 'me'] 39 | elif repo == 'mydemo': 40 | return ['me'] 41 | monkeypatch.setattr(repos,'get_contributor_list',contrib_list) 42 | monkeypatch.setattr(repos,'get_repository_list',repo_list) 43 | unique_contrib = repos.get_total_contributors('me') 44 | assert unique_contrib == 4 45 | -------------------------------------------------------------------------------- /unit_testing.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "**Perfect number example**\n", 8 | "\n", 9 | "A perfect number is one where the factors of a number other than itself add up to the number.\n", 10 | "Factors of 6 are 1,2,3,6 and 1 + 2 + 3 = 6\n", 11 | "\n", 12 | "What do we need to test for:\n", 13 | "- Do we get the correct response for a few known perfect numbers?\n", 14 | "- Do we get the correct response for a few known non-perfect numbers?\n", 15 | "- Did we factorize correctly?\n" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": 76, 21 | "metadata": { 22 | "collapsed": false 23 | }, 24 | "outputs": [], 25 | "source": [ 26 | "class Numbers:\n", 27 | "\n", 28 | " def __init__(self):\n", 29 | " pass\n", 30 | "\n", 31 | " def perfect_number(self,n):\n", 32 | " factors = []\n", 33 | " factors.append(1)\n", 34 | " factors.append(n)\n", 35 | " for i in range(2,n):\n", 36 | " if (n % i == 0):\n", 37 | " factors.append(i)\n", 38 | " # sum factors\n", 39 | " sum = 0\n", 40 | " for f in factors:\n", 41 | " sum += f\n", 42 | "\n", 43 | " # decide if it's perfect\n", 44 | " return (sum - n == n)" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": 77, 50 | "metadata": { 51 | "collapsed": true 52 | }, 53 | "outputs": [], 54 | "source": [ 55 | "def test_perfect_number():\n", 56 | " num = Numbers()\n", 57 | " assert num.perfect_number(6) == True\n", 58 | " assert num.perfect_number(28) == True\n", 59 | " assert num.perfect_number(30) == False" 60 | ] 61 | }, 62 | { 63 | "cell_type": "markdown", 64 | "metadata": {}, 65 | "source": [ 66 | "This works, but if we wanted to test every edge case this is going to start to look nasty. Instead, we can use a package called `pytest` which simplifies things. Run the line below to see the output, and then go and check what it looks like in the repository." 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": 78, 72 | "metadata": { 73 | "collapsed": false 74 | }, 75 | "outputs": [ 76 | { 77 | "name": "stdout", 78 | "output_type": "stream", 79 | "text": [ 80 | "\u001b[1m============================= test session starts ==============================\u001b[0m\n", 81 | "platform darwin -- Python 3.5.2, pytest-3.0.5, py-1.4.32, pluggy-0.4.0\n", 82 | "rootdir: /Users/BenRegner/dev/insight/swe-barebones, inifile: \n", 83 | "collected 5 items \n", 84 | "\u001b[0m\n", 85 | "tests/test_numbers.py .....\n", 86 | "\n", 87 | "\u001b[32m\u001b[1m=========================== 5 passed in 0.78 seconds ===========================\u001b[0m\n" 88 | ] 89 | } 90 | ], 91 | "source": [ 92 | "!py.test tests" 93 | ] 94 | }, 95 | { 96 | "cell_type": "markdown", 97 | "metadata": {}, 98 | "source": [ 99 | "Lets take a look at whats going on in there using the %load magic function" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": null, 105 | "metadata": { 106 | "collapsed": true 107 | }, 108 | "outputs": [], 109 | "source": [ 110 | "%load tests/test_numbers.py" 111 | ] 112 | }, 113 | { 114 | "cell_type": "markdown", 115 | "metadata": {}, 116 | "source": [ 117 | "The first test uses parametrize, which gives you a lot of flexibility. A great write up of everything you can do with parametrize can be found in the official documentation here: http://doc.pytest.org/en/latest/example/parametrize.html\n", 118 | "\n", 119 | "In the second test we run a loop to test many numbers. We could have also used parametrize for this test, although each test in parametrize stands alone, so the output becomes a little long. Go to the repository (not the cell above) and try changing this example to use the parametrize decorator. Run ```$ py.test tests``` and see how the output changes.\n", 120 | "\n", 121 | "This isn't super slow, but looking at our code we can find an inefficiency in our implementation. If we think a bit, we can see that we don't need to loop over all numbers up to the number we are checking, but can pick off pairs of factors as we find them. This means we only need to go up to the square root of the number we are checking. Below is a new implementation:" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": 80, 127 | "metadata": { 128 | "collapsed": false 129 | }, 130 | "outputs": [], 131 | "source": [ 132 | "import numpy as np\n", 133 | "\n", 134 | "# Don't forget self when putting into a class!\n", 135 | "def perfect_number_optimized(n):\n", 136 | " factors = []\n", 137 | " factors.append(1)\n", 138 | " factors.append(n)\n", 139 | " for i in range(2,int(np.sqrt(n))+1):\n", 140 | " if (n % i == 0):\n", 141 | " factors.append(i)\n", 142 | " factors.append(n // i)\n", 143 | " # sum factors\n", 144 | " sum = 0\n", 145 | " print(factors)\n", 146 | " for f in factors:\n", 147 | " sum += f\n", 148 | "\n", 149 | " # decide if it's perfect\n", 150 | " return (sum - n == n)" 151 | ] 152 | }, 153 | { 154 | "cell_type": "markdown", 155 | "metadata": {}, 156 | "source": [ 157 | "Try putting this into the Numbers class, change the tests to use this version, and run them again. It runs a little faster, so we're making progress! But we still haven't implemented all the tests we talked about early on. Lets refactor this code to do the factorization separately, which conveniently also lets us test that we're doing factorization correctly!" 158 | ] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "execution_count": 81, 163 | "metadata": { 164 | "collapsed": true 165 | }, 166 | "outputs": [], 167 | "source": [ 168 | "# Don't forget self when putting into a class!\n", 169 | "def factorize(n):\n", 170 | " factors = []\n", 171 | " factors.append(1)\n", 172 | " factors.append(n)\n", 173 | " for i in range(2,int(np.sqrt(n))+1):\n", 174 | " if (n % i == 0):\n", 175 | " factors.append(i)\n", 176 | " factors.append(n // i)\n", 177 | " return factors\n", 178 | "\n", 179 | "def perfect_number_optimized(n):\n", 180 | " factors = factorize(n)\n", 181 | " sum = 0\n", 182 | " for f in factors:\n", 183 | " sum += f\n", 184 | " # decide if it's perfect\n", 185 | " return (sum - n == n)" 186 | ] 187 | }, 188 | { 189 | "cell_type": "raw", 190 | "metadata": {}, 191 | "source": [ 192 | "Put the following in ./tests/test_numbers.py\n", 193 | "\n", 194 | "```\n", 195 | "test_factorize_data = [(6,[1,2,3,6]),(10,[1,2,5,10]),(16,[1,2,4,8,16])]\n", 196 | "@pytest.mark.parametrize('x,expected',test_factorize_data)\n", 197 | "def test_factorize(x,expected):\n", 198 | " num = Numbers()\n", 199 | " assert sorted(num.factorize(x)) == sorted(expected)\n", 200 | "```\n", 201 | "\n", 202 | "Run py.test tests.\n", 203 | "\n", 204 | "One test failed! And it reports that the number 16 is a problem. Lets take a look." 205 | ] 206 | }, 207 | { 208 | "cell_type": "code", 209 | "execution_count": 82, 210 | "metadata": { 211 | "collapsed": false 212 | }, 213 | "outputs": [ 214 | { 215 | "data": { 216 | "text/plain": [ 217 | "[1, 16, 2, 8, 4, 4]" 218 | ] 219 | }, 220 | "execution_count": 82, 221 | "metadata": {}, 222 | "output_type": "execute_result" 223 | } 224 | ], 225 | "source": [ 226 | "factorize(16)" 227 | ] 228 | }, 229 | { 230 | "cell_type": "markdown", 231 | "metadata": {}, 232 | "source": [ 233 | "Whats going on here? If we look back at our implementation, we immediately see the problem. For perfect squares, we end up adding them twice in the lines\n", 234 | "```\n", 235 | "factors.append(i)\n", 236 | "factors.append(n // i)\n", 237 | "```\n", 238 | "There are a few ways to fix this, the easiest being a conditional check:\n", 239 | "```\n", 240 | "for i in range(2,int(np.sqrt(n))+1):\n", 241 | " if (n % i == 0):\n", 242 | " factors.append(i)\n", 243 | " if (n // i != i):\n", 244 | " factors.append(n // i)\n", 245 | "\n", 246 | "```\n", 247 | "\n", 248 | "Make this change in the class and try running the tests again. Now we're passing all the tests. Note that we would have missed this if we hadn't included a specific example that caught the problem. **Unit testing doesn't prevent all bugs!** But they will help you avoid many many problems." 249 | ] 250 | }, 251 | { 252 | "cell_type": "markdown", 253 | "metadata": {}, 254 | "source": [ 255 | "**Github Excercise**\n", 256 | "\n", 257 | "Build a function or set of functions to calculate the total number of distinct contributors on all repositories for any given username\n", 258 | "\n", 259 | "API endpoints: (test it out with curl!)\n", 260 | "List of all repos for a given username: https://api.github.com/users/{username}/repos\n", 261 | "List of all contributors for a given username and repository: https://api.github.com/repos/{username}/{repo}/contributors \n", 262 | "\n", 263 | "Below you'll find some code to help get you started. How would you write tests to make sure you detect it if Github changes their API? What other tests should you write to make sure the user of your new function always gets what the expect?" 264 | ] 265 | }, 266 | { 267 | "cell_type": "code", 268 | "execution_count": 83, 269 | "metadata": { 270 | "collapsed": false 271 | }, 272 | "outputs": [], 273 | "source": [ 274 | "import requests\n", 275 | "\n", 276 | "API_ENDPOINT = \"https://api.github.com\"\n", 277 | "def fetch_repo_names(userid):\n", 278 | " resp = requests.get(\"{}/users/{}/repos\".format(API_ENDPOINT, userid))\n", 279 | " return [x['name'] for x in resp.json()]\n", 280 | "\n", 281 | "def fetch_repo_contributors(userid, reponame):\n", 282 | " resp = requests.get(\"{}/repos/{}/{}/contributors\".format(API_ENDPOINT, userid, reponame))\n", 283 | " return [x['login'] for x in resp.json()]" 284 | ] 285 | } 286 | ], 287 | "metadata": { 288 | "kernelspec": { 289 | "display_name": "Python 3", 290 | "language": "python", 291 | "name": "python3" 292 | }, 293 | "language_info": { 294 | "codemirror_mode": { 295 | "name": "ipython", 296 | "version": 3 297 | }, 298 | "file_extension": ".py", 299 | "mimetype": "text/x-python", 300 | "name": "python", 301 | "nbconvert_exporter": "python", 302 | "pygments_lexer": "ipython3", 303 | "version": "3.5.2" 304 | } 305 | }, 306 | "nbformat": 4, 307 | "nbformat_minor": 2 308 | } 309 | --------------------------------------------------------------------------------