├── tests
    ├── __init__.py
    └── test_numbers.py
├── swe_barebones
    ├── __init__.py
    └── numbers.py
├── github_exercise_solution
    ├── github
    │   ├── __init__.py
    │   ├── .repos.py.swp
    │   └── repos.py
    └── tests
    │   ├── __init__.py
    │   └── test_github_repositories.py
├── run_tests.sh
├── requirements.txt
├── README.md
├── .gitignore
└── unit_testing.ipynb


/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/swe_barebones/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/github_exercise_solution/github/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/github_exercise_solution/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/run_tests.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Run the unit-test suite in ./tests with pytest.
# Extra command-line arguments are forwarded to pytest (e.g. ./run_tests.sh -v).

# Switch to the directory containing this script so the relative "tests" path
# resolves no matter where the script is invoked from.
cd "$(dirname "$0")" || exit 1

py.test tests "$@"
4 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy
2 | scipy
3 | pytest
4 | pylint
5 | pandas
6 | scikit-learn
7 | requests
8 | 


--------------------------------------------------------------------------------
/github_exercise_solution/github/.repos.py.swp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InsightDataScience/swe-barebones/master/github_exercise_solution/github/.repos.py.swp


--------------------------------------------------------------------------------
/swe_barebones/numbers.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | class Numbers:
 4 |     
 5 |     def __init__(self):
 6 |         pass
 7 | 
 8 |     def perfect_number(self,n):
 9 |         factors = []
10 |         factors.append(1)
11 |         factors.append(n)
12 |         for i in range(2,n):
13 |             if (n % i == 0):
14 |                 factors.append(i)
15 |         # sum factors
16 |         sum = 0
17 |         for f in factors:
18 |         	sum += f
19 |         
20 |         # decide if it's perfect
21 |         return (sum - n == n)
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | 
 2 | # Introduction
 3 | 
 4 | This is a barebones framework to see how a repo with testing can be set up. 
 5 | 
 6 | # How to get it
 7 | 
 8 | git clone https://github.com/InsightDataScience/swe-barebones.git
 9 | 
10 | pip3 install -U -r requirements.txt
11 | 
12 | # Run it
13 | 
14 | jupyter notebook unit_testing.ipynb
15 | 
16 | Run through the cells and make sure everything works (tough!)
17 | 
18 | # Run tests
19 | 
20 | From the top level directory, run:
21 | 
22 | py.test tests
23 | 
24 | # Go Develop
25 | 
26 | Please ask us if you have any questions about why things are set up the way they are. If something seems dumb, try to think about why it might be that way.
27 | 
28 | 


--------------------------------------------------------------------------------
/tests/test_numbers.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | """
 3 | This script tests methods in a number theory class called Numbers
 4 | """
 5 | 
 6 | @pytest.fixture
 7 | def num():
 8 |     from swe_barebones.numbers import Numbers
 9 |     return Numbers()
10 | 
# (input, expected result) pairs fed to test_perfect_number.
test_perfect_number_data = [
    (6, True),
    (28, True),
    (496, True),
    (8, False),
]


@pytest.mark.parametrize('x,expected', test_perfect_number_data)
def test_perfect_number(x, expected, num):
    """Check ``perfect_number`` against one parametrized expectation."""
    result = num.perfect_number(x)
    assert result == expected
16 | 
# Reference values that test_perfect_number_large_p treats as perfect.
perfect_numbers = [6, 28, 496, 8128, 33550336]


def test_perfect_number_large_p(num):
    """Sweep 2..999 and compare every answer with the reference list."""
    for candidate in range(2, 1000):
        should_be_perfect = candidate in perfect_numbers
        assert num.perfect_number(candidate) == should_be_perfect
25 | 


--------------------------------------------------------------------------------
/github_exercise_solution/github/repos.py:
--------------------------------------------------------------------------------
 1 | import requests 
 2 | import itertools 
 3 | ''' 
 4 | A set of tools to analyze Github repositories.
 5 | '''
 6 | 
 7 | def get_user_json(username):
 8 |     return requests.get('https://api.github.com/users/%s/repos' % (username)).json()
 9 | 
def get_contributor_json(username, repository):
    '''
    Fetch the contributor listing of one repository from the GitHub API.

    Args:
        username: GitHub login that owns the repository.
        repository: Name of the repository.

    Returns:
        The decoded JSON body of
        ``GET /repos/<username>/<repository>/contributors``, or an empty
        list when GitHub replies 204 No Content.

    Raises:
        requests.HTTPError: If GitHub answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within the timeout.
    '''
    response = requests.get(
        'https://api.github.com/repos/%s/%s/contributors' % (username, repository),
        timeout=10,  # without a timeout a stalled connection hangs forever
    )
    # Fail loudly on errors instead of returning the error payload as if it
    # were a list of contributors.
    response.raise_for_status()
    # A 204 reply has no body (GitHub sends it for an empty repository), so
    # .json() would raise; report "no contributors" instead.
    if response.status_code == 204:
        return []
    return response.json()
12 | 
def get_repository_list(username):
    '''
    Return the ``name`` field of every entry in ``username``'s repository listing.
    '''
    names = []
    for repo_entry in get_user_json(username):
        names.append(repo_entry['name'])
    return names
17 | 
def get_contributor_list(username, repository):
    '''
    Return the ``login`` field of every entry in the repository's contributor listing.
    '''
    logins = []
    for contributor_entry in get_contributor_json(username, repository):
        logins.append(contributor_entry['login'])
    return logins
22 | 
def get_total_contributors(username):
    '''
    Count the distinct contributors across all repositories of ``username``.

    A contributor appearing in several repositories is counted once.
    '''
    seen = set()
    for repo_name in get_repository_list(username):
        seen.update(get_contributor_list(username, repo_name))
    return len(seen)
28 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Byte-compiled / optimized / DLL files
 2 | __pycache__/
 3 | *.py[cod]
 4 | *$py.class
 5 | 
 6 | # C extensions
 7 | *.so
 8 | 
 9 | # Distribution / packaging
10 | .Python
11 | env/
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 | 
27 | # PyInstaller
28 | #  Usually these files are written by a python script from a template
29 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
30 | *.manifest
31 | *.spec
32 | 
33 | # Installer logs
34 | pip-log.txt
35 | pip-delete-this-directory.txt
36 | 
37 | # Unit test / coverage reports
38 | htmlcov/
39 | .tox/
40 | .coverage
41 | .coverage.*
42 | .cache
43 | nosetests.xml
44 | coverage.xml
45 | *,cover
46 | .hypothesis/
47 | 
48 | # Translations
49 | *.mo
50 | *.pot
51 | 
52 | # Django stuff:
53 | *.log
54 | local_settings.py
55 | 
56 | # Flask stuff:
57 | instance/
58 | .webassets-cache
59 | 
60 | # Scrapy stuff:
61 | .scrapy
62 | 
63 | # Sphinx documentation
64 | docs/_build/
65 | 
66 | # PyBuilder
67 | target/
68 | 
69 | # IPython Notebook
70 | .ipynb_checkpoints
71 | 
72 | # pyenv
73 | .python-version
74 | 
75 | # celery beat schedule file
76 | celerybeat-schedule
77 | 
78 | # dotenv
79 | .env
80 | 
81 | # virtualenv
82 | venv/
83 | ENV/
84 | 
85 | # Spyder project settings
86 | .spyderproject
87 | 
88 | # Rope project settings
89 | .ropeproject
90 | 


--------------------------------------------------------------------------------
/github_exercise_solution/tests/test_github_repositories.py:
--------------------------------------------------------------------------------
 1 | from github import repos
 2 | import pytest
 3 | from voluptuous import REMOVE_EXTRA, Schema, Url
 4 | from datetime import datetime
 5 | 
 6 | """
 7 | This script tests methods in a Github repository class
 8 | """
 9 | 
def test_repo_schema():
    """Check that the repository listing still has the fields and types we expect.

    Calls ``repos.get_user_json`` without patching it, so a real request is
    sent to the GitHub API.
    """
    js = repos.get_user_json('bmregner')

    def date_format(fmt='%Y-%m-%dT%H:%M:%SZ'):
        # Spell out %H:%M:%S rather than %X: %X is the locale's time
        # representation, so the check would depend on the machine's locale.
        return lambda v: datetime.strptime(v, fmt)

    user_schema = Schema([{'full_name': str,
                           'updated_at': date_format(),
                           'url': Url(),
                           'size': int,
                           'private': bool}],
                         extra=REMOVE_EXTRA)
    assert user_schema(js)
21 | 
def test_contrib_schema():
    """Validate the fields and types of one repository's contributor listing."""
    payload = repos.get_contributor_json('bmregner', 'finance_prediction')
    expected_entry = {
        'login': str,
        'html_url': Url(),
        'contributions': int,
    }
    contrib_schema = Schema([expected_entry], extra=REMOVE_EXTRA)
    assert contrib_schema(payload)
29 | 
30 | # Uses monkeypatch to return results without sending a request to github
def test_total_contrib(monkeypatch):
    """Count unique contributors using stubbed repository/contributor lookups."""
    contributors_by_repo = {
        'me.github.io': ['me'],
        'myproject': ['John', 'Bob', 'Gary', 'me'],
        'mydemo': ['me'],
    }

    def repo_list(username):
        return ['me.github.io', 'myproject', 'mydemo']

    def contrib_list(username, repo):
        # .get() yields None for a repo that is not in the table.
        return contributors_by_repo.get(repo)

    monkeypatch.setattr(repos, 'get_contributor_list', contrib_list)
    monkeypatch.setattr(repos, 'get_repository_list', repo_list)
    assert repos.get_total_contributors('me') == 4
45 | 


--------------------------------------------------------------------------------
/unit_testing.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "**Perfect number example**\n",
  8 |     "\n",
  9 |     "A perfect number is one where the factors of a number other than itself add up to the number.\n",
 10 |     "Factors of 6 are 1,2,3,6 and 1 + 2 + 3 = 6\n",
 11 |     "\n",
 12 |     "What do we need to test for:\n",
 13 |     "- Do we get the correct response for a few known perfect numbers?\n",
 14 |     "- Do we get the correct response for a few known non-perfect numbers?\n",
 15 |     "- Did we factorize correctly?\n"
 16 |    ]
 17 |   },
 18 |   {
 19 |    "cell_type": "code",
 20 |    "execution_count": 76,
 21 |    "metadata": {
 22 |     "collapsed": false
 23 |    },
 24 |    "outputs": [],
 25 |    "source": [
 26 |     "class Numbers:\n",
 27 |     "\n",
 28 |     "    def __init__(self):\n",
 29 |     "        pass\n",
 30 |     "\n",
 31 |     "    def perfect_number(self,n):\n",
 32 |     "        factors = []\n",
 33 |     "        factors.append(1)\n",
 34 |     "        factors.append(n)\n",
 35 |     "        for i in range(2,n):\n",
 36 |     "            if (n % i == 0):\n",
 37 |     "                factors.append(i)\n",
 38 |     "        # sum factors\n",
 39 |     "        sum = 0\n",
 40 |     "        for f in factors:\n",
 41 |     "                sum += f\n",
 42 |     "\n",
 43 |     "        # decide if it's perfect\n",
 44 |     "        return (sum - n == n)"
 45 |    ]
 46 |   },
 47 |   {
 48 |    "cell_type": "code",
 49 |    "execution_count": 77,
 50 |    "metadata": {
 51 |     "collapsed": true
 52 |    },
 53 |    "outputs": [],
 54 |    "source": [
 55 |     "def test_perfect_number():\n",
 56 |     "    num = Numbers()\n",
 57 |     "    assert num.perfect_number(6) == True\n",
 58 |     "    assert num.perfect_number(28) == True\n",
 59 |     "    assert num.perfect_number(30) == False"
 60 |    ]
 61 |   },
 62 |   {
 63 |    "cell_type": "markdown",
 64 |    "metadata": {},
 65 |    "source": [
 66 |     "This works, but if we wanted to test every edge case this is going to start to look nasty. Instead, we can use a package called `pytest` which simplifies things. Run the line below to see the output, and then go and check what it looks like in the repository."
 67 |    ]
 68 |   },
 69 |   {
 70 |    "cell_type": "code",
 71 |    "execution_count": 78,
 72 |    "metadata": {
 73 |     "collapsed": false
 74 |    },
 75 |    "outputs": [
 76 |     {
 77 |      "name": "stdout",
 78 |      "output_type": "stream",
 79 |      "text": [
 80 |       "\u001b[1m============================= test session starts ==============================\u001b[0m\n",
 81 |       "platform darwin -- Python 3.5.2, pytest-3.0.5, py-1.4.32, pluggy-0.4.0\n",
 82 |       "rootdir: /Users/BenRegner/dev/insight/swe-barebones, inifile: \n",
 83 |       "collected 5 items \n",
 84 |       "\u001b[0m\n",
 85 |       "tests/test_numbers.py .....\n",
 86 |       "\n",
 87 |       "\u001b[32m\u001b[1m=========================== 5 passed in 0.78 seconds ===========================\u001b[0m\n"
 88 |      ]
 89 |     }
 90 |    ],
 91 |    "source": [
 92 |     "!py.test tests"
 93 |    ]
 94 |   },
 95 |   {
 96 |    "cell_type": "markdown",
 97 |    "metadata": {},
 98 |    "source": [
 99 |     "Let's take a look at what's going on in there using the `%load` magic function"
100 |    ]
101 |   },
102 |   {
103 |    "cell_type": "code",
104 |    "execution_count": null,
105 |    "metadata": {
106 |     "collapsed": true
107 |    },
108 |    "outputs": [],
109 |    "source": [
110 |     "%load tests/test_numbers.py"
111 |    ]
112 |   },
113 |   {
114 |    "cell_type": "markdown",
115 |    "metadata": {},
116 |    "source": [
117 |     "The first test uses parametrize, which gives you a lot of flexibility. A great write up of everything you can do with parametrize can be found in the official documentation here: http://doc.pytest.org/en/latest/example/parametrize.html\n",
118 |     "\n",
119 |     "In the second test we run a loop to test many numbers. We could have also used parametrize for this test, although each test in parametrize stands alone, so the output becomes a little long. Go to the repository (not the cell above) and try changing this example to use the parametrize decorator. Run ```$ py.test tests``` and see how the output changes.\n",
120 |     "\n",
121 |     "This isn't super slow, but looking at our code we can find an inefficiency in our implementation. If we think a bit, we can see that we don't need to loop over all numbers up to the number we are checking, but can pick off pairs of factors as we find them. This means we only need to go up to the square root of the number we are checking. Below is a new implementation:"
122 |    ]
123 |   },
124 |   {
125 |    "cell_type": "code",
126 |    "execution_count": 80,
127 |    "metadata": {
128 |     "collapsed": false
129 |    },
130 |    "outputs": [],
131 |    "source": [
132 |     "import numpy as np\n",
133 |     "\n",
134 |     "# Don't forget self when putting into a class!\n",
135 |     "def perfect_number_optimized(n):\n",
136 |     "    factors = []\n",
137 |     "    factors.append(1)\n",
138 |     "    factors.append(n)\n",
139 |     "    for i in range(2,int(np.sqrt(n))+1):\n",
140 |     "        if (n % i == 0):\n",
141 |     "            factors.append(i)\n",
142 |     "            factors.append(n // i)\n",
143 |     "    # sum factors\n",
144 |     "    sum = 0\n",
145 |     "    print(factors)\n",
146 |     "    for f in factors:\n",
147 |     "        sum += f\n",
148 |     "\n",
149 |     "    # decide if it's perfect\n",
150 |     "    return (sum - n == n)"
151 |    ]
152 |   },
153 |   {
154 |    "cell_type": "markdown",
155 |    "metadata": {},
156 |    "source": [
157 |     "Try putting this into the Numbers class, change the tests to use this version, and run them again. It runs a little faster, so we're making progress! But we still haven't implemented all the tests we talked about early on. Lets refactor this code to do the factorization separately, which conveniently also lets us test that we're doing factorization correctly!"
158 |    ]
159 |   },
160 |   {
161 |    "cell_type": "code",
162 |    "execution_count": 81,
163 |    "metadata": {
164 |     "collapsed": true
165 |    },
166 |    "outputs": [],
167 |    "source": [
168 |     "# Don't forget self when putting into a class!\n",
169 |     "def factorize(n):\n",
170 |     "    factors = []\n",
171 |     "    factors.append(1)\n",
172 |     "    factors.append(n)\n",
173 |     "    for i in range(2,int(np.sqrt(n))+1):\n",
174 |     "        if (n % i == 0):\n",
175 |     "            factors.append(i)\n",
176 |     "            factors.append(n // i)\n",
177 |     "    return factors\n",
178 |     "\n",
179 |     "def perfect_number_optimized(n):\n",
180 |     "    factors = factorize(n)\n",
181 |     "    sum = 0\n",
182 |     "    for f in factors:\n",
183 |     "        sum += f\n",
184 |     "    # decide if it's perfect\n",
185 |     "    return (sum - n == n)"
186 |    ]
187 |   },
188 |   {
189 |    "cell_type": "raw",
190 |    "metadata": {},
191 |    "source": [
192 |     "Put the following in ./tests/test_numbers.py\n",
193 |     "\n",
194 |     "```\n",
195 |     "test_factorize_data = [(6,[1,2,3,6]),(10,[1,2,5,10]),(16,[1,2,4,8,16])]\n",
196 |     "@pytest.mark.parametrize('x,expected',test_factorize_data)\n",
197 |     "def test_factorize(x,expected):\n",
198 |     "    num = Numbers()\n",
199 |     "    assert sorted(num.factorize(x)) == sorted(expected)\n",
200 |     "```\n",
201 |     "\n",
202 |     "Run py.test tests.\n",
203 |     "\n",
204 |     "One test failed! And it reports that the number 16 is a problem. Lets take a look."
205 |    ]
206 |   },
207 |   {
208 |    "cell_type": "code",
209 |    "execution_count": 82,
210 |    "metadata": {
211 |     "collapsed": false
212 |    },
213 |    "outputs": [
214 |     {
215 |      "data": {
216 |       "text/plain": [
217 |        "[1, 16, 2, 8, 4, 4]"
218 |       ]
219 |      },
220 |      "execution_count": 82,
221 |      "metadata": {},
222 |      "output_type": "execute_result"
223 |     }
224 |    ],
225 |    "source": [
226 |     "factorize(16)"
227 |    ]
228 |   },
229 |   {
230 |    "cell_type": "markdown",
231 |    "metadata": {},
232 |    "source": [
233 |     "What's going on here? If we look back at our implementation, we immediately see the problem. For perfect squares, the square root gets added twice by the lines\n",
234 |     "```\n",
235 |     "factors.append(i)\n",
236 |     "factors.append(n // i)\n",
237 |     "```\n",
238 |     "There are a few ways to fix this, the easiest being a conditional check:\n",
239 |     "```\n",
240 |     "for i in range(2,int(np.sqrt(n))+1):\n",
241 |     "    if (n % i == 0):\n",
242 |     "        factors.append(i)\n",
243 |     "        if (n // i !=  i):\n",
244 |     "            factors.append(n // i)\n",
245 |     "\n",
246 |     "```\n",
247 |     "\n",
248 |     "Make this change in the class and try running the tests again. Now we're passing all the tests. Note that we would have missed this if we hadn't included a specific example that caught the problem. **Unit testing doesn't prevent all bugs!** But they will help you avoid many many problems."
249 |    ]
250 |   },
251 |   {
252 |    "cell_type": "markdown",
253 |    "metadata": {},
254 |    "source": [
255 |     "**GitHub Exercise**\n",
256 |     "\n",
257 |     "Build a function or set of functions to calculate the total number of distinct contributors on all repositories for any given username\n",
258 |     "\n",
259 |     "API endpoints: (test it out with curl!)\n",
260 |     "List of all repos for a given username: https://api.github.com/users/{username}/repos\n",
261 |     "List of all contributors for a given username and repository: https://api.github.com/repos/{username}/{repo}/contributors \n",
262 |     "\n",
263 |     "Below you'll find some code to help get you started. How would you write tests to make sure you detect it if GitHub changes their API? What other tests should you write to make sure the user of your new function always gets what they expect?"
264 |    ]
265 |   },
266 |   {
267 |    "cell_type": "code",
268 |    "execution_count": 83,
269 |    "metadata": {
270 |     "collapsed": false
271 |    },
272 |    "outputs": [],
273 |    "source": [
274 |     "import requests\n",
275 |     "\n",
276 |     "API_ENDPOINT = \"https://api.github.com\"\n",
277 |     "def fetch_repo_names(userid):\n",
278 |     "    resp = requests.get(\"{}/users/{}/repos\".format(API_ENDPOINT, userid))\n",
279 |     "    return [x['name'] for x in resp.json()]\n",
280 |     "\n",
281 |     "def fetch_repo_contributors(userid, reponame):\n",
282 |     "    resp = requests.get(\"{}/repos/{}/{}/contributors\".format(API_ENDPOINT, userid, reponame))\n",
283 |     "    return [x['login'] for x in resp.json()]"
284 |    ]
285 |   }
286 |  ],
287 |  "metadata": {
288 |   "kernelspec": {
289 |    "display_name": "Python 3",
290 |    "language": "python",
291 |    "name": "python3"
292 |   },
293 |   "language_info": {
294 |    "codemirror_mode": {
295 |     "name": "ipython",
296 |     "version": 3
297 |    },
298 |    "file_extension": ".py",
299 |    "mimetype": "text/x-python",
300 |    "name": "python",
301 |    "nbconvert_exporter": "python",
302 |    "pygments_lexer": "ipython3",
303 |    "version": "3.5.2"
304 |   }
305 |  },
306 |  "nbformat": 4,
307 |  "nbformat_minor": 2
308 | }
309 | 


--------------------------------------------------------------------------------