├── elex2 ├── __init__.py ├── tests │ ├── __init__.py │ ├── sample_results.csv │ ├── test_parser.py │ ├── sample_results_parsed.json │ ├── sample_results_parsed_tie_race.json │ └── test_summary.py └── election_results.py ├── elex3 ├── __init__.py ├── lib │ ├── __init__.py │ ├── scraper.py │ ├── parser.py │ └── summary.py ├── tests │ ├── __init__.py │ ├── sample_results.csv │ ├── test_parser.py │ ├── sample_results_parsed.json │ ├── sample_results_parsed_tie_race.json │ └── test_summary.py └── scripts │ └── save_summary_to_csv.py ├── elex4 ├── __init__.py ├── lib │ ├── __init__.py │ ├── scraper.py │ ├── parser.py │ ├── summary.py │ └── models.py ├── tests │ ├── __init__.py │ ├── test_parser.py │ ├── sample_results_parsed.json │ ├── sample_results_parsed_tie_race.json │ ├── test_summary.py │ └── test_models.py └── scripts │ └── save_summary_to_csv.py ├── .gitignore ├── requirements.txt ├── _docs ├── whats_next.rst ├── resources.rst ├── phase4 │ ├── overview.rst │ ├── candidates.rst │ ├── swapout.rst │ └── races.rst ├── Makefile ├── phase2.rst ├── phase3.rst ├── index.rst ├── faq.rst ├── phase1.rst └── conf.py ├── fabfile.py ├── README.md └── elex1 └── election_results.py /elex2/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /elex3/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /elex4/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /elex2/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /elex3/lib/__init__.py: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /elex3/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /elex4/lib/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /elex4/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.csv 3 | *.vim 4 | *_bkup* 5 | html/* 6 | _docs/_build/* 7 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | Fabric==1.8.2 2 | Jinja2==2.7.2 3 | MarkupSafe==0.18 4 | Pygments==1.6 5 | Sphinx==1.2.1 6 | docutils==0.11 7 | ecdsa==0.10 8 | paramiko==1.12.2 9 | pycrypto==2.6.1 10 | wsgiref==0.1.2 11 | -------------------------------------------------------------------------------- /elex2/tests/sample_results.csv: -------------------------------------------------------------------------------- 1 | date,office,district,county,candidate,party,votes 2 | 2012-11-06,President,,Some County,"Smith, Joe",GOP,10 3 | 2012-11-06,President,,Some County,"Doe, Jane",DEM,11 4 | 2012-11-06,President,,Another County,"Smith, Joe",GOP,5 5 | 2012-11-06,President,,Another County,"Doe, Jane",DEM,5 6 | -------------------------------------------------------------------------------- /elex3/tests/sample_results.csv: -------------------------------------------------------------------------------- 1 | 
class TestParser(TestCase):
    """Checks the name clean-up performed while parsing the sample results."""

    def test_name_parsing(self):
        "Parser should split full candidate name into first and last names"
        # NOTE(review): the fixture is looked up next to this test module;
        # confirm that sample_results.csv is actually present in elex4/tests.
        csv_path = join(dirname(__file__), 'sample_results.csv')
        president = parse_and_clean(csv_path)['President']
        # Race.candidates is a dict of Candidate objects; keep the Smiths
        smiths = [person for person in president.candidates.values()
                  if person.last_name == 'Smith']
        joe = smiths[0]
        self.assertEqual(joe.first_name, 'Joe')
        self.assertEqual(joe.last_name, 'Smith')
import TestCase 3 | 4 | from elex3.lib.parser import parse_and_clean 5 | 6 | 7 | class TestParser(TestCase): 8 | 9 | def test_name_parsing(self): 10 | "Parser should split full candidate name into first and last names" 11 | path = join(dirname(__file__), 'sample_results.csv') 12 | results = parse_and_clean(path) 13 | race_key = 'President' 14 | cand_key = 'GOP-Smith, Joe' 15 | # Get one county result 16 | smith = results[race_key][cand_key][0] 17 | self.assertEqual(smith['first_name'], 'Joe') 18 | self.assertEqual(smith['last_name'], 'Smith') 19 | -------------------------------------------------------------------------------- /elex2/tests/test_parser.py: -------------------------------------------------------------------------------- 1 | from os.path import dirname, join 2 | from unittest import TestCase 3 | 4 | from elex2.election_results import parse_and_clean 5 | 6 | 7 | 8 | class TestParser(TestCase): 9 | 10 | def test_name_parsing(self): 11 | "Parser should split full candidate name into first and last names" 12 | path = join(dirname(__file__), 'sample_results.csv') 13 | results = parse_and_clean(path) 14 | race_key = 'President' 15 | cand_key = 'GOP-Smith, Joe' 16 | # Get one county result 17 | smith = results[race_key][cand_key][0] 18 | self.assertEqual(smith['first_name'], 'Joe') 19 | self.assertEqual(smith['last_name'], 'Smith') 20 | -------------------------------------------------------------------------------- /elex3/lib/scraper.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from urllib import urlretrieve 3 | 4 | 5 | def download_results(path): 6 | """Download CSV of fake Virginia election results from GDocs 7 | 8 | Downloads the file to the root of the repo (/path/to/refactoring101/). 9 | 10 | NOTE: This will only download the file if it doesn't already exist 11 | This approach is simplified for demo purposes. 
def download_results(path):
    """Download CSV of fake Virginia election results from GDocs.

    The spreadsheet export is saved to ``path``.

    NOTE: The file is only downloaded if ``path`` doesn't already exist; an
    existing file is left untouched. Delete it to force a fresh download.

    This approach is simplified for demo purposes. In a real-life application,
    you'd likely have a considerable amount of additional code
    to appropriately handle HTTP timeouts, 404s, and other real-world scenarios.
    For example, you might retry a request several times after a timeout, and then
    send an email alert that the site is non-responsive.

    ARGS:

        path: destination file for the downloaded CSV.

    RETURNS:

        None

    """
    import os

    # Honor the documented contract: never clobber a file we already have.
    if os.path.exists(path):
        return

    url = "https://docs.google.com/spreadsheet/pub?key=0AhhC0IWaObRqdGFkUW1kUmp2ZlZjUjdTYV9lNFJ5RHc&output=csv"
    urlretrieve(url, path)
15 | 16 | """ 17 | # Create reader for ingesting CSV as array of dicts 18 | reader = csv.DictReader(open(path, 'rb')) 19 | 20 | results = {} 21 | 22 | # Initial data clean-up 23 | for row in reader: 24 | # Convert votes to integer 25 | row['votes'] = int(row['votes']) 26 | 27 | # Store races by slugified office and district (if there is one) 28 | race_key = row['office'] 29 | if row['district']: 30 | race_key += "-%s" % row['district'] 31 | 32 | try: 33 | race = results[race_key] 34 | except KeyError: 35 | race = Race(row['date'], row['office'], row['district']) 36 | results[race_key] = race 37 | 38 | race.add_result(row) 39 | 40 | return results 41 | -------------------------------------------------------------------------------- /_docs/resources.rst: -------------------------------------------------------------------------------- 1 | .. _Resources: 2 | 3 | Resources 4 | ========= 5 | 6 | Some resources geared for the intermediate Python programmer. 7 | 8 | - `Hitchhiker’s Guide to 9 | Python `__, especially the 10 | section on `Structuring your 11 | Project `__ 12 | and `Testing Your 13 | Code `__ 14 | - `Dive Into Python 3 `__ is 15 | a solid book for the intermediate programmer looking to deepen his or 16 | her skills. Especially check out the sections on `Unit 17 | Testing `__ and 18 | `Refactoring `__. 19 | There's also the `Python 2.x 20 | version `__. 21 | - `Refactoring `__ is a 22 | classic. Yes, it's printed on dead trees and code samples are in 23 | Java. But the collective wisdom and many of the practical techniques 24 | remain invaluable. It's an eye-opener. 25 | 26 | -------------------------------------------------------------------------------- /_docs/phase4/overview.rst: -------------------------------------------------------------------------------- 1 | .. 
def summarize(results):
    """Triggers winner assignments and formats data for output.

    ARGS:

        results: dictionary of Race instances, keyed by race.

    RETURNS:

        Dictionary of results, keyed by race. Each value holds the race
        metadata, the racewide vote total and a list of candidate dicts.

    """
    report = {}

    for race_key in results:
        race = results[race_key]
        # Winner flags must be set before candidate attributes are copied
        race.assign_winner()

        candidate_rows = []
        for person in race.candidates.values():
            # A shallow copy of the instance's attribute dictionary is an
            # easy way of obtaining all candidate attributes at once.
            fields = dict(vars(person))
            # County-level results aren't needed for the summary report
            fields.pop('county_results')
            candidate_rows.append(fields)

        report[race_key] = {
            'all_votes': race.total_votes,
            'date': race.date,
            'office': race.office,
            'district': race.district,
            'candidates': candidate_rows,
        }

    return report
38 | """ 39 | print "Open a Web browser to http://127.0.0.1:8000/\n" 40 | os.system('cd html && python -m SimpleHTTPServer') 41 | 42 | def bootstrap_docs(): 43 | """ 44 | Setup docs. 45 | """ 46 | build_sphinx_html() 47 | serve_sphinx() 48 | -------------------------------------------------------------------------------- /_docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = _build 9 | 10 | # User-friendly check for sphinx-build 11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) 12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) 13 | endif 14 | 15 | # Internal variables. 16 | PAPEROPT_a4 = -D latex_paper_size=a4 17 | PAPEROPT_letter = -D latex_paper_size=letter 18 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 19 | # the i18n builder cannot share the environment and doctrees with the others 20 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 21 | 22 | .PHONY: help clean html 23 | 24 | help: 25 | @echo "Please use \`make ' where is one of" 26 | @echo " html to make standalone HTML files" 27 | 28 | clean: 29 | rm -rf $(BUILDDIR)/* 30 | 31 | html: 32 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 33 | @echo 34 | mv $(BUILDDIR)/html .. 35 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 36 | @echo "This directory has been copied to the project root." 37 | @echo "Enjoy!" 
-------------------------------------------------------------------------------- /elex4/tests/sample_results_parsed.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "candidate": "Doe, Jane", 4 | "county": "Some County", 5 | "date": "2012-11-06", 6 | "district": "", 7 | "first_name": "Jane", 8 | "last_name": "Doe", 9 | "office": "President", 10 | "party": "DEM", 11 | "votes": 11 12 | }, 13 | { 14 | "candidate": "Doe, Jane", 15 | "county": "Another County", 16 | "date": "2012-11-06", 17 | "district": "", 18 | "first_name": "Jane", 19 | "last_name": "Doe", 20 | "office": "President", 21 | "party": "DEM", 22 | "votes": 5 23 | }, 24 | { 25 | "candidate": "Smith, Joe", 26 | "county": "Some County", 27 | "date": "2012-11-06", 28 | "district": "", 29 | "first_name": "Joe", 30 | "last_name": "Smith", 31 | "office": "President", 32 | "party": "GOP", 33 | "votes": 10 34 | }, 35 | { 36 | "candidate": "Smith, Joe", 37 | "county": "Another County", 38 | "date": "2012-11-06", 39 | "district": "", 40 | "first_name": "Joe", 41 | "last_name": "Smith", 42 | "office": "President", 43 | "party": "GOP", 44 | "votes": 5 45 | } 46 | ] 47 | -------------------------------------------------------------------------------- /elex4/tests/sample_results_parsed_tie_race.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "candidate": "Doe, Jane", 4 | "county": "Some County", 5 | "date": "2012-11-06", 6 | "district": "", 7 | "first_name": "Jane", 8 | "last_name": "Doe", 9 | "office": "President", 10 | "party": "DEM", 11 | "votes": 10 12 | }, 13 | { 14 | "candidate": "Doe, Jane", 15 | "county": "Another County", 16 | "date": "2012-11-06", 17 | "district": "", 18 | "first_name": "Jane", 19 | "last_name": "Doe", 20 | "office": "President", 21 | "party": "DEM", 22 | "votes": 5 23 | }, 24 | { 25 | "candidate": "Smith, Joe", 26 | "county": "Some County", 27 | "date": "2012-11-06", 28 | "district": 
def parse_and_clean(path):
    """Parse downloaded results file and perform various data clean-ups.

    Every row gains ``last_name`` and ``first_name`` keys (split from the
    "Last, First" ``candidate`` column) and has ``votes`` converted to int.

    ARGS:

        path: location of the results CSV on disk.

    RETURNS:

        Nested dictionary keyed first by race, then candidate.
        Candidate value is an array of dicts containing county level results.

    RAISES:

        ValueError if a candidate name does not split into exactly two
        comma-separated parts, or if a votes value is not an integer.

    """
    import sys

    # The csv module wants a binary file on Python 2, but a text file opened
    # with newline='' on Python 3.
    if sys.version_info[0] < 3:
        infile = open(path, 'rb')
    else:
        infile = open(path, newline='')

    # Use defaultdict to automatically create non-existent keys with an empty
    # dictionary as the default value.
    results = defaultdict(dict)

    # The with-block guarantees the file is closed once all rows are read
    with infile:
        for row in csv.DictReader(infile):
            # Perform some data clean-ups and conversions
            row['last_name'], row['first_name'] = [name.strip() for name in row['candidate'].split(',')]
            row['votes'] = int(row['votes'])

            # Store county-level results by office and district (if there is one),
            # then by candidate party and raw name
            race_key = row['office']
            if row['district']:
                race_key += "-%s" % row['district']
            # Create unique candidate key from party and name, in case
            # multiple candidates have the same name
            cand_key = "-".join((row['party'], row['candidate']))
            # setdefault initializes an empty list for a candidate seen for
            # the first time
            results[race_key].setdefault(cand_key, []).append(row)

    return results
"candidate": "Smith, Joe", 41 | "county": "Another County", 42 | "date": "2012-11-06", 43 | "district": "", 44 | "first_name": "Joe", 45 | "last_name": "Smith", 46 | "office": "President", 47 | "party": "GOP", 48 | "votes": 5 49 | } 50 | ] 51 | } 52 | } -------------------------------------------------------------------------------- /elex3/tests/sample_results_parsed.json: -------------------------------------------------------------------------------- 1 | { 2 | "President": { 3 | "DEM-Doe, Jane": [ 4 | { 5 | "candidate": "Doe, Jane", 6 | "county": "Some County", 7 | "date": "2012-11-06", 8 | "district": "", 9 | "first_name": "Jane", 10 | "last_name": "Doe", 11 | "office": "President", 12 | "party": "DEM", 13 | "votes": 11 14 | }, 15 | { 16 | "candidate": "Doe, Jane", 17 | "county": "Another County", 18 | "date": "2012-11-06", 19 | "district": "", 20 | "first_name": "Jane", 21 | "last_name": "Doe", 22 | "office": "President", 23 | "party": "DEM", 24 | "votes": 5 25 | } 26 | ], 27 | "GOP-Smith, Joe": [ 28 | { 29 | "candidate": "Smith, Joe", 30 | "county": "Some County", 31 | "date": "2012-11-06", 32 | "district": "", 33 | "first_name": "Joe", 34 | "last_name": "Smith", 35 | "office": "President", 36 | "party": "GOP", 37 | "votes": 10 38 | }, 39 | { 40 | "candidate": "Smith, Joe", 41 | "county": "Another County", 42 | "date": "2012-11-06", 43 | "district": "", 44 | "first_name": "Joe", 45 | "last_name": "Smith", 46 | "office": "President", 47 | "party": "GOP", 48 | "votes": 5 49 | } 50 | ] 51 | } 52 | } -------------------------------------------------------------------------------- /elex2/tests/sample_results_parsed_tie_race.json: -------------------------------------------------------------------------------- 1 | { 2 | "President": { 3 | "DEM-Doe, Jane": [ 4 | { 5 | "candidate": "Doe, Jane", 6 | "county": "Some County", 7 | "date": "2012-11-06", 8 | "district": "", 9 | "first_name": "Jane", 10 | "last_name": "Doe", 11 | "office": "President", 12 | "party": "DEM", 
13 | "votes": 10 14 | }, 15 | { 16 | "candidate": "Doe, Jane", 17 | "county": "Another County", 18 | "date": "2012-11-06", 19 | "district": "", 20 | "first_name": "Jane", 21 | "last_name": "Doe", 22 | "office": "President", 23 | "party": "DEM", 24 | "votes": 5 25 | } 26 | ], 27 | "GOP-Smith, Joe": [ 28 | { 29 | "candidate": "Smith, Joe", 30 | "county": "Some County", 31 | "date": "2012-11-06", 32 | "district": "", 33 | "first_name": "Joe", 34 | "last_name": "Smith", 35 | "office": "President", 36 | "party": "GOP", 37 | "votes": 10 38 | }, 39 | { 40 | "candidate": "Smith, Joe", 41 | "county": "Another County", 42 | "date": "2012-11-06", 43 | "district": "", 44 | "first_name": "Joe", 45 | "last_name": "Smith", 46 | "office": "President", 47 | "party": "GOP", 48 | "votes": 5 49 | } 50 | ] 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /elex3/tests/sample_results_parsed_tie_race.json: -------------------------------------------------------------------------------- 1 | { 2 | "President": { 3 | "DEM-Doe, Jane": [ 4 | { 5 | "candidate": "Doe, Jane", 6 | "county": "Some County", 7 | "date": "2012-11-06", 8 | "district": "", 9 | "first_name": "Jane", 10 | "last_name": "Doe", 11 | "office": "President", 12 | "party": "DEM", 13 | "votes": 10 14 | }, 15 | { 16 | "candidate": "Doe, Jane", 17 | "county": "Another County", 18 | "date": "2012-11-06", 19 | "district": "", 20 | "first_name": "Jane", 21 | "last_name": "Doe", 22 | "office": "President", 23 | "party": "DEM", 24 | "votes": 5 25 | } 26 | ], 27 | "GOP-Smith, Joe": [ 28 | { 29 | "candidate": "Smith, Joe", 30 | "county": "Some County", 31 | "date": "2012-11-06", 32 | "district": "", 33 | "first_name": "Joe", 34 | "last_name": "Smith", 35 | "office": "President", 36 | "party": "GOP", 37 | "votes": 10 38 | }, 39 | { 40 | "candidate": "Smith, Joe", 41 | "county": "Another County", 42 | "date": "2012-11-06", 43 | "district": "", 44 | "first_name": "Joe", 45 | "last_name": 
class Race(object):
    """A single contest, e.g. President or a U.S. House district seat.

    Tracks the racewide vote total and the Candidate instances taking part,
    keyed by (party, raw candidate name).
    """

    def __init__(self, date, office, district):
        self.date = date
        self.office = office
        self.district = district
        self.total_votes = 0
        self.candidates = {}

    def add_result(self, result):
        """Add one county-level result to the race.

        ``result`` is a dict with 'party', 'candidate', 'county' and 'votes'
        keys; 'votes' must already be a number.
        """
        self.total_votes += result['votes']
        candidate = self.__get_or_create_candidate(result)
        candidate.add_votes(result['county'], result['votes'])

    def assign_winner(self):
        """Flag the candidate with the most votes by setting winner to 'X'.

        No candidate is flagged when the top two are tied or when the race
        has no candidates. A sole candidate is flagged as the winner.
        The method may be called again after more results are added.
        """
        # Clear flags left over from an earlier call so a repeated call can
        # never leave two candidates marked as winner.
        for cand in self.candidates.values():
            cand.winner = ''

        # sort cands from highest to lowest vote count
        sorted_cands = sorted(self.candidates.values(), key=attrgetter('votes'), reverse=True)

        # Nothing to decide in a race without candidates
        if not sorted_cands:
            return

        # Determine winner, if any: the leader wins unless tied with second
        first = sorted_cands[0]
        if len(sorted_cands) == 1 or first.votes != sorted_cands[1].votes:
            first.winner = 'X'

    # Private methods
    def __get_or_create_candidate(self, result):
        # Party is part of the key in case two candidates share a name
        key = (result['party'], result['candidate'])
        try:
            candidate = self.candidates[key]
        except KeyError:
            candidate = Candidate(result['candidate'], result['party'])
            self.candidates[key] = candidate
        return candidate


class Candidate(object):
    """A candidate in a race, with county-level and total vote counts."""

    def __init__(self, raw_name, party):
        # raw_name must look like "Last, First"; any other number of
        # comma-separated parts raises ValueError on unpacking
        self.last_name, self.first_name = self.__parse_name(raw_name)
        self.party = party
        self.county_results = {}
        self.votes = 0
        self.winner = ''

    def add_votes(self, county, votes):
        """Record the votes for one county and add them to the total."""
        self.county_results[county] = votes
        self.votes += votes

    # Private method
    def __parse_name(self, raw_name):
        return [name.strip() for name in raw_name.split(",")]
def write_csv(summary):
    """Generates CSV from summary election results data.

    CSV is written to 'summary_results.csv' in the parent of the directory
    holding this script. One row is written per race/candidate pair.

    ARGS:

        summary: dictionary keyed by race. Each value is a dict of race
            fields plus a 'candidates' list of candidate dicts. It is read
            only; the caller's data is not modified.

    RAISES:

        KeyError if a race has no 'candidates' entry.

    """
    import sys

    outfile = join(dirname(dirname(__file__)), 'summary_results.csv')
    # Limit output to cleanly parsed, standardized values
    fieldnames = [
        'date',
        'office',
        'district',
        'last_name',
        'first_name',
        'party',
        'all_votes',
        'votes',
        'winner',
    ]
    # The csv module wants a binary file on Python 2, but a text file opened
    # with newline='' on Python 3.
    if sys.version_info[0] < 3:
        fh = open(outfile, 'wb')
    else:
        fh = open(outfile, 'w', newline='')

    with fh:
        writer = csv.DictWriter(fh, fieldnames, extrasaction='ignore', quoting=csv.QUOTE_MINIMAL)
        writer.writeheader()
        for race in summary.values():
            cands = race['candidates']
            race_fields = dict((key, value) for key, value in race.items() if key != 'candidates')
            for cand in cands:
                # Build a fresh row per candidate so the summary is left
                # intact and no field leaks from one candidate to the next
                row = dict(race_fields)
                row.update(cand)
                writer.writerow(row)
def summarize(results):
    """Tally votes for Races and candidates and assign winners.

    ARGS:

        results: nested dictionary keyed first by race, then candidate. Each
            candidate value is a list of county-level result dicts carrying
            'first_name', 'last_name', 'party', 'votes', 'date', 'office'
            and 'district' keys.

    RETURNS:

        Dictionary of results keyed by race. Each value holds 'all_votes',
        'date', 'office', 'district' and 'candidates', a list of candidate
        dicts sorted from highest to lowest vote count. The top candidate's
        'winner' is 'X' unless tied with the runner-up. Races without any
        candidates are left out.

    """
    summary = defaultdict(dict)

    for race_key, cand_results in results.items():
        # A race without candidates has nothing to tally or report
        if not cand_results:
            continue

        all_votes = 0
        cands = []
        for county_results in cand_results.values():
            # Populate a new candidate dict using one set of county results
            first_row = county_results[0]
            # Calculate candidate total votes
            cand_total_votes = sum(row['votes'] for row in county_results)
            cands.append({
                'first_name': first_row['first_name'],
                'last_name': first_row['last_name'],
                'party': first_row['party'],
                'winner': '',
                'votes': cand_total_votes,
            })
            # Add cand totals to racewide vote count
            all_votes += cand_total_votes

        # sort cands from highest to lowest vote count
        sorted_cands = sorted(cands, key=itemgetter('votes'), reverse=True)

        # Determine winner, if any. A sole candidate wins outright; otherwise
        # the leader wins only when not tied with the runner-up.
        first = sorted_cands[0]
        if len(sorted_cands) == 1 or first['votes'] != sorted_cands[1]['votes']:
            first['winner'] = 'X'

        # Get race metadata from one set of results. next(iter(...)) works on
        # both Python 2 lists and Python 3 dict views.
        result = next(iter(cand_results.values()))[0]
        summary[race_key] = {
            'all_votes': all_votes,
            'date': result['date'],
            'office': result['office'],
            'district': result['district'],
            'candidates': sorted_cands,
        }

    return summary
class TestSummaryResults(TestSummaryBase):
    """Summary totals and winner flags for the sample Presidential race."""

    # Parse the fixture once, at class-definition time. The with-block closes
    # the file as soon as it has been read instead of leaking the handle.
    with open(join(dirname(__file__), 'sample_results_parsed.json'), 'rb') as json_file:
        SAMPLE_RESULTS = json.load(json_file)

    def _candidate(self, last_name):
        "Return the summarized candidate dict with the given last name"
        # Look the candidate up by name rather than by position, so the
        # tests do not depend on how candidates happen to be sorted.
        return [cand for cand in self.race['candidates'] if cand['last_name'] == last_name][0]

    def test_racewide_vote_total(self):
        "Summary results should be annotated with total votes cast in race"
        self.assertEqual(self.race['all_votes'], 31)

    def test_candiate_vote_totals(self):
        "Summary candidates should reflect total votes from all counties"
        self.assertEqual(self._candidate('Smith')['votes'], 15)

    def test_winner_has_flag(self):
        "Winner flag should be assigned to candidates with most votes"
        self.assertEqual(self._candidate('Doe')['winner'], 'X')

    def test_loser_has_no_winner_flag(self):
        "Winner flag should not be assigned to candidate that does not have highest vote total"
        self.assertEqual(self._candidate('Smith')['winner'], '')


class TestTieRace(TestSummaryBase):
    """Winner flags for a fixture in which the race ends in a tie."""

    # Same loading approach as above, using the tie-race fixture.
    with open(join(dirname(__file__), 'sample_results_parsed_tie_race.json'), 'rb') as json_file:
        SAMPLE_RESULTS = json.load(json_file)

    def test_tie_race_winner_flags(self):
        "Winner flag should not be assigned to any candidate in a tie race"
        for cand in self.race['candidates']:
            self.assertEqual(cand['winner'], '')
Inspiration 6 | 7 | > "[Complexity kills][]." ~ *Ray Ozzie* 8 | 9 | > "The art of simplicity is a puzzle of complexity." ~ *Douglas Horton* 10 | 11 | > "...you're not refactoring; you're just [changing shit][]." ~ *Hamlet D'Arcy* 12 | 13 | [Complexity kills]: http://ozzie.net/docs/dawn-of-a-new-day/ 14 | [changing shit]: http://hamletdarcy.blogspot.com/2009/06/forgotten-refactorings.html 15 | 16 | ## Overview 17 | 18 | This repo contains code samples demonstrating how to transform a complex, linear script into a modular, 19 | easier-to-maintain package. The code is a teaching aid used in Python classes at [NICAR 2014][] and [NICAR 2015][], 20 | but can also work as a stand-alone tutorial (check out our [main documentation site](http://refactoring-101.readthedocs.org/en/latest/)). 21 | 22 | The tutorial uses a small, [fake set of election results][] for demonstration purposes. 23 | 24 | Project code evolves through four phases, each contained in a numbered *elex* directory. Below are descriptions of each phase, 25 | along with related questions and exercises that anticipate the next phase or set of skills. 26 | 27 | The goal is to demonstrate how to use Python functions, modules, packages and classes to organize code more effectively. 28 | We also introduce unit testing as a strategy for writing programs that you can update with confidence. The overarching theme: 29 | **_As an application or program grows in size, writing readable code with tests can help tame complexity and keep you sane._** 30 | 31 | Wondering how to use this tutorial or why the hell we called it *refactoring101*? The [FAQ][] has answers to 32 | these and sundry other questions. Also, check out the [Resources][] page for wisdom from our tribal elders.
33 | 34 | [NICAR 2014]: https://ire.org/events-and-training/event/973/1107/ 35 | [NICAR 2015]: http://www.ire.org/events-and-training/event/1494/1600/ 36 | [fake set of election results]: https://docs.google.com/spreadsheet/pub?key=0AhhC0IWaObRqdGFkUW1kUmp2ZlZjUjdTYV9lNFJ5RHc&output=html 37 | [FAQ]: https://github.com/PythonJournos/refactoring101/wiki/FAQ 38 | [Resources]: https://github.com/PythonJournos/refactoring101/wiki/Resources 39 | -------------------------------------------------------------------------------- /_docs/phase2.rst: -------------------------------------------------------------------------------- 1 | Function breakdown and Testing Intro 2 | ==================================== 3 | 4 | In the `elex2/ `__ 5 | directory, we've chopped up the original election\_results.py code into a bunch of functions and turned this code 6 | directory into a package by adding an \_\_init\_\_.py file. 7 | 8 | We've also added a suite of tests. This way we can methodically change 9 | the underlying code in later phases, while having greater confidence 10 | that we haven't corrupted our summary numbers. 11 | 12 | **Note**: We can't stress this step enough: Testing existing code is *THE* critical first step in refactoring. 13 | 14 | If the code doesn't have tests, write some, at least for the most important bits of logic. 15 | Otherwise you're just `changing shit `__. 16 | 17 | Fortunately, our code has a suite of `unit 18 | tests `__ for name 19 | parsing and, most importantly, the summary logic. 20 | 21 | Python has built-in facilities for running tests, but they're a little 22 | raw for our taste. We'll use the 23 | `nose `__ library to 24 | more easily run our tests: 25 | 26 | .. code:: bash 27 | 28 | nosetests -v tests/test_parser.py 29 | # or run all tests in the tests/ directory 30 | nosetests -v tests/*.py 31 | 32 | Observations 33 | ------------ 34 | 35 | At a high level, this code is an improvement over *elex1/*, but it could 36 | still be much improved. 
We'll get to that in Phase 3, when we introduce 37 | `modules `__ and 38 | `packages `__. 39 | 40 | Questions 41 | --------- 42 | 43 | - What is \_\_init\_\_.py and why do we use it? 44 | - In what order are test methods run? 45 | - What does the TestCase *setUp* method do? 46 | - What other TestCase methods are available? 47 | 48 | Exercises 49 | --------- 50 | 51 | - Install `nose `__ 52 | and run the tests. Try breaking a few tests and run them to see the 53 | results. 54 | - List three ways this code is better than the previous version; and 55 | three ways it could be improved. 56 | - Organize functions in *election\_results.py* into two or more new 57 | modules. (Hint: There is no right answer here. `Naming things is 58 | hard `__; aim for 59 | directory and file names that are short but meaningful to a normal 60 | human). 61 | -------------------------------------------------------------------------------- /_docs/phase3.rst: -------------------------------------------------------------------------------- 1 | Modules, packages, oh my!! 2 | ========================== 3 | 4 | In this third phase, we chop up our original *election\_results.py* 5 | module into a legitimate Python package. The new directory structure is 6 | (hopefully) self-explanatory: 7 | 8 | :: 9 | 10 | ├── elex3 11 | │   ├── __init__.py 12 | │   ├── lib 13 | │   │   ├── __init__.py 14 | │   │   ├── parser.py 15 | │   │   ├── scraper.py 16 | │   │   └── summary.py 17 | │   ├── scripts 18 | │   │   └── save_summary_to_csv.py 19 | │   └── tests 20 | │   ├── __init__.py 21 | │   ├── sample_results.csv 22 | │   ├── sample_results_parsed.json 23 | │   ├── sample_results_parsed_tie_race.json 24 | │   ├── test_parser.py 25 | │   └── test_summary.py 26 | 27 | - ``lib/`` contains re-usable bits of code. 28 | - ``scripts/`` contains...well..scripts that leverage our re-usable 29 | code. 30 | - ``tests/`` contains tests for re-usable bits of code and related 31 | fixtures. 
32 | 33 | Note that we did not change any of our functions. Mostly we just 34 | re-organized them into new modules, with the goal of grouping related 35 | bits of logic in common-sense locations. We also updated imports and 36 | "namespaced" our own re-usable code under *elex3.lib*. 37 | 38 | Here's where we start seeing the benefits of the tests we wrote in the 39 | *elex2* phase. While we've heavily re-organized our underlying code 40 | structure, we can run the same tests (with a few minor updates to 41 | *import* statements) to ensure that we haven't broken anything. 42 | 43 | **Note**: You must add the *refactoring101* directory to your 44 | *PYTHONPATH* before any of the tests or scripts will work. 45 | 46 | .. code:: bash 47 | 48 | $ cd /path/to/refactoring101 49 | $ export PYTHONPATH=`pwd`:$PYTHONPATH 50 | 51 | $ nosetests -v elex3/tests/*.py 52 | $ python elex3/scripts/save_summary_to_csv.py 53 | 54 | Check out the results of the *save\_summary\_to\_csv.py* command. The 55 | new *summary\_results.csv* should be stored *inside* the *elex3* 56 | directory, and should match the results file produced by 57 | *elex2/election\_results.py*. 58 | 59 | Questions 60 | --------- 61 | 62 | - Do you *like* the package structure and module names? How would you 63 | organize or name things differently? 64 | - Why is it necessary to add the *refactoring101/* directory to your 65 | PYTHONPATH? 66 | - What are three ways to add a library to the PYTHONPATH? 67 | - What is a class? What is a method? 68 | - What is an object in Python? What is an instance? 69 | - What is the *\_\_init\_\_* method on a class used for? 70 | - What is *self* and how does it relate to class instances? 71 | 72 | Exercises 73 | --------- 74 | 75 | - Look at the original results data, and model out some classes and 76 | methods to reflect "real world" entities in the realm of elections. 77 | - Examine functions in *lib/* and try assigning three functions to one 78 | of your new classes.
class TestSummaryResults(TestCase):

    # Read the results of the parse_and_clean function stored in a test fixture.
    # The with-block closes the fixture file as soon as it has been parsed,
    # rather than leaving the handle open for the life of the test run.
    with open(join(dirname(__file__), 'sample_results_parsed.json'), 'rb') as json_file:
        SAMPLE_RESULTS = json.load(json_file)
    # Q: Why aren't we just using the parse_and_clean method instead of
    # using a snapshot of that function's output?
    # A: To achieve better test isolation!

    # Q: Why aren't we reading in the JSON in a setUp method?
    # A: setUp is called before each test method. This ensures we only
    # incur the overhead of reading in the JSON once. In python2.7 or newer,
    # you should use the setUpClass method instead of a class attribute.
    # http://docs.python.org/2/library/unittest.html#unittest.TestCase.setUpClass

    # We will, however, use the setUp method to call the summarize
    # function afresh before each of our test methods.
    def setUp(self):
        results = summarize(self.SAMPLE_RESULTS)
        self.race = results['President']

    def _candidate(self, last_name):
        "Return the summarized candidate dict with the given last name"
        # Loop through candidates and find the one we want rather than
        # relying on default sorting of candidates, which would make the
        # tests brittle if the implementation changed.
        return [cand for cand in self.race['candidates'] if cand['last_name'] == last_name][0]

    def test_racewide_vote_total(self):
        "Summary results should be annotated with total votes cast in race"
        self.assertEqual(self.race['all_votes'], 31)

    def test_candiate_vote_totals(self):
        "Summary candidates should reflect total votes from all counties"
        self.assertEqual(self._candidate('Smith')['votes'], 15)

    def test_winner_has_flag(self):
        "Winner flag should be assigned to candidates with most votes"
        self.assertEqual(self._candidate('Doe')['winner'], 'X')

    def test_loser_has_no_winner_flag(self):
        "Winner flag should not be assigned to candidate that does not have highest vote total"
        self.assertEqual(self._candidate('Smith')['winner'], '')


class TestTieRace(TestCase):

    # Q: Why do we need a new class and fixture for this race?
    # A: So that we can change the vote counts so that we have a tie, of course!
    # We don't *need* a new test class, but hey, why not?
    with open(join(dirname(__file__), 'sample_results_parsed_tie_race.json'), 'rb') as json_file:
        SAMPLE_RESULTS = json.load(json_file)

    def test_tie_race_winner_flags(self):
        "Winner flag should not be assigned to any candidate in a tie race"
        results = summarize(self.SAMPLE_RESULTS)
        race = results['President']
        for cand in race['candidates']:
            self.assertEqual(cand['winner'], '')
class TestTieRace(TestCase):

    # Q: Why do we need a new class and fixture for this race?
    # A: So that we can change the vote counts so that we have a tie, of course!
    # We don't *need* a new test class, but hey, why not?
    # The with-block closes the fixture file once it has been parsed.
    with open(join(dirname(__file__), 'sample_results_parsed_tie_race.json'), 'rb') as json_file:
        SAMPLE_RESULTS = json.load(json_file)

    def test_tie_race_winner_flags(self):
        "Winner flag should not be assigned to any candidate in a tie race"
        results = summarize(self.SAMPLE_RESULTS)
        race = results['President']
        for cand in race['candidates']:
            self.assertEqual(cand['winner'], '')
We also 34 | introduce unit testing as a strategy for writing programs that you can update with confidence. 35 | 36 | The overarching theme: **As a program grows in size, writing readable code with tests can help tame 37 | complexity and keep you sane.** 38 | 39 | 40 | How To Use This Tutorial 41 | ------------------------ 42 | 43 | The `GitHub repo <https://github.com/PythonJournos/refactoring101>`__ contains code samples demonstrating how to transform a 44 | complex, linear script into a modular, easier-to-maintain package. The code was written as a reference for Python classes at 45 | NICAR `2014 <https://ire.org/events-and-training/event/973/1107/>`__ and `2015 <http://www.ire.org/events-and-training/event/1494/1600/>`__, 46 | but can also be used as a stand-alone tutorial. 47 | 48 | We use a small, `fake set of election results <https://docs.google.com/spreadsheet/pub?key=0AhhC0IWaObRqdGFkUW1kUmp2ZlZjUjdTYV9lNFJ5RHc&output=html>`__ 49 | for demonstration purposes. Project code evolves through four phases, each contained in a numbered 50 | *elex* directory in the `code repo <https://github.com/PythonJournos/refactoring101>`__. 51 | 52 | **Each section ends with questions and/or exercises. These are the most important part of the tutorial.** 53 | You're supposed to wrestle with these questions and exercises. Tinker with the code; break the code; write alternative versions of the code. 54 | Then email me (it's not Jeremy's fault) and explain why the code sucks. 55 | Then read your own code from six months ago ;) 56 | 57 | 58 | Questions and Resources 59 | ----------------------- 60 | 61 | Still have questions? Check out the :ref:`FAQ`, as well as the :ref:`Resources` page for wisdom from tribal elders. 62 | 63 | 64 | Table of Contents 65 | ----------------- 66 | .. toctree:: 67 | :maxdepth: 1 68 | 69 | phase1 70 | phase2 71 | phase3 72 | phase4/overview 73 | phase4/candidates 74 | phase4/races 75 | phase4/swapout 76 | whats_next 77 | faq 78 | resources 79 | -------------------------------------------------------------------------------- /_docs/faq.rst: -------------------------------------------------------------------------------- 1 | .. _FAQ: 2 | 3 | FAQ 4 | ==== 5 | 6 | Why refactoring? 7 | ---------------- 8 | 9 | First, what the hell is refactoring?
The technical definition is 10 | explained nicely by 11 | `Wikipedia `__. But in a 12 | nutshell, it's a deliberate process of changing code so that it's easier 13 | to understand and change in the future. Refactoring has a long and rich 14 | history and can get quite technical, but we use the term loosely here. 15 | We called this git repo/tutorial *refactoring101* because it attempts to 16 | show how you can apply some basic principles and techniques to manage 17 | larger programs. These skills aren't only useful for changing existing 18 | programs. They're also a handy way of designing larger applications from 19 | the outset. 20 | 21 | Who is this tutorial for? 22 | ------------------------- 23 | 24 | We assume you're already comfortable with basic programming concepts in 25 | Python. You understand loops, conditionals, variables, and basic data 26 | types (strings, integers, lists, dictionaries, even sets!). You've also 27 | written a few functions in your day. But you're in that nether realm 28 | where you get the basics, but aren't quite sure how to write larger 29 | programs. Or perhaps like many before you (we've all been there), you've 30 | written a monstrosity of a script that is error-prone, brittle, and hard 31 | to decipher. You suspect there must be a better way to craft large 32 | programs, but you're not quite sure how. If you have that itchy feeling, 33 | this tutorial is for you. 34 | 35 | How should I use this tutorial? 36 | ------------------------------- 37 | 38 | It's an immersion exercise. This is not a tutorial where we walk through 39 | the code, explaining every step. We provide an overview of the code at 40 | each stage, and some teasers in the Questions and Exercises sections to 41 | nudge you toward new concepts and techniques. But ultimately, it's up to 42 | you to read the code, research language features that are new or fuzzy, 43 | and experiment by modifying and running the scripts (and tests). 
That 44 | said, you're not alone in the deep end. Hit us up on 45 | `PythonJournos `__ as you 46 | work through the tutorial. We're friendly. We promise :) 47 | 48 | Why did we write this? 49 | ---------------------- 50 | 51 | Because like you, we've experienced the thrill of mastering the basics 52 | -- writing those first few scripts to get things done -- and the 53 | inevitable frustration of not knowing what comes next. How do I write a 54 | bigger program that does more than one thing? How do I know if some part 55 | of the program failed? How can I use this bit of code in another script, 56 | without having to update the code in both places? We've wrung our fists 57 | in the air over the same questions. Hopefully this tutorial helps nudge 58 | you toward some answers. 59 | 60 | Who wrote this tutorial? 61 | ------------------------ 62 | 63 | `Jeremy Bowers `__ and `Serdar 64 | Tumgoren `__, nerds from the journalism 65 | community. Don't be shy. Hit us up with questions, pull requests, angry 66 | criticisms, etc. 67 | 68 | Am I alone? 69 | ----------- 70 | 71 | That's pretty deep. But we're inclined to say 72 | `no `__. 
class TestCandidate(TestCase):

    def test_candidate_name(self):
        "Candidates should have first_name and last_name attributes"
        cand = Candidate("Smith, Joe", "GOP")
        # assertEqual replaces the deprecated assertEquals alias throughout
        self.assertEqual(cand.first_name, "Joe")
        self.assertEqual(cand.last_name, "Smith")


class TestCandidateVotes(TestCase):

    def setUp(self):
        self.cand = Candidate("Smith, Joe", "GOP")

    def test_default_zero_votes(self):
        "Candidate vote count should default to zero"
        self.assertEqual(self.cand.votes, 0)

    def test_vote_count_update(self):
        "Candidate.add_votes method should update vote count"
        self.cand.add_votes("Some County", 20)
        self.assertEqual(self.cand.votes, 20)

    def test_county_results_access(self):
        "Candidate.add_votes method should store county results"
        self.cand.add_votes("Some County", 20)
        expected = {"Some County": 20}
        self.assertEqual(self.cand.county_results, expected)


class TestRace(TestCase):

    def setUp(self):
        # Fresh result dicts for every test, since test_tie_race mutates one
        self.smith_result = {
            'date': '2012-11-06',
            'candidate': 'Smith, Joe',
            'party': 'Dem',
            'office': 'President',
            'county': 'Fairfax',
            'votes': 2000,
        }
        self.doe_result = {
            'date': '2012-11-06',
            'candidate': 'Doe, Jane',
            'party': 'GOP',
            'office': 'President',
            'county': 'Fairfax',
            'votes': 1000,
        }
        self.race = Race("2012-11-06", "President", "")

    def _candidate(self, last_name):
        "Return the race's candidate instance with the given last name"
        return [cand for cand in self.race.candidates.values() if cand.last_name == last_name][0]

    def test_total_votes_default(self):
        "Race total votes should default to zero"
        self.assertEqual(self.race.total_votes, 0)

    def test_total_votes_update(self):
        "Race.add_result should update racewide vote count"
        self.race.add_result(self.smith_result)
        self.assertEqual(self.race.total_votes, 2000)

    def test_add_result_to_candidate(self):
        "Race.add_result should update a unique candidate instance"
        # Add a vote twice. If it's the same candidate, vote total should be sum of results
        self.race.add_result(self.smith_result)
        self.race.add_result(self.smith_result)
        cand_key = (self.smith_result['party'], self.smith_result['candidate'])
        candidate = self.race.candidates[cand_key]
        self.assertEqual(candidate.votes, 4000)

    def test_winner_has_flag(self):
        "Winner flag should be assigned to candidates with most votes"
        self.race.add_result(self.doe_result)
        self.race.add_result(self.smith_result)
        self.race.assign_winner()
        self.assertEqual(self._candidate('Smith').winner, 'X')

    def test_loser_has_no_winner_flag(self):
        "Winner flag should not be assigned to candidate who does not have highest vote total"
        self.race.add_result(self.doe_result)
        self.race.add_result(self.smith_result)
        self.race.assign_winner()
        self.assertEqual(self._candidate('Doe').winner, '')

    def test_tie_race(self):
        "Winner flag should not be assigned to any candidate in a tie race"
        # Modify Doe vote count to make it a tie
        self.doe_result['votes'] = 2000
        self.race.add_result(self.doe_result)
        self.race.add_result(self.smith_result)
        self.race.assign_winner()
        for cand in self.race.candidates.values():
            self.assertEqual(cand.winner, '')
Calculates total votes for 3 | races and candidates, and determines if there is a winner in each race. 4 | 5 | This module bundles together way too much functionality and is near impossible 6 | to test, beyond eye-balling results. 7 | 8 | USAGE: 9 | 10 | python election_results.py 11 | 12 | OUTPUT: 13 | 14 | summary_results.csv 15 | 16 | """ 17 | import csv 18 | import urllib 19 | from operator import itemgetter 20 | from collections import defaultdict 21 | from os.path import dirname, join 22 | 23 | 24 | # Download CSV of fake Virginia election results to root of project 25 | url = "https://docs.google.com/spreadsheet/pub?key=0AhhC0IWaObRqdGFkUW1kUmp2ZlZjUjdTYV9lNFJ5RHc&output=csv" 26 | filename = join(dirname(dirname(__file__)), 'fake_va_elec_results.csv') 27 | urllib.urlretrieve(url, filename) 28 | 29 | # Create reader for ingesting CSV as array of dicts 30 | reader = csv.DictReader(open(filename, 'rb')) 31 | 32 | # Use defaultdict to automatically create non-existent keys with an empty dictionary as the default value. 33 | # See https://pydocs2cn.readthedocs.org/en/latest/library/collections.html#defaultdict-objects 34 | results = defaultdict(dict) 35 | 36 | # Initial data clean-up 37 | for row in reader: 38 | # Parse name into first and last 39 | row['last_name'], row['first_name'] = [name.strip() for name in row['candidate'].split(',')] 40 | # Convert total votes to an integer 41 | row['votes'] = int(row['votes']) 42 | 43 | # Store county-level results by slugified office and district (if there is one), 44 | # then by candidate party and raw name 45 | race_key = row['office'] 46 | if row['district']: 47 | race_key += "-%s" % row['district'] 48 | # Create unique candidate key from party and name, in case multiple candidates have same 49 | cand_key = "-".join((row['party'], row['candidate'])) 50 | # Below, setdefault initializes empty dict and list for the respective keys if they don't already exist. 
51 | race = results[race_key] 52 | race.setdefault(cand_key, []).append(row) 53 | 54 | 55 | # Tally votes for Races and candidates and assign winners 56 | summary = defaultdict(dict) 57 | 58 | for race_key, cand_results in results.items(): 59 | all_votes = 0 60 | cands = [] 61 | for cand_key, results in cand_results.items(): 62 | # Populate a new candidate dict using one set of county results 63 | cand = { 64 | 'first_name': results[0]['first_name'], 65 | 'last_name': results[0]['last_name'], 66 | 'party': results[0]['party'], 67 | 'winner': '', 68 | } 69 | # Calculate candidate total votes 70 | cand_total_votes = sum([result['votes'] for result in results]) 71 | cand['votes'] = cand_total_votes 72 | # Add cand totals to racewide vote count 73 | all_votes += cand_total_votes 74 | # And stash the candidate's data 75 | cands.append(cand) 76 | 77 | # sort cands from highest to lowest vote count 78 | sorted_cands = sorted(cands, key=itemgetter('votes'), reverse=True) 79 | 80 | # Determine winner, if any 81 | first = sorted_cands[0] 82 | second = sorted_cands[1] 83 | 84 | if first['votes'] != second['votes']: 85 | first['winner'] = 'X' 86 | 87 | # Get race metadata from one set of results 88 | result = cand_results.values()[0][0] 89 | # Add results to output 90 | summary[race_key] = { 91 | 'date': result['date'], 92 | 'office': result['office'], 93 | 'district': result['district'], 94 | 'all_votes': all_votes, 95 | 'candidates': sorted_cands, 96 | } 97 | 98 | 99 | # Write CSV of results 100 | outfile = join(dirname(__file__), 'summary_results.csv') 101 | with open(outfile, 'wb') as fh: 102 | # We'll limit the output to cleanly parsed, standardized values 103 | fieldnames = [ 104 | 'date', 105 | 'office', 106 | 'district', 107 | 'last_name', 108 | 'first_name', 109 | 'party', 110 | 'all_votes', 111 | 'votes', 112 | 'winner', 113 | ] 114 | writer = csv.DictWriter(fh, fieldnames, extrasaction='ignore', quoting=csv.QUOTE_MINIMAL) 115 | writer.writeheader() 116 | for race, 
results in summary.items(): 117 | cands = results.pop('candidates') 118 | for cand in cands: 119 | results.update(cand) 120 | writer.writerow(results) 121 | 122 | -------------------------------------------------------------------------------- /_docs/phase4/candidates.rst: -------------------------------------------------------------------------------- 1 | Candidates, of course 2 | ===================== 3 | 4 | An election has a set of races, each of which has candidates. 5 | So a `Candidate` class is a natural starting point for data modeling. 6 | 7 | What basic characteristics does a candidate have in the context of the 8 | `source data `__? 9 | 10 | Name, party and county election results jump out. 11 | 12 | A candidate also seems like a natural place for data transforms and 13 | computations that now live in *lib/parser.py* and *lib/summary.py*: 14 | 15 | - candidate name parsing 16 | - total candidate votes from all counties 17 | - winner status 18 | 19 | Before we migrate the hard stuff, let's start with the basics. 20 | 21 | We'll store new election classes in a *lib/models.py* 22 | (`Django `__ 23 | users, this should be familiar). We'll store tests for our new classes 24 | in a *test\_models.py* module. 25 | 26 | Now let's start writing some test-driven code! 27 | 28 | Name Parsing 29 | ------------ 30 | 31 | The Candidate class should be responsible for parsing a 32 | full name into first and last names (remember, candidate names in our 33 | source data are in the form *Lastname, Firstname*). 34 | 35 | - Create *elex4/tests/test\_models.py* and add test for Candidate name 36 | parts 37 | - Run test; see it fail 38 | - Write a Candidate class with a private method to parse the full name 39 | 40 | *Note*: You can cheat here. Recall that the name parsing code was 41 | already written in *lib/parser.py*.
42 | 43 | - Run test; see it pass 44 | 45 | Observations 46 | ~~~~~~~~~~~~ 47 | 48 | In the refactoring above, notice that we're not directly testing the 49 | *name\_parse* method but simply checking for the correct value of the 50 | first and last names on candidate instances. The *name\_parse* code has 51 | been nicely tucked out of sight. In fact, we emphasize that this method 52 | is an *implementation detail* -- part of the *Candidate* class's 53 | internal housekeeping -- by prefixing it with two underscores. 54 | 55 | This syntax denotes a `private 56 | method `__ 57 | that is not intended for use by code outside the *Candidate* class. 58 | We're restricting (though not completely preventing) the outside world 59 | from using it, since it's quite possible this code will change or be 60 | removed in the future. 61 | 62 | More frequently, you'll see a single underscore prefix used to 63 | denote private methods and variables. This is fine, though note that 64 | only the double underscores trigger the name-mangling intended to 65 | limit usage of the method. 66 | 67 | Questions 68 | ~~~~~~~~~ 69 | 70 | - In order to migrate functions to methods on the Candidate class, we 71 | had to make the first parameter in each method *self*. Why? 72 | 73 | 74 | County results 75 | -------------- 76 | 77 | In addition to a name and party, each *Candidate* has county-level 78 | results. As part of our summary report, county-level results need to be 79 | rolled up into a racewide total for each candidate. At a high level, it 80 | seems natural for each candidate to track his or her own vote totals. 81 | 82 | Below are a few basic assumptions, or requirements, that will help us 83 | flesh out vote-handling on the *Candidate* class: 84 | 85 | - A candidate should start with zero votes 86 | - Adding a vote should increment the vote count 87 | - County-level results should be accessible 88 | 89 | With this basic list of requirements in hand, we're ready to start 90 | coding.
For each requirement, we'll start by writing a (failing) test 91 | that captures this assumption; then we'll write code to make the test 92 | pass (i.e. meet our assumption). 93 | 94 | 1. Add test to ensure *Candidate*'s initial vote count is zero 95 | 96 | Note: We created a new *TestCandidateVotes* class with a *setUp* 97 | method that lets us re-use the same candidate instance across all 98 | test methods. This makes our tests less brittle -- e.g., if we add a 99 | parameter to the *Candidate* class, we only have to update the 100 | candidate instance in the *setUp* method, rather than in every test 101 | method (as we will have to do in the *TestCandidate* class) 102 | 103 | 1. Run test; see it fail 104 | 2. Update *Candidate* class to have initial vote count of zero 105 | 3. Run test; see it pass 106 | 107 | Now let's add a method to update the candidate's total vote totals for 108 | each county result. 109 | 110 | 1. Add test for *Candidate.add\_votes* method 111 | 2. Run test; see it fail 112 | 3. Create the *Candidate.add\_votes* method 113 | 4. Run test; see it pass 114 | 115 | Finally, let's stash the county-level results for each candidate. 116 | Although we're not using these lower-level numbers in our summary 117 | report, it's easy enough to add in case we need them for down the road. 118 | 119 | 1. Create test for county\_results attribute 120 | 2. Run test; see it fail 121 | 3. Update *Candidate.add\_votes* method to store county-level results 122 | 4. Run test; see it pass 123 | 124 | Questions 125 | ~~~~~~~~~ 126 | 127 | Exercises 128 | ~~~~~~~~~ 129 | 130 | - The *Candidate.add\_votes* method has a potential bug: It can't 131 | handle votes that are strings instead of proper integers. This bug 132 | might crop up if our parser fails to convert strings to integers. 133 | Write a unit test to capture the bug, then update the method to 134 | handle such "dirty data" gracefully. 
# --------------------------------------------------------------------------------
# /elex2/election_results.py:
# --------------------------------------------------------------------------------
"""
In this second pass at the election_results.py script, we chop up the code into
functions and add a few tests.

USAGE:

    python election_results.py

OUTPUT:

    summary_results.csv

"""
import csv
import sys
import urllib
from operator import itemgetter
from collections import defaultdict
from os.path import abspath, dirname, join

# Absolute directory of this module. __file__ may be a bare relative name
# (e.g. when run as "python election_results.py"), in which case
# dirname(dirname(__file__)) silently collapses to the current directory.
_MODULE_DIR = dirname(abspath(__file__))


def main():
    """Download, parse and summarize the results, then write the summary CSV."""
    # Download CSV of fake Virginia election results to root of project
    path = join(dirname(_MODULE_DIR), 'fake_va_elec_results.csv')
    download_results(path)
    # Process data
    results = parse_and_clean(path)
    summary = summarize(results)
    write_csv(summary)


#### PRIMARY FUNCS ####
### These funcs perform the major steps of our application ###

def download_results(path):
    """Download CSV of fake Virginia election results from GDocs to `path`."""
    # urlretrieve lives in urllib.request on Python 3 and in urllib on Python 2.
    try:
        from urllib.request import urlretrieve
    except ImportError:
        urlretrieve = urllib.urlretrieve
    url = "https://docs.google.com/spreadsheet/pub?key=0AhhC0IWaObRqdGFkUW1kUmp2ZlZjUjdTYV9lNFJ5RHc&output=csv"
    urlretrieve(url, path)


def parse_and_clean(path):
    """Parse downloaded results file and perform various data clean-ups

    Each row gains 'last_name' and 'first_name' keys (split from the raw
    "Last, First" candidate name) and has 'votes' converted to an integer.

    RETURNS:

        Nested dictionary keyed by race, then candidate.
        Candidate value is an array of dicts containing county level results.

    """
    # Use defaultdict to automatically create non-existent keys with an empty dictionary as the default value.
    # See https://pydocs2cn.readthedocs.org/en/latest/library/collections.html#defaultdict-objects
    results = defaultdict(dict)

    # The with-block guarantees the file handle is closed, even on a bad row.
    with _open_csv(path, 'r') as fh:
        # Create reader for ingesting CSV as array of dicts
        reader = csv.DictReader(fh)

        # Initial data clean-up
        for row in reader:
            # Parse name into first and last
            row['last_name'], row['first_name'] = [name.strip() for name in row['candidate'].split(',')]
            # Convert total votes to an integer
            row['votes'] = int(row['votes'])

            # Store county-level results by slugified office and district (if there is one),
            # then by candidate party and raw name
            race_key = row['office']
            if row['district']:
                race_key += "-%s" % row['district']
            # Create unique candidate key from party and name, in case multiple candidates have same
            cand_key = "-".join((row['party'], row['candidate']))
            # Below, setdefault initializes an empty list for the candidate key if it doesn't already exist.
            race = results[race_key]
            race.setdefault(cand_key, []).append(row)

    return results


def summarize(results):
    """Tally votes for Races and candidates and assign winner flag.

    The top vote-getter is flagged with 'X' unless it is tied with the
    runner-up; a candidate running unopposed is always the winner. Races
    without any candidates are left out of the summary.

    RETURNS:

        Dictionary of results, keyed by race. Each value holds the race's
        'date', 'office', 'district', 'all_votes' and a 'candidates' list
        sorted from highest to lowest vote count.

    """
    summary = defaultdict(dict)

    for race_key, cand_results in results.items():
        if not cand_results:
            # Nothing to tally and no row to take race metadata from
            continue

        all_votes = 0
        cands = []
        for county_results in cand_results.values():
            # Populate a new candidate dict using one set of county results
            first_result = county_results[0]
            cand = {
                'first_name': first_result['first_name'],
                'last_name': first_result['last_name'],
                'party': first_result['party'],
                'winner': '',
            }
            # Calculate candidate total votes
            cand_total_votes = sum(result['votes'] for result in county_results)
            cand['votes'] = cand_total_votes
            # Add cand totals to racewide vote count
            all_votes += cand_total_votes
            # And stash the candidate's data
            cands.append(cand)

        # sort cands from highest to lowest vote count
        sorted_cands = sorted(cands, key=itemgetter('votes'), reverse=True)

        # Determine winner, if any. Checking the length first avoids an
        # IndexError when a race has only one candidate.
        first = sorted_cands[0]
        if len(sorted_cands) == 1 or first['votes'] != sorted_cands[1]['votes']:
            first['winner'] = 'X'

        # Get race metadata from one set of results. next(iter(...)) works on
        # both Python 2 lists and Python 3 dict views.
        result = next(iter(cand_results.values()))[0]
        # Add results to output
        summary[race_key] = {
            'all_votes': all_votes,
            'date': result['date'],
            'office': result['office'],
            'district': result['district'],
            'candidates': sorted_cands,
        }

    return summary


def write_csv(summary, outfile=None):
    """Generates CSV from summary election results data

    One row is written per candidate, combining the race-level fields with
    that candidate's fields. The `summary` argument is not modified.

    CSV is written to `outfile`; by default that is the 'summary_results.csv'
    file inside the same directory as this module.

    """
    if outfile is None:
        outfile = join(_MODULE_DIR, 'summary_results.csv')
    # Limit output to cleanly parsed, standardized values
    fieldnames = [
        'date',
        'office',
        'district',
        'last_name',
        'first_name',
        'party',
        'all_votes',
        'votes',
        'winner',
    ]
    with _open_csv(outfile, 'w') as fh:
        writer = csv.DictWriter(fh, fieldnames, extrasaction='ignore', quoting=csv.QUOTE_MINIMAL)
        writer.writeheader()
        for race in summary.values():
            # Copy the race-level fields rather than popping/updating the
            # caller's dict, so the summary can still be used afterwards.
            race_fields = dict((key, value) for key, value in race.items() if key != 'candidates')
            for cand in race['candidates']:
                row = dict(race_fields)
                row.update(cand)
                writer.writerow(row)


#### HELPER FUNCS ####

def _open_csv(path, mode):
    """Open `path` the way the csv module expects on the running Python.

    Python 2's csv module needs binary mode; Python 3's needs text mode
    with newline=''.
    """
    if sys.version_info[0] < 3:
        return open(path, mode + 'b')
    return open(path, mode, newline='')


# Q: What on earth is this __name__ == __main__ thing?
# A: Syntax that lets you execute a module as a script.
# http://docs.python.org/2/tutorial/modules.html#executing-modules-as-scripts
if __name__ == '__main__':
    main()

# --------------------------------------------------------------------------------
# /_docs/phase1.rst:
# --------------------------------------------------------------------------------
# 1 | (Re)designing code
# 2 | ==================
# 3 |
# 4 | Code smells
# 5 | -----------
# 6 |
# 7 | We begin with a single, linear script in the `elex1/ `__ directory.
# 8 | Below are a few reasons why this `code smells `__ (some might say it
# 9 | reeks):
# 10 |
# 11 | - It's hard to understand. You have to read the entire script before
# 12 | getting a full sense of what it does.
# 13 | - It's hard to debug when something goes wrong.
# 14 | - It's pretty much impossible to test, beyond eye-balling the output
# 15 | file.
# 16 | - None of the code is reusable by other programs.
# 17 |
# 18 | Hacking-It-Out-As-You-Go
# 19 | ------------------------
# 20 |
# 21 | Scripts like this are often born when a programmer dives immediately into implementing
# 22 | his code. He sees the end goal -- "summarize election data" -- and gets right to it, hacking
# 23 | his way through each step of the script until things "work".
24 | 25 | The hack-it-out-as-you-go approach can certainly produce working code. But unless you're extremely disciplined, 26 | this process can also yield spaghetti code -- a jumble of hard-to-decipher and error-prone logic that you fear changing. 27 | 28 | So, how do we avoid spaghetti code? *By choosing to have lots of small problems instead of one big problem.* 29 | 30 | Lots of small problems 31 | ---------------------- 32 | 33 | A key step in the art of designing code is hitting the brakes up front and spending a few minutes thinking 34 | through the problem at hand. Using this approach, you'll quickly discover that you don't really 35 | have one big problem (*"summarize some election data"*) but a series of small problems: 36 | 37 | * Download election data 38 | * Parse election data 39 | * Calculate candidate vote totals and determine winners 40 | * Create a summary spreadsheet 41 | 42 | Each of those smaller problems, in turn, can often be decomposed into a series of smaller steps, some of 43 | which don't become clear until you've started writing the code. 44 | 45 | But it's critical at this phase to *NOT* start writing code!!! You will be tempted, but doing so will 46 | switch your brain from "design mode" to the more myopic "code" mode (it's a thing). Trust in your ability 47 | to implement the code when the time is right (we promise, you'll figure it out), and instead grant yourself a 48 | few minutes of freedom *to design the code*. 49 | 50 | If you just can't resist implementing code as you design, then close your laptop 51 | and go old school with pen and paper. A mind-map or flow-chart is a great way to hash out the 52 | high-level design and flow of your program. Or if you're lucky enough to have a whiteboard, 53 | use that to sketch out the initial steps of your program.
54 | 55 | Some folks also like writing `pseudocode `__, 56 | though beware the siren's call to slip back into implementing "working" code 57 | (Python in particular makes this extremely easy). 58 | 59 | *Fun fact*: Jeremy and I are so enthusiastic about whiteboarding that we once sketched out a 60 | backyard goat roast on an office wall (said design was never implemented). 61 | 62 | Shred this code (on paper) 63 | -------------------------- 64 | 65 | In this tutorial, we already have some ready-baked `spaghetti code `__ 66 | for you to slice and dice into smaller components. 67 | 68 | We encourage you to print the code on paper -- yes, dead trees! -- and use a marker to group code bits 69 | into separate functions. As you try to make sense of the logic and data structures, it's a good idea to reference the 70 | `source data `__. 71 | 72 | This exercise is intended to familiarize you with the data and the mechanics of the code, and get your 73 | creative juices flowing. As you read the code, think about which sections of logic are related (perhaps they 74 | process some piece of data, or apply a process to a bunch of data in a loop). 75 | 76 | Use circles, brackets, arrows -- whatever marks on paper you need to group together such related bits of code. 77 | Then, try to give them *meaningful names*. These names will become the functions that wrap these bits of logic. 78 | 79 | `Naming things is hard `__, and can become *really hard* if a function is trying to do too many things. 80 | If you find yourself struggling to come up with a clear function name, ask yourself if breaking down the section of code into even smaller parts ( 81 | say two or three functions instead of one) would make it easier to assign a clear and meaningful name to each function. 82 | 83 | Finally, spend some time thinking about how all these new bits of code will interact. Will one of the functions require an input that comes 84 | from another function?
This orchestration of code is typically handled in a function called `main `__, 85 | which serves as the entry point and quarterback for the entire script. 86 | 87 | Keep in mind there's no "right" approach or solution here. The overarching goal is to improve the *readability of the code*. 88 | 89 | Whether you resort to pseudocode, a whiteboard, or simple pen-on-paper, the point is to stop thinking 90 | about *how to implement the code* and instead focus on *how to design the program*. 91 | 92 | Once the code design process is complete, try implementing the design. Ask yourself how this process compared to prior efforts to 93 | write a script (or unravel someone else's code). Was it easier? Harder? Is the end product easier to read and understand? 94 | 95 | In the next section, you'll see our pass at the same exercise, and learn how to further improve this script by organizing functions into 96 | new source files. 97 | 98 | 99 | Questions 100 | --------- 101 | 102 | - What are `unit 103 | tests `__? 104 | - Can you identify three sections of logic that could be unit tested? 105 | - What are 106 | `modules `__? 107 | - What are 108 | `packages `__? 109 | 110 | Exercises 111 | --------- 112 | 113 | - Slice up this code into a bunch of functions, where related bits of 114 | logic are grouped together. Do function names accurately reflect what they 115 | actually do? If not, how could you rename functions and/or re-organize the code 116 | to clarify the purpose of each function? 117 | - Compare your revised script to `our version `__. 118 | What's similar? What's different? Explain 5 things you like or dislike about each and why. 119 | - Write a unit test for one or more functions extracted from this module. 
120 | -------------------------------------------------------------------------------- /_docs/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # refactoring101 documentation build configuration file, created by 4 | # sphinx-quickstart on Mon Feb 24 22:03:28 2014. 5 | # 6 | # This file is execfile()d with the current directory set to its 7 | # containing dir. 8 | # 9 | # Note that not all possible configuration values are present in this 10 | # autogenerated file. 11 | # 12 | # All configuration values have a default; values that are commented out 13 | # serve to show the default. 14 | 15 | import sys 16 | import os 17 | 18 | # If extensions (or modules to document with autodoc) are in another directory, 19 | # add these directories to sys.path here. If the directory is relative to the 20 | # documentation root, use os.path.abspath to make it absolute, like shown here. 21 | #sys.path.insert(0, os.path.abspath('.')) 22 | 23 | # -- General configuration ------------------------------------------------ 24 | 25 | # If your documentation needs a minimal Sphinx version, state it here. 26 | #needs_sphinx = '1.0' 27 | 28 | # Add any Sphinx extension module names here, as strings. They can be 29 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 30 | # ones. 31 | extensions = [] 32 | 33 | # Add any paths that contain templates here, relative to this directory. 34 | templates_path = ['_templates'] 35 | 36 | # The suffix of source filenames. 37 | source_suffix = '.rst' 38 | 39 | # The encoding of source files. 40 | #source_encoding = 'utf-8-sig' 41 | 42 | # The master toctree document. 43 | master_doc = 'index' 44 | 45 | # General information about the project. 
46 | project = u'refactoring101' 47 | copyright = u'2014, Serdar Tumgoren (AP) and Jeremy Bowers (NPR)' 48 | 49 | # The version info for the project you're documenting, acts as replacement for 50 | # |version| and |release|, also used in various other places throughout the 51 | # built documents. 52 | # 53 | # The short X.Y version. 54 | version = '0.1' 55 | # The full version, including alpha/beta/rc tags. 56 | release = '0.1' 57 | 58 | # The language for content autogenerated by Sphinx. Refer to documentation 59 | # for a list of supported languages. 60 | #language = None 61 | 62 | # There are two options for replacing |today|: either, you set today to some 63 | # non-false value, then it is used: 64 | #today = '' 65 | # Else, today_fmt is used as the format for a strftime call. 66 | #today_fmt = '%B %d, %Y' 67 | 68 | # List of patterns, relative to source directory, that match files and 69 | # directories to ignore when looking for source files. 70 | exclude_patterns = ['_build'] 71 | 72 | # The reST default role (used for this markup: `text`) to use for all 73 | # documents. 74 | #default_role = None 75 | 76 | # If true, '()' will be appended to :func: etc. cross-reference text. 77 | #add_function_parentheses = True 78 | 79 | # If true, the current module name will be prepended to all description 80 | # unit titles (such as .. function::). 81 | #add_module_names = True 82 | 83 | # If true, sectionauthor and moduleauthor directives will be shown in the 84 | # output. They are ignored by default. 85 | #show_authors = False 86 | 87 | # The name of the Pygments (syntax highlighting) style to use. 88 | pygments_style = 'sphinx' 89 | 90 | # A list of ignored prefixes for module index sorting. 91 | #modindex_common_prefix = [] 92 | 93 | # If true, keep warnings as "system message" paragraphs in the built documents. 
94 | #keep_warnings = False 95 | 96 | 97 | # -- Options for HTML output ---------------------------------------------- 98 | 99 | # The theme to use for HTML and HTML Help pages. See the documentation for 100 | # a list of builtin themes. 101 | #html_theme = 'default' 102 | html_theme = 'sphinxdoc' 103 | 104 | # Theme options are theme-specific and customize the look and feel of a theme 105 | # further. For a list of options available for each theme, see the 106 | # documentation. 107 | #html_theme_options = {} 108 | 109 | # Add any paths that contain custom themes here, relative to this directory. 110 | #html_theme_path = [] 111 | 112 | # The name for this set of Sphinx documents. If None, it defaults to 113 | # " v documentation". 114 | #html_title = None 115 | 116 | # A shorter title for the navigation bar. Default is the same as html_title. 117 | #html_short_title = None 118 | 119 | # The name of an image file (relative to this directory) to place at the top 120 | # of the sidebar. 121 | #html_logo = None 122 | 123 | # The name of an image file (within the static path) to use as favicon of the 124 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 125 | # pixels large. 126 | #html_favicon = None 127 | 128 | # Add any paths that contain custom static files (such as style sheets) here, 129 | # relative to this directory. They are copied after the builtin static files, 130 | # so a file named "default.css" will overwrite the builtin "default.css". 131 | html_static_path = ['_static'] 132 | 133 | # Add any extra paths that contain custom files (such as robots.txt or 134 | # .htaccess) here, relative to this directory. These files are copied 135 | # directly to the root of the documentation. 136 | #html_extra_path = [] 137 | 138 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 139 | # using the given strftime format. 
140 | #html_last_updated_fmt = '%b %d, %Y' 141 | 142 | # If true, SmartyPants will be used to convert quotes and dashes to 143 | # typographically correct entities. 144 | #html_use_smartypants = True 145 | 146 | # Custom sidebar templates, maps document names to template names. 147 | #html_sidebars = {} 148 | 149 | # Additional templates that should be rendered to pages, maps page names to 150 | # template names. 151 | #html_additional_pages = {} 152 | 153 | # If false, no module index is generated. 154 | #html_domain_indices = True 155 | 156 | # If false, no index is generated. 157 | #html_use_index = True 158 | 159 | # If true, the index is split into individual pages for each letter. 160 | #html_split_index = False 161 | 162 | # If true, links to the reST sources are added to the pages. 163 | #html_show_sourcelink = True 164 | 165 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 166 | #html_show_sphinx = True 167 | 168 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 169 | #html_show_copyright = True 170 | 171 | # If true, an OpenSearch description file will be output, and all pages will 172 | # contain a tag referring to it. The value of this option must be the 173 | # base URL from which the finished HTML is served. 174 | #html_use_opensearch = '' 175 | 176 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 177 | #html_file_suffix = None 178 | 179 | # Output file base name for HTML help builder. 180 | htmlhelp_basename = 'refactoring101doc' 181 | 182 | 183 | # -- Options for LaTeX output --------------------------------------------- 184 | 185 | latex_elements = { 186 | # The paper size ('letterpaper' or 'a4paper'). 187 | #'papersize': 'letterpaper', 188 | 189 | # The font size ('10pt', '11pt' or '12pt'). 190 | #'pointsize': '10pt', 191 | 192 | # Additional stuff for the LaTeX preamble. 193 | #'preamble': '', 194 | } 195 | 196 | # Grouping the document tree into LaTeX files. 
List of tuples 197 | # (source start file, target name, title, 198 | # author, documentclass [howto, manual, or own class]). 199 | latex_documents = [ 200 | ('index', 'refactoring101.tex', u'refactoring101 Documentation', 201 | u'Serdar Tumgoren (AP) and Jeremy Bowers (NPR)', 'manual'), 202 | ] 203 | 204 | # The name of an image file (relative to this directory) to place at the top of 205 | # the title page. 206 | #latex_logo = None 207 | 208 | # For "manual" documents, if this is true, then toplevel headings are parts, 209 | # not chapters. 210 | #latex_use_parts = False 211 | 212 | # If true, show page references after internal links. 213 | #latex_show_pagerefs = False 214 | 215 | # If true, show URL addresses after external links. 216 | #latex_show_urls = False 217 | 218 | # Documents to append as an appendix to all manuals. 219 | #latex_appendices = [] 220 | 221 | # If false, no module index is generated. 222 | #latex_domain_indices = True 223 | 224 | 225 | # -- Options for manual page output --------------------------------------- 226 | 227 | # One entry per manual page. List of tuples 228 | # (source start file, name, description, authors, manual section). 229 | man_pages = [ 230 | ('index', 'refactoring101', u'refactoring101 Documentation', 231 | [u'Serdar Tumgoren (AP) and Jeremy Bowers (NPR)'], 1) 232 | ] 233 | 234 | # If true, show URL addresses after external links. 235 | #man_show_urls = False 236 | 237 | 238 | # -- Options for Texinfo output ------------------------------------------- 239 | 240 | # Grouping the document tree into Texinfo files. 
List of tuples 241 | # (source start file, target name, title, author, 242 | # dir menu entry, description, category) 243 | texinfo_documents = [ 244 | ('index', 'refactoring101', u'refactoring101 Documentation', 245 | u'Serdar Tumgoren (AP) and Jeremy Bowers (NPR)', 'refactoring101', 'One line description of project.', 246 | 'Miscellaneous'), 247 | ] 248 | 249 | # Documents to append as an appendix to all manuals. 250 | #texinfo_appendices = [] 251 | 252 | # If false, no module index is generated. 253 | #texinfo_domain_indices = True 254 | 255 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 256 | #texinfo_show_urls = 'footnote' 257 | 258 | # If true, do not generate a @detailmenu in the "Top" node's menu. 259 | #texinfo_no_detailmenu = False 260 | -------------------------------------------------------------------------------- /_docs/phase4/swapout.rst: -------------------------------------------------------------------------------- 1 | Enter stage left - Races and Candidates 2 | ======================================= 3 | 4 | The *Candidate* and *Race* classes now encapsulate our core 5 | logic. It's time to put these classes to work. 6 | 7 | This is the step we've been waiting for -- where we simplify the 8 | parser and summary code by outsourcing complex logic to simple 9 | domain models (i.e. *Candidate* and *Race* classes). 10 | 11 | Major code updates such as this feel like changing the engine on a moving car: 12 | It's scary, and you're never quite sure if an accident is waiting around the corner. 13 | Fortunately, we have a suite of tests that let us apply our changes and quickly get 14 | feedback on whether we broke anything. 15 | 16 | Let's start by swapping in the *Race* class in the parser code, the 17 | entry point of our application. The *Race* class replaces nested dictionaries 18 | and lists. 19 | 20 | 21 | Update Parser 22 | ------------- 23 | 24 | .. code:: python 25 | 26 | 27 | def parse_and_clean(): 28 | 29 | # ... snipped ...
30 | 31 | results = {} 32 | 33 | # Initial data clean-up 34 | for row in reader: 35 | # Convert votes to integer 36 | row['votes'] = int(row['votes']) 37 | 38 | # Store races by slugified office and district (if there is one) 39 | race_key = row['office'] 40 | if row['district']: 41 | race_key += "-%s" % row['district'] 42 | 43 | try: 44 | race = results[race_key] 45 | except KeyError: 46 | race = Race(row['date'], row['office'], row['district']) 47 | results[race_key] = race 48 | 49 | race.add_result(row) 50 | 51 | # ... snipped ... 52 | 53 | Here is the list of changes: 54 | 55 | - Delete the candidate name parsing code 56 | - Simplify results storage and use try/except to get/create Race 57 | instances 58 | - Update Race and, by extension, candidate vote totals, by calling 59 | *add\_result* on *Race* instance. 60 | 61 | Before porting the *summarize* function to use this new input, let's 62 | update the parser tests and ensure everything runs correctly. We'll 63 | tweak our test to use dotted-attribute notation instead of dictionary 64 | lookups, to reflect the new class-based approach. 65 | 66 | .. code:: python 67 | 68 | # elex4/tests/test_parser.py 69 | 70 | class TestParser(TestCase): 71 | 72 | def test_name_parsing(self): 73 | "Parser should split full candidate name into first and last names" 74 | race = results['President'] 75 | smith = [cand for cand in race.candidates.values() if cand.last_name == 'Smith'][0] 76 | # Below lines changed from dictionary access 77 | self.assertEqual(smith.first_name, 'Joe') # formerly, smith['first_name'] 78 | self.assertEqual(smith.last_name, 'Smith') # formerly, smith['last_name'] 79 | 80 | Now run the tests: 81 | 82 | :: 83 | 84 | nosetests -v elex4/tests/test_parser.py 85 | 86 | The updated *parse\_and\_clean* function is easier to read and maintain 87 | than its original version, but it could still be much improved.
For 88 | instance, we could easily hide the race-key logic and type conversion of 89 | votes inside the *Race* class. 90 | 91 | We could also transform the function into a class, and encapsulate the 92 | get/create logic for *Race* instances in a private method, similar to 93 | the \*Race.\_\_get\_or\_create\_candidate\* method. 94 | 95 | We'll leave such refactorings as exercises for the reader. 96 | 97 | Exercises 98 | ^^^^^^^^^ 99 | 100 | - The *parse\_and\_clean* function, though simplified, still has too 101 | much cruft. Perform the following refactorings: 102 | - Move code that converts votes to an integer inside the *Race* class 103 | - Create a *Race.key* 104 | `property `__ 105 | that encapsulates this logic, and remove it from the parser function 106 | - Simplify the return value of *parse\_and\_clean* to only return a 107 | list of *Race* instances, rather than a dictionary. This will require 108 | also refactoring the *summarize* function 109 | - Refactor the *parse\_and\_clean* function into a *Parser* class with 110 | a private \*\_\_get\_or\_create\_race\* method. 111 | 112 | Update Summary 113 | -------------- 114 | 115 | Refactoring the *summarize* function is a bit trickier than the parser 116 | code, since we plan to change the input data for this function. Recall 117 | that the parser code now returns a dict of *Race* instances, rather than 118 | nested dicts. The *summarize* function needs to be updated to handle 119 | this type of input. 120 | 121 | This also means that we can no longer feed the test fixture JSON, as is, 122 | to the *summarize* function in our *setUp* method. Instead, we need to 123 | build input data that mirrors what would be returned by the updated 124 | *parse\_and\_clean* function: Namely, a dictionary containing *Race* 125 | instances as values. 126 | 127 | First, we'll simplify the test fixtures by removing the nested object 128 | structure. Instead, we'll make them a simple array of result objects. 
129 | 130 | Note: We could re-use the same JSON fixtures from *elex3* without 131 | modification, but this would result in a more convoluted *setUp* 132 | method. Wherever possible, use the simplest test data possible. 133 | 134 | Then we'll update the *setUp* method to handle our simplified JSON 135 | fixtures, and we'll move it into a new *TestSummaryBase* class. 136 | *TestSummaryResults* and *TestTieRace* will *sub-class* this new base 137 | class instead of *TestCase*, allowing them both to make use of the same 138 | *setUp* code. 139 | 140 | This is an example of class 141 | `inheritance <https://docs.python.org/2/tutorial/classes.html#inheritance>`__. 142 | Python classes can inherit methods and attributes from other classes by 143 | *subclassing* one or more parent classes. This is a powerful, core 144 | concept of object-oriented programming that helps keep code clean and 145 | re-usable. 146 | 147 | And it's one that we've been using for a while, when we subclassed 148 | *unittest.TestCase* in our test classes. We're essentially substituting 149 | our own parent class, one that blends the rich functionality of 150 | *TestCase* with a custom *setUp* method. This allows the same *setUp* 151 | code to be used by methods in multiple subclasses. 152 | 153 | .. code:: python 154 | 155 | 156 | class TestSummaryBase(TestCase): 157 | 158 | def setUp(self): 159 | # Recall that sample data only has a single Presidential race 160 | race = Race('2012-11-06', 'President', '') 161 | for result in self.SAMPLE_RESULTS: 162 | race.add_result(result) 163 | # summarize function expects a dict, keyed by race 164 | summary = summarize({'President': race}) 165 | self.race = summary['President'] 166 | 167 | 168 | # Update the main test classes to inherit this base class, instead of 169 | # directly from TestCase 170 | 171 | class TestSummaryResults(TestSummaryBase): 172 | 173 | # ... snipped ... 174 | 175 | 176 | class TestTieRace(TestSummaryBase): 177 | 178 | # ... snipped ...
179 | 180 | If you ran the *test\_summary.py* suite now, you'd see all tests 181 | failing. 182 | 183 | Now we're ready to swap in our new class-based implementation. This time 184 | we'll be deleting quite a bit of code, and tweaking what remains. Below 185 | is the new code, followed by a list of major changes: 186 | 187 | .. code:: python 188 | 189 | 190 | # We removed the defaultdict and use a plain-old dict 191 | summary = {} 192 | 193 | for race_key, race in results.items(): 194 | cands = [] 195 | # Call our new assign_winner method 196 | race.assign_winner() 197 | # Loop through Candidate instances and extract a dictionary 198 | # of target values. Basically, we're throwing away county-level 199 | # results since we don't need those for the summary report 200 | for cand in race.candidates.values(): 201 | # Remove lower-level county results 202 | # This is a dirty little trick for easily obtaining 203 | # a dictionary of candidate attributes. 204 | info = cand.__dict__.copy() 205 | # Remove county results 206 | info.pop('county_results') 207 | cands.append(info) 208 | 209 | summary[race_key] = { 210 | 'all_votes': race.total_votes, 211 | 'date': race.date, 212 | 'office': race.office, 213 | 'district': race.district, 214 | 'candidates': cands, 215 | } 216 | 217 | return summary 218 | 219 | Changes to the *summarize* function include: 220 | 221 | - Convert *summary* output to plain dictionary (instead of defaultdict) 222 | - Delete all code for sorting and determining winner. This is replaced 223 | by a call to the *assign\_winner* method on *Race* instances. 224 | - Create a list of candidate data as dictionaries without county-level 225 | results 226 | - Update code that adds data to the *summary* dictionary to use the 227 | race instance and newly created *cands* list. 228 | 229 | Of course, we should run our tests to make sure the implementation works.
230 | 231 | :: 232 | 233 | nosetests -v elex4/tests/test_summary.py 234 | 235 | At this point, our refactoring work is complete. We should verify that 236 | all tests run without failures: 237 | 238 | :: 239 | 240 | nosetests -v elex4/tests/test_*.py 241 | 242 | Overall, the *summarize* function has grown much simpler by outsourcing 243 | the bulk of the work to the *Race* and *Candidate* classes. In fact, it 244 | could be argued that the *summarize* function doesn't do enough at this 245 | point to justify its existence. Its main role is massaging data into a 246 | form that plays nice with the *save\_summary\_to\_csv.py* script. 247 | 248 | It might make sense to push the remaining bits of logic into the 249 | Race/Candidate model classes and the *save\_summary\_to\_csv.py* script. 250 | 251 | You'll also notice that the *summary* tests closely mirror those for the 252 | *Race* class in *elex4/tests/test\_models.py*. Redundant tests can cause 253 | confusion and add maintenance overhead. 254 | 255 | It would make sense at this point to delete the *summarize* tests for 256 | underlying functionality -- tallying votes, assigning winners -- and 257 | create new tests specific to the summary output. For example, you could 258 | write a test that ensures the output structure meets expectations. 259 | 260 | Questions 261 | ^^^^^^^^^ 262 | 263 | - What is a class attribute? 264 | - How does Python construct classes? 265 | - What is the 266 | `\_\_dict\_\_ <https://docs.python.org/2/library/stdtypes.html#object.__dict__>`__ 267 | special attribute on a class? 268 | - How can the built-in 269 | `type <https://docs.python.org/2/library/functions.html#type>`__ 270 | function be used to construct classes dynamically? 271 | 272 | Exercises 273 | ^^^^^^^^^ 274 | 275 | - Implement a *Race.summary* 276 | `property <https://docs.python.org/2/library/functions.html#property>`__ 277 | that returns all data for the instance, minus the *Candidate* county 278 | results. Swap this implementation into the *summarize* function. 279 | - Delete tests in *elex4/tests/test\_summary.py* and add a new test 280 | that verifies the structure of the output.
281 | -------------------------------------------------------------------------------- /_docs/phase4/races.rst: -------------------------------------------------------------------------------- 1 | Races have Candidates! 2 | ====================== 3 | 4 | With the basics of our `Candidate` class out of the way, let's 5 | move on to building out the `Race` class. This higher-level 6 | class will manage updates to our candidate instances, along with metadata 7 | about the race itself such as election date and office/district. 8 | 9 | Recall that in *elex3*, the *lib/parser.py* module ensured that county-level results were assigned to the appropriate candidate. 10 | We'll now migrate that logic over to the `Race` class, along with a few other responsibilities: 11 | 12 | - Tracking overall vote count for the race 13 | - Updating candidates with new county-level votes 14 | - Determining which candidate, if any, won the race 15 | 16 | Metadata and Total votes 17 | ------------------------ 18 | 19 | The *Race* class keeps a running tally of all votes. This figure is 20 | the sum of all county-level votes received by individual candidates. 21 | 22 | Let's build out the *Race* class with basic metadata fields and an *add\_result* method 23 | that updates the total vote count. 24 | 25 | This should be pretty straightforward, and you'll notice that the tests mirror those used to 26 | perform vote tallies on *Candidate* instances. 27 | 28 | .. code:: python 29 | 30 | 31 | # Don't forget to import Race from elex4.lib.models at the top of your test module!
32 | 33 | class TestRace(TestCase): 34 | 35 | def setUp(self): 36 | self.smith_result = { 37 | 'date': '2012-11-06', 38 | 'candidate': 'Smith, Joe', 39 | 'party': 'Dem', 40 | 'office': 'President', 41 | 'county': 'Fairfax', 42 | 'votes': 2000, 43 | } 44 | self.race = Race("2012-11-06", "President", "") 45 | 46 | def test_total_votes_default(self): 47 | "Race total votes should default to zero" 48 | self.assertEquals(self.race.total_votes, 0) 49 | 50 | def test_total_votes_update(self): 51 | "Race.add_result should update racewide vote count" 52 | self.race.add_result(self.smith_result) 53 | self.assertEquals(self.race.total_votes, 2000) 54 | 55 | Go ahead and run those tests and watch them fail. 56 | 57 | Now let's build out our initial *Race* class with an *add\_result* 58 | method to make the tests pass. 59 | 60 | :: 61 | 62 | class Race(object): 63 | 64 | def __init__(self, date, office, district): 65 | self.date = date 66 | self.office = office 67 | self.district = district 68 | self.total_votes = 0 69 | 70 | def add_result(self, result): 71 | self.total_votes += result['votes'] 72 | 73 | Candidate Bookkeeping 74 | --------------------- 75 | 76 | In earlier phases of the project, the parser code ensured that 77 | county-level results were grouped with the appropriate, unique candidate 78 | in each race. If you recall, those county results were stored in a list 79 | for each candidate: 80 | 81 | .. code:: python 82 | 83 | # elex3.lib.parser.py 84 | 85 | def parse_and_clean 86 | 87 | # ... snipped... 
88 | 89 | # Store county-level results by slugified office and district (if there is one), 90 | # then by candidate party and raw name 91 | race_key = row['office'] 92 | if row['district']: 93 | race_key += "-%s" % row['district'] 94 | 95 | # Create unique candidate key from party and name, in case multiple candidates have the same name 96 | cand_key = "-".join((row['party'], row['candidate'])) 97 | 98 | # Get or create dictionary for the race 99 | race = results[race_key] 100 | 101 | # Get or create candidate dictionary with a default value of a list; Add result to the list 102 | race.setdefault(cand_key, []).append(row) 103 | 104 | We now have Candidate classes that manage their own county results. But 105 | we need to migrate the bookkeeping of Candidate instances from the 106 | parser code to the *Race* class. Specifically, we need to create a new 107 | Candidate instance or fetch a pre-existing instance, as appropriate, for 108 | each county result. 109 | 110 | Let's start by adding a test to our *TestRace* class that ensures we're 111 | updating a single candidate instance, rather than accidentally creating 112 | duplicate instances. 113 | 114 | .. code:: python 115 | 116 | 117 | class TestRace(TestCase): 118 | 119 | # ... snipped ... 120 | 121 | def test_add_result_to_candidate(self): 122 | "Race.add_result should update a unique candidate instance" 123 | # Add a vote twice. If it's the same candidate, vote total should be sum of results 124 | self.race.add_result(self.smith_result) 125 | self.race.add_result(self.smith_result) 126 | cand_key = (self.smith_result['party'], self.smith_result['candidate']) 127 | candidate = self.race.candidates[cand_key] 128 | self.assertEquals(candidate.votes, 4000) 129 | 130 | Run that test and watch it fail. You'll notice we have a new 131 | *candidates* attribute that is a dictionary. This is pretty much the 132 | same approach we used in earlier phases, where we stored candidate data 133 | by a unique key.
However, instead of using a slug, we're now using 134 | tuples as keys. 135 | 136 | Accessing *candidate* data directly in this way is a code smell, and 137 | it could be argued that we should also write a candidate lookup 138 | method. We'll leave that as an exercise. 139 | 140 | Now let's update the *Race* class and its *add\_result* method to make 141 | the test pass. 142 | 143 | .. code:: python 144 | 145 | 146 | class Race(object): 147 | 148 | def __init__(self, date, office, district): 149 | # .... snipped .... 150 | # We add the candidates dictionary 151 | self.candidates = {} 152 | 153 | def add_result(self, result): 154 | self.total_votes += result['votes'] 155 | # Below lines are new 156 | candidate = self.__get_or_create_candidate(result) 157 | candidate.add_votes(result['county'], result['votes']) 158 | 159 | # Private methods 160 | def __get_or_create_candidate(self, result): 161 | key = (result['party'], result['candidate']) 162 | try: 163 | candidate = self.candidates[key] 164 | except KeyError: 165 | candidate = Candidate(result['candidate'], result['party']) 166 | self.candidates[key] = candidate 167 | return candidate 168 | 169 | Above, the bulk of our work is handled by a new private method called 170 | *\_\_get\_or\_create\_candidate*. This method attempts to fetch a 171 | pre-existing *Candidate* instance or creates a new one and adds it to 172 | the dictionary, before returning the instance. 173 | 174 | Once we have the correct instance, we call its *add\_votes* method to 175 | update the vote count and add the result to that candidate's county 176 | results list. 177 | 178 | Our test verifies this by calling the *add\_result* method twice and 179 | then checking the candidate instance's vote count to ensure the vote 180 | count is correct.
181 | 182 | Testing purists may point out that we've violated the principle of 183 | `test isolation `__, since 184 | this unit test directly accesses the candidate instance and relies 185 | on its underlying vote tallying logic. There are testing strategies 186 | and tools, such as mocks, to help avoid or minimize such *tight 187 | coupling* between unit tests. For the sake of simplicity, we'll wave 188 | our hand at that issue in this tutorial and leave it as a study 189 | exercise for the reader. 190 | 191 | Assigning Winners 192 | ----------------- 193 | 194 | We're now ready for the last major piece of the puzzle, namely, 195 | migrating the code that determines race winners. This logic was 196 | previously handled in the *summary* function and its related tests. 197 | 198 | .. code:: python 199 | 200 | # elex3/lib/summary.py 201 | 202 | # ... snipped .... 203 | 204 | # sort cands from highest to lowest vote count 205 | sorted_cands = sorted(cands, key=itemgetter('votes'), reverse=True) 206 | 207 | # Determine winner, if any 208 | first = sorted_cands[0] 209 | second = sorted_cands[1] 210 | 211 | if first['votes'] != second['votes']: 212 | first['winner'] = 'X' 213 | 214 | # ... snipped .... 215 | 216 | We'll migrate our tests and apply some minor updates to reflect the fact 217 | that we're now storing data in Candidate and Race classes, rather than 218 | nested dictionaries and lists. 219 | 220 | It's important to note that while we're modifying the test syntax to 221 | accommodate our new objects, we're not changing the *substance* of 222 | the tests. 223 | 224 | First, let's add an extra sample result to the *setUp* method to support 225 | each test. 226 | 227 | .. code:: python 228 | 229 | 230 | # elex4/tests/test_models.py 231 | 232 | class TestRace(TestCase): 233 | 234 | def setUp(self): 235 | 236 | 237 | # ... snipped .... 
238 | 239 | self.doe_result = { 240 | 'date': '2012-11-06', 241 | 'candidate': 'Doe, Jane', 242 | 'party': 'GOP', 243 | 'office': 'President', 244 | 'county': 'Fairfax', 245 | 'votes': 1000, 246 | } 247 | 248 | Next, let's migrate the winner, non-winner and tie race tests from 249 | *elex3/tests/test\_summary.py* to the *TestRace* class in 250 | *elex4/tests/test\_models.py*. 251 | 252 | .. code:: python 253 | 254 | 255 | class TestRace(TestCase): 256 | 257 | # ... snipped .... 258 | 259 | def test_winner_has_flag(self): 260 | "Winner flag should be assigned to candidates with most votes" 261 | self.race.add_result(self.doe_result) 262 | self.race.add_result(self.smith_result) 263 | # Our new method triggers the assignment of the winner flag 264 | self.race.assign_winner() 265 | smith = [cand for cand in self.race.candidates.values() if cand.last_name == 'Smith'][0] 266 | self.assertEqual(smith.winner, 'X') 267 | 268 | def test_loser_has_no_winner_flag(self): 269 | "Winner flag should not be assigned to candidate that does not have highest vote total" 270 | self.race.add_result(self.doe_result) 271 | self.race.add_result(self.smith_result) 272 | self.race.assign_winner() 273 | doe = [cand for cand in self.race.candidates.values() if cand.last_name == 'Doe'][0] 274 | self.assertEqual(doe.winner, '') 275 | def test_tie_race(self): 276 | "Winner flag should not be assigned to any candidate in a tie race" 277 | # Modify Doe vote count to make it a tie for this test method 278 | self.doe_result['votes'] = 2000 279 | self.race.add_result(self.doe_result) 280 | self.race.add_result(self.smith_result) 281 | self.race.assign_winner() 282 | for cand in self.race.candidates.values(): 283 | self.assertEqual(cand.winner, '') 284 | 285 | These tests mirror the test methods in *elex3/tests/test\_summary.py*. 286 | We've simply tweaked them to reflect our class-based approach and to 287 | exercise the new *Race* method that assigns the winner flag.
288 | 289 | We'll eventually delete the duplicative tests in *test\_summary.py*, but 290 | we're not quite ready to do so yet. 291 | 292 | First, let's make these tests pass by tweaking the *Candidate* class and 293 | implementing the *Race.assign\_winner* method: 294 | 295 | .. code:: python 296 | 297 | # elex4/lib/models.py 298 | 299 | class Candidate(object): 300 | 301 | def __init__(self, raw_name, party): 302 | 303 | # ... snipped... 304 | 305 | # Add a new winner attribute to candidate class with empty string as default value 306 | self.winner = '' 307 | 308 | 309 | class Race(object): 310 | 311 | # ... snipped... 312 | 313 | def assign_winner(self): 314 | # Sort cands from highest to lowest vote count 315 | sorted_cands = sorted(self.candidates.values(), key=attrgetter('votes'), reverse=True) 316 | 317 | # Determine winner, if any 318 | first = sorted_cands[0] 319 | second = sorted_cands[1] 320 | 321 | if first.votes != second.votes: 322 | first.winner = 'X' 323 | 324 | Above, notice that we added a default *Candidate.winner* attribute, and 325 | a *Race.assign\_winner* method. The latter is nearly a straight copy of 326 | our original winner-assignment logic in the *summarize* function. The 327 | key differences are: 328 | 329 | - We're calling *self.candidates.values()* to get a list of *Candidate* 330 | instances, since these are now stored in a dictionary. 331 | - We're using *attrgetter* instead of *itemgetter* to access the 332 | candidate's vote count for purposes of sorting. This is necessary, of 333 | course, because we're now sorting by the value of an instance 334 | attribute rather than the value of a dictionary key. 335 | - We're accessing the *votes* attribute on candidate instances rather 336 | than performing dictionary lookups. 337 | --------------------------------------------------------------------------------