├── .gitignore ├── README.rst ├── TODO ├── bin └── run.sh ├── pypitches ├── __init__.py ├── baseball_query.py ├── baseball_test.yaml ├── beckett_break.yaml ├── buchholz.yaml ├── clustering.py ├── download.py ├── lester_ch.yaml ├── lester_cu.yaml ├── load.py ├── model │ ├── __init__.py │ ├── classes.py │ └── session.py ├── notes ├── plot_pitch_locations.py ├── plot_pitch_types.py ├── pypitches.py ├── scatterplot.py ├── select_gamedirs.py ├── settings.py ├── setup_postgres.py ├── shrink.py ├── test │ ├── __init__.py │ ├── static │ │ └── testdummy │ │ │ ├── year_2011 │ │ │ └── month_08 │ │ │ │ └── day_01 │ │ │ │ └── gid_2011_08_01_clemlb_bosmlb_1 │ │ │ │ ├── boxscore.xml │ │ │ │ ├── game.xml │ │ │ │ ├── inning │ │ │ │ └── inning_all.xml │ │ │ │ └── players.xml │ │ │ └── year_2012 │ │ │ └── month_07 │ │ │ └── day_01 │ │ │ ├── .DS_Store │ │ │ ├── gid_2012_07_01_arimlb_milmlb_1 │ │ │ ├── boxscore.xml │ │ │ ├── game.xml │ │ │ ├── inning │ │ │ │ └── inning_all.xml │ │ │ └── players.xml │ │ │ ├── gid_2012_07_01_bosmlb_seamlb_1 │ │ │ ├── boxscore.xml │ │ │ ├── game.xml │ │ │ ├── inning │ │ │ │ └── inning_all.xml │ │ │ └── players.xml │ │ │ └── gid_2012_07_01_chamlb_nyamlb_1 │ │ │ ├── boxscore.xml │ │ │ ├── game.xml │ │ │ ├── inning │ │ │ └── inning_all.xml │ │ │ └── players.xml │ ├── test_basics.py │ └── test_plots.py ├── wakefield.yaml └── web │ ├── __init__.py │ ├── app.py │ ├── static │ ├── countries.json │ ├── datatables.css │ ├── fixed.css │ ├── jquery.dataTables.js │ └── jquery.dataTables.min.js │ └── templates │ ├── controls.html │ ├── layout.html │ ├── login.html │ ├── register.html │ ├── status.html │ └── timeline.html ├── requirements.txt ├── setup.py └── sql └── baseball.sql /.gitignore: -------------------------------------------------------------------------------- 1 | gd2.tar.bz2 2 | download_log.txt 3 | *.pyc 4 | gosdb.sqlite 5 | pbp2.sql 6 | old 7 | *.sqlite 8 | *.log 9 | *.err 10 | *.png 11 | *.pdf 12 | *.svg 13 | downloads 14 | .*.swp 15 | *.sqlite-journal 16 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | ========= 2 | PyPitches 3 | ========= 4 | 5 | Introduction 6 | ------------ 7 | 8 | This is a set of scripts to download PITCHf/x XML data from mlb.com, 9 | to load that data into a PostgreSQL database, 10 | and to generate figures from that data. 11 | 12 | Requires 13 | -------- 14 | See requirements.txt 15 | 16 | Or just create a virtualenv and 17 | 18 | .. code:: bash 19 | 20 | pip install -r requirements.txt 21 | 22 | Getting Started 23 | --------------- 24 | 25 | - See requirements 26 | 27 | .. code:: bash 28 | 29 | nosetests pypitches/pypitches/test 30 | python pypitches/download.py 31 | 32 | 33 | - and get a cup of coffee, or bake a cake or something 34 | - consider editing the patterns at the top of ``download.py`` to restrict the download to certain months 35 | 36 | .. code:: bash 37 | 38 | python pypitches/pypitches.py classify download/ 39 | python pypitches/pypitches.py load 40 | 41 | 42 | ToDo 43 | ---- 44 | - Quantify the effect of fried chicken and beer on Red Sox pitching in September 2011? 45 | - See Github issues_. 46 | 47 | .. _issues: https://github.com/gregoryck/pypitches/issues 48 | 49 | 50 | What do the columns mean? 51 | ------------------------- 52 | 53 | Copied from Alan M. Nathan's glossary_. 54 | 55 | .. _glossary: http://fastballs.wordpress.com/2007/08/02/glossary-of-the-gameday-pitch-fields/ 56 | 57 | x, y, and z 58 | location of pitch as it crosses the front of home plate. The units are in camera pixels and are therefore not very useful. I recommend ignoring these two parameters and instead use p_x and p_z, defined below. 59 | 60 | start_speed 61 | speed of ball in mph at the starting position (defined below). 62 | 63 | end_speed 64 | speed of ball in mph as it crosses the front of home plate, located 1.417 ft from the point of home plate (i.e., at the coordinate y=1.417). Note the end_speed is less than start_speed due to the effect of air resistance. 65 | 66 | sz_top, sz_bottom 67 | a line of constant z (in ft) defining the lower and upper limits, respectively, of the strike zone. That is, these are the height above home plate of the top and bottom of the strike zone. Currently, these parameters are set for each batter by the operator by visually observing the image from the center-field camera. 68 | 69 | pfx_x,pfx_z 70 | The deviation (in inches) of the pitch trajectory from a straight-line in the x (horizontal) and z (vertical) directions between y=40 ft and the front edge of home plate, y=1.417 ft. It is important to note two things. First, the initial value is y=40 ft, regardless of the value of the initial value y0 (defined below). If the pitcher's release point had been used (approximately y=55 ft), then the deviation would have been nearly twice as large. Second, the effect of gravity has been removed from pfx_z, so that both parameters are the "break" of the pitch due to the Magnus force on a spinning baseball. Note that the online Gameday reports the quantity pfx, which is presumably the square root of pfx_x2+pfx_z2. Given our sign conventions, a positive value of pfx_x cooresponds a deviation to the catcher's right and a negative value to the catcher's left. Similarly, a postive value of of pfx_z is a pitch the drops less than it would from gravity alone (most pitches fall in this category), whereas a negative value is a pitch that drops more than from gravity alone (e.g., a "12-6" curveball). 71 | 72 | p_x, p_z 73 | location of pitch in the x and z coordinates, respectively, as it crosses the front of home plate, in units of ft. When you watch Gameday, this is the location of the dot on the screen that appears for each pitch. It is computed from the tracked trajectory. There is a one-to-one correlation between p_x and x and between p_z and y (see above description of x and y). 74 | 75 | x0,y0,z0, vx0,vy0,vz0,ax,ay,az 76 | These parameters are the most important ones in the database, since all others are computed using these. The parameters represent the result of making a least-squares fit to the measured trajectory assuming constant acceleration, for each of the three dimensions. The first three parameters are the initial positions in ft, the next three are initial velocities in ft/s, the next three are the accelerations (assumed constant) in ft/s2. To calculate the full trajectory, use the formula x(t)=x0+vx0*t+0.5*ax*t^2, and a similar formula for y(t) and z(t), where t is the time. Note that y0, which is the distance from the point of home plate where the tracking begins, is 50 ft for the file discussed above although earlier files used 55 ft or 40 ft. This point is the location where the start_speed is determined and from which the break parameters (see below) are calculated.. Note that start_speed is just the square root of vx_02+vy_02+vz_02, converted to mph. 77 | 78 | break_y,break_angle, break_length 79 | These quantities refer to a different definition of "break" than the quantities pfx_x, pfx_z defined above. They are arrived at as follows. A straight line is drawn from the starting location x0,y0,z0 to the final location defined by p_x, p_z and y=1.417 ft. Such a line determines a straight-line trajectory from starting point to ending point. That trajectory is then compared to the actual trajectory determined by the constant acceleration fit to the data. The quantity break_length is the largest deviation, in inches, of the actual from the straight-line trajectory. The quantity break_y is the y-distance from home plate where the maximum deviation occurs. The quantity break_angle is the direction of the deviation, with the convention that a pitch that breaks away from or toward a RHH has a negative or positive angle, respectively; a break_angle of 0 is a pitch with no horizontal break and is typical of a straight fastball. Most pitches will have a break_angle between about -50o and +50o. An angle greater than 90o in absolute value is almost impossible, since that would imply an upward break, defeating gravity (the myth of the "rising fastball"). Both break_length and break_angle are shown on the Gameday screen. Also the break_angle is indicated with an arrow that points straight down for 0o, down and to the left for a positive angle, or down and to the right for a negative angle. You can use the break_angle to find the x and z components of break_length as follows: break_x = break_length*sin(break_angle) and break_z = break_length*cos(break_angle). Note that break_x has the opposite sign as pfx_x because of the convention used to define the angle. That is, a pitch that breaks away from a RHH (i.e., toward the catcher's right) has a positive pfx_x but a negative break_x. 80 | 81 | 82 | 83 | 84 | -------------------------------------------------------------------------------- /TODO: -------------------------------------------------------------------------------- 1 | --------------- 2 | PyPitches To Do 3 | --------------- 4 | 5 | Web stuff 6 | --------- 7 | 8 | - matplotlib generates HTML5 canvas now? How? 9 | 10 | - compare to Dojo charting 11 | 12 | - an interface to drive updating 13 | 14 | 15 | Database stuff 16 | -------------- 17 | 18 | - Record update events. Record raw files. Make permanent note of why some are rejected. Starting-off point for new update. 19 | 20 | - Faster loading. Indexless temporary table? 21 | 22 | - Table:: 23 | 24 | downloads 25 | file text 26 | downloaded timestamp 27 | accepted boolean 28 | reject_reason integer foreign key references reject_reasons 29 | -- or just enum? 30 | 31 | 32 | 33 | Other 34 | ----- 35 | 36 | - Semiauto updating. web interface invokes Download.py -> load.py, no storing files inbetween. Machine knows what the last download and load were 37 | -------------------------------------------------------------------------------- /bin/run.sh: -------------------------------------------------------------------------------- 1 | 2 | 3 | #python download.py 4 | #python select_gamedirs.py gamedirs.yaml 5 | #python load.py 6 | 7 | # Alternatively... 8 | # download pitches.sql from http://web.mit.edu/gkettler/pitches.sql.gz 9 | # uncompress it and load with 10 | sqlite3 baseball.sqlite < pitches.sql 11 | python plot_pitch_types.py wakefield.yaml 12 | python plot_pitch_types.py buchholz.yaml 13 | python scatterplot.py beckett_break.yaml 14 | python plot_pitch_locations.py lester_ch.yaml 15 | 16 | rst2html readme.rst > readme.html 17 | -------------------------------------------------------------------------------- /pypitches/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /pypitches/baseball_query.py: -------------------------------------------------------------------------------- 1 | # baseball_query.py 2 | # Set of utility functions to grab certain pitches 3 | # by constructing the SQLAlchemy query, 4 | # and return NumPy records for more convenient plotting. 5 | 6 | import numpy 7 | from model import Pitch, AtBat, Player, Game, SessionManager 8 | import matplotlib.pyplot as plt 9 | 10 | def objs2recarry(objs): 11 | """Given a list/iterator of Pitch objects, turn it into a NumPy RecArray so 12 | one field can easily be grabbed 13 | 14 | e.g. objs2recarry(pitches)['start_speed'] gives an easy-to-plot array of pitch speeds""" 15 | 16 | keys = ['start_speed', 'pfx_z', 'pfx_x', 'px', 'pz', 17 | 'sz_bot', 'sz_top', 18 | 'break_length', 'break_angle', 'type', 'pitch_type'] 19 | types = [float, float, float, float, float, float, float, float, float, '|S1', '|S2'] #|S64 would be strings length <= 64, for example 20 | retrows = [[pitch.__dict__[key] for key in keys] for pitch in objs] 21 | retarray = numpy.rec.fromrecords(retrows, dtype=zip(keys, types)) 22 | return retarray 23 | 24 | def to_radian(degrees): 25 | """takes degrees in a system where 0 degrees is straight down, 26 | and positive degrees moves clockwise, returns measurement in radians 27 | on the unit circle""" 28 | 29 | 30 | other_degrees = 270 - degrees 31 | return (other_degrees / 180 * numpy.pi) 32 | 33 | def normalized_pitch_height(pitch): 34 | """Pitch locations are given in inches, but the height of the 35 | strike zone varies depending on the batter's height and stance. 36 | Therefore, to plot pitches by one pitcher against many batters, 37 | the heights must be normalized. 38 | 39 | Take a pitch object and return its height scaled to an average-height 40 | hitter, in feet. 41 | 1.6 ft represents the bottom of the zone and 3.65 represents the top. 42 | """ 43 | 44 | return ((pitch['pz'] - pitch['sz_bot']) / (pitch['sz_top'] - pitch['sz_bot']) 45 | * 2.05 + 1.6) 46 | 47 | class NoPitchesError(ValueError): 48 | def __init__(self, value, query, 49 | name, event, des, balls, strikes, type_, pitch_type, payoff, date, stand): 50 | self.value = value 51 | self.query = query 52 | self.q_dict = { 53 | 'name' : name, 54 | 'event' : event, 55 | 'des' : des, 56 | 'balls' : balls, 57 | 'strikes' : strikes, 58 | 'type_' : type_, 59 | 'pitch_type' : pitch_type, 60 | 'payoff' : payoff, 61 | 'date' : date, 62 | 'stand' : stand, 63 | } 64 | def __str__(self): 65 | ret_str = self.value 66 | for key, item in self.q_dict.items(): 67 | if item is not None: 68 | ret_str += "\n\t%s: %s" % (key, item) 69 | return ret_str 70 | 71 | @SessionManager.withsession 72 | def pitches(session, name=None, event=None, des=None, date=None, 73 | type_=None, pitch_type=None, stand=None, payoff=None, 74 | balls=None, strikes=None): 75 | """ 76 | Construct and execute an SQL query to get pitches of interest. Can filter by... 77 | name: as a tuple (first, last) 78 | last: Pitcher's last name 79 | first: first name 80 | event: "event" field means any pitch from any at-bat with that result (Strikout, Single, Walk, etc) 81 | des: longer description of the at-bat result as reported by MLB. (Mind your spelling and caps) 82 | date: a string, in the format of "April 6, 2011" 83 | type: S, B, or X for strike (called, fouled or swing-and-miss), ball, or in play 84 | pitch_type (unfortunately similar name): two-letter code to describe the pitch: 85 | FF: four-seam fastball 86 | FT: two-seam fastball 87 | CH: changeup 88 | CU: curveball 89 | and more... 90 | 91 | all fields except pitcher's name are optional. 92 | """ 93 | # This needs to be extended to use more fields. Filter by all the things! 94 | # The growing series of ifs is a code smell... 95 | 96 | q = session.query(Pitch) 97 | if name is not None: 98 | first, last = name 99 | q = q.join(Pitch.pitchedby).join(Pitch.atbat).filter(Player.last == last, Player.first==first) 100 | if event is not None: 101 | q = q.filter(AtBat.event == event) 102 | if des is not None: 103 | q = q.filter(Pitch.des == des) 104 | if balls is not None: 105 | q = q.filter(Pitch.balls == balls) 106 | if strikes is not None: 107 | q = q.filter(Pitch.strikes == strikes) 108 | if type_ is not None: 109 | q = q.filter(Pitch.type == type_) 110 | if pitch_type is not None: 111 | q = q.filter(Pitch.pitch_type == pitch_type) 112 | if payoff is not None: # If this is that last (deciding) pitch of an at-bat 113 | q = q.filter(Pitch.payoff == int(payoff)) 114 | if date is not None: 115 | q = q.join(Pitch.game).filter(Game.date == date) 116 | if stand is not None: 117 | q = q.join(Pitch.atbat).filter(AtBat.stand == stand) 118 | 119 | 120 | objs = q.all() 121 | if len(objs) == 0: 122 | raise NoPitchesError("No pitches were found to match this query.", q, 123 | name, event, des, balls, strikes, type_, pitch_type, payoff, date, stand) 124 | return objs2recarry(objs) 125 | 126 | def callcolor(call): 127 | if call == 'S': 128 | return '#EE0044' 129 | elif call == 'B': 130 | return '#0000EE' 131 | elif call == 'X': 132 | return '#00EE00' 133 | else: 134 | raise ValueError, call 135 | 136 | def callcolors(calls): 137 | return map(callcolor, calls) 138 | 139 | -------------------------------------------------------------------------------- /pypitches/baseball_test.yaml: -------------------------------------------------------------------------------- 1 | gamedirs_file: gamedirs.test.json 2 | #engine: sqlite 3 | #sqlite_file: baseballtest.sqlite 4 | engine: postgres 5 | postgres_db: pypitchestest 6 | postgres_user: pypitches 7 | postgres_password: slider 8 | tables_file: baseball.sql 9 | ignore_files: [inning_Scores.xml, inning_hit.xml] 10 | -------------------------------------------------------------------------------- /pypitches/beckett_break.yaml: -------------------------------------------------------------------------------- 1 | name: [Josh, Beckett] 2 | pitch_type: CU 3 | filename: 'speed_break_outcome.png' 4 | outcomes: 5 | sw_strike: 6 | color: '#B00000' 7 | marker: o 8 | foul: 9 | color: '#CC8798' 10 | marker: x 11 | groundout: 12 | color: '#006600' 13 | marker: x 14 | flyout: 15 | color: '#006600' 16 | marker: x 17 | lineout: 18 | color: '#006600' 19 | marker: p 20 | single: 21 | color: '#000000' 22 | marker: p 23 | double: 24 | color: '#000000' 25 | marker: ^ 26 | triple: 27 | color: '#000000' 28 | marker: ^ 29 | homerun: 30 | color: '#0000BB' 31 | marker: ^ 32 | flyout: 33 | color: '#006600' 34 | marker: ^ 35 | -------------------------------------------------------------------------------- /pypitches/buchholz.yaml: -------------------------------------------------------------------------------- 1 | name: [Clay, Buchholz] 2 | all_file: buchholz_all.png 3 | colors: 4 | 'X': '#0000EE' 5 | 'S': '#00EE00' 6 | 'B': '#EE0044' 7 | plots: 8 | - 9 | 'pitch_type': 'FF' 10 | 'filename': 'buchholz_fourseam.png' 11 | 'marker': 'o' 12 | 'color': '#CC0000' 13 | - 14 | 'pitch_type': 'FT' 15 | 'filename': 'buchholz_twoseam.png' 16 | 'marker': 'p' 17 | 'color': '#DD00DD' 18 | - 19 | 'pitch_type': 'CU' 20 | 'filename': 'buchholz_curve.png' 21 | 'marker': '^' 22 | 'color': '#0000CC' 23 | - 24 | 'pitch_type': 'CH' 25 | 'filename': 'buchholz_change.png' 26 | 'marker': 'x' 27 | 'color': '#CC00CC' 28 | - 29 | 'pitch_type': 'FC' 30 | 'filename': 'buchholz_cutter.png' 31 | 'marker': '*' 32 | 'color': '#000000' 33 | - 34 | 'pitch_type': 'SL' 35 | 'filename': 'buchholz_slider.png' 36 | 'marker': 'v' 37 | 'color': '#660066' 38 | 39 | 40 | -------------------------------------------------------------------------------- /pypitches/clustering.py: -------------------------------------------------------------------------------- 1 | # Splitters/split-fingered changeups? 2 | # 4-seamers that sink and 2-seamers that don't? 3 | # Slutters??? 4 | 5 | 6 | """SELECT pfx_x, pfx_z, 7 | normalized_velocity, 8 | normalized_break, 9 | """ 10 | # Normalized pitch speed: 11 | # range = 90th percentile velocity - 10th percvelo 12 | # min 13 | -------------------------------------------------------------------------------- /pypitches/download.py: -------------------------------------------------------------------------------- 1 | # download.py 2 | # grab selected files from MLB Advanced Media's server 3 | # and replicate the same directory structure locally 4 | 5 | import os 6 | from os.path import join, pardir, abspath, isdir 7 | import re 8 | from BeautifulSoup import BeautifulSoup 9 | from urllib2 import urlopen 10 | import select_gamedirs 11 | import model 12 | from model import SessionManager, GameDir 13 | 14 | server_string = "http://gdx.mlb.com" 15 | start_dir = "/components/game/mlb/" 16 | 17 | # Patterns to grab only specific years, months, days, or games 18 | year_pattern = "year_2012" #only want this one year 19 | month_pattern = "month_07" 20 | day_pattern = "day_" 21 | game_pattern = "gid_" 22 | default_patterns = [year_pattern, month_pattern, day_pattern, game_pattern] 23 | 24 | # These three 25 | xml_wishlist = ("game.xml", "players.xml", "boxscore.xml") 26 | 27 | current_path = abspath(".") 28 | 29 | def get_links(string, pattern): 30 | """string is an HTML page or tag soup. 31 | Scan all links in that page and yield those that resemble pattern.""" 32 | soup = BeautifulSoup(string) 33 | for link in soup.findAll("a"): 34 | if re.match(pattern, link.string.lstrip()): 35 | yield link.string.lstrip(), link['href'] 36 | 37 | def grab_page(url, filename=None): 38 | """Grab page at url and either return it as a string or save it to file""" 39 | try: 40 | response = urlopen(url) 41 | html = response.read() 42 | except Exception as err: 43 | print >>sys.stderr, "url: {0}\n\t{1}".format(url, str(err)) 44 | 45 | if filename is None: 46 | return html 47 | else: 48 | with open(filename, 'w') as handle: 49 | handle.write(html) 50 | 51 | @SessionManager.withsession 52 | def database_has(session, gamedir_url): 53 | """Is that url already downloaded AND is it good?""" 54 | 55 | records = session.query(GameDir).filter(GameDir.url == gamedir_url).all() 56 | if len(records) > 1: 57 | raise ValueError, "multiple records in database for url= {0}".format(gamedir_url) 58 | elif len(records) == 1: 59 | if records[0].status == 'error' or records[0].status == 'redo': 60 | path = records[0].path 61 | print "Deleting and replacing old gamedir {0}".format(path) 62 | shutil.rmtree(path) 63 | session.delete(records[0]) 64 | session.commit() 65 | return False 66 | else: 67 | return True 68 | else: 69 | return False 70 | 71 | @SessionManager.withsession 72 | def download_game(session, gamedir_url, check_local=lambda x:False): 73 | """Download the game in directory gamedir_url. 74 | First grab the directory and get a listing. 75 | Expect to find a few .xml files and an inning/ directory with an 76 | inning_all.xml file. 77 | 78 | Optionally, take a function to decide whether to proceed""" 79 | 80 | pbp_string = "inning/" 81 | if check_local(gamedir_url): 82 | print gamedir_url, "skipping because already have good data" 83 | return 84 | links_and_hrefs = dict(get_links(grab_page(gamedir_url), pbp_string)) 85 | if links_and_hrefs: 86 | os.mkdir(join(current_path, pbp_string)) 87 | dest_path_inning_all = join(current_path, pbp_string, "inning_all.xml") 88 | grab_page(gamedir_url + "inning/inning_all.xml", dest_path_inning_all) 89 | for xmlname in xml_wishlist: 90 | dest_path_etc = join(current_path, xmlname) 91 | grab_page(gamedir_url + xmlname, dest_path_etc) 92 | gamedir_row = GameDir(url=gamedir_url, path=current_path, status='not examined', local_copy=True) 93 | else: 94 | print gamedir_url, " no inning/ directory" 95 | gamedir_row = GameDir(url=gamedir_url, path=None, status='error', 96 | status_long="no {0} directory".format(pbp_string), local_copy=False) 97 | session.add(gamedir_row) 98 | session.commit() 99 | 100 | 101 | 102 | def navigate_dirs(start_url, patterns, fun=download_game): 103 | """Navigate the directory structure on the server to find 104 | game directories. 105 | 106 | When you hit the end of a pattern, call the function, 107 | which defaults to download_game""" 108 | global current_path 109 | if len(patterns) > 0: 110 | for linkname, href in get_links(grab_page(start_url), patterns[0]): 111 | newdir = href.split("/")[-2] 112 | if newdir not in os.listdir(current_path): 113 | os.mkdir(join(current_path, newdir)) 114 | current_path = join(current_path, newdir) 115 | navigate_dirs(start_url + href, patterns[1:], fun) 116 | current_path = abspath(join(current_path, pardir)) 117 | else: 118 | fun(start_url) 119 | 120 | def download_with_patterns(patterns=default_patterns, local_dir='downloads'): 121 | 122 | global current_path 123 | if not isdir(local_dir): 124 | os.mkdir(local_dir) 125 | current_path = join(current_path, local_dir) 126 | navigate_dirs(server_string + start_dir, patterns, download_game) 127 | current_path = abspath(join(current_path, pardir)) 128 | 129 | if __name__ == "__main__": 130 | from settings import postgres_db, postgres_user, postgres_password 131 | SessionManager.create(postgres_db, postgres_user, postgres_password) 132 | download_with_patterns(local_dir='download') 133 | -------------------------------------------------------------------------------- /pypitches/lester_ch.yaml: -------------------------------------------------------------------------------- 1 | name: [Jon, Lester] 2 | marker: o 3 | colors: 4 | 'X': '#0000EE' 5 | 'S': '#00EE00' 6 | 'B': '#EE0044' 7 | plots: 8 | - 9 | pitch_type: 'CH' 10 | filename: 'lester_ch_l.png' 11 | stand: 'L' 12 | - 13 | pitch_type: 'CH' 14 | filename: 'lester_ch_r.png' 15 | stand: 'R' 16 | -------------------------------------------------------------------------------- /pypitches/lester_cu.yaml: -------------------------------------------------------------------------------- 1 | name: [Jon, Lester] 2 | marker: o 3 | colors: 4 | 'X': '#0000EE' 5 | 'S': '#00EE00' 6 | 'B': '#EE0044' 7 | plots: 8 | - 9 | pitch_type: 'CU' 10 | filename: 'lester_cu_l.png' 11 | stand: 'L' 12 | - 13 | pitch_type: 'CU' 14 | filename: 'lester_cu_r.png' 15 | stand: 'R' 16 | -------------------------------------------------------------------------------- /pypitches/load.py: -------------------------------------------------------------------------------- 1 | # Load the XML data into a database 2 | # expects to find a gamedirs.yaml file, 3 | # which was created by select_gamedirs.py 4 | 5 | from BeautifulSoup import BeautifulStoneSoup 6 | import os.path 7 | import os 8 | import sys 9 | import pdb 10 | import yaml #use this for hand-written configs 11 | import json #use this for generated files 12 | from sqlalchemy.exc import IntegrityError 13 | from dateutil import parser 14 | import model 15 | from model import GameDir, Player, Game, Pitch, Team, AtBat, Runner 16 | 17 | 18 | verbose = True 19 | 20 | errorlog = open("err", "w") 21 | nonamelog = open("unsaved_attrs.err", "w") 22 | 23 | def xml2obj(attrs, baseballclass): 24 | """Takes a list of attributes (as returned by BeautifulStoneSoup) 25 | and a class from py 26 | 27 | Apply those attributes to a new instance of that class.""" 28 | obj = baseballclass() 29 | for name, val in dict(attrs).iteritems(): 30 | if val == '': 31 | val = None 32 | if name in dir(baseballclass): 33 | try: 34 | obj.__setattr__(name, val) 35 | except TypeError: 36 | pdb.set_trace() 37 | except AttributeError: 38 | pdb.set_trace() 39 | except UnicodeEncodeError: 40 | obj.__setattr__(name, None) 41 | #print >>errorlog, "Funny character in:\n" , str(attrs) 42 | else: 43 | try: 44 | print >>nonamelog, name, val, str(baseballclass) 45 | except ValueError: 46 | print >>nonamelog, name 47 | return obj 48 | 49 | def get_start_date(gamedirs): 50 | """Look at files in all these dirs and return the earliest date.""" 51 | def date_of(gamedir): 52 | datestring = BeautifulStoneSoup(open(os.path.join(gamedir, "boxscore.xml"))).findAll('boxscore')[0]['date'] 53 | return dateutils.parse(datestring) 54 | 55 | @model.SessionManager.withsession 56 | def loadbox(session, gamedirs): 57 | """Load game and box score data from game.xml, boxscore.xml 58 | Team names, etc. 59 | Takes a list of directories, not just one, in case of suspensions.""" 60 | 61 | #most info will be grabbed from the first 62 | gamefile = os.path.join(gamedirs[0], "game.xml") 63 | boxscorefile = os.path.join(gamedirs[0], "boxscore.xml") 64 | gamedata = BeautifulStoneSoup(open(gamefile)) 65 | boxscoredata = BeautifulStoneSoup(open(boxscorefile)) 66 | for teamdata in gamedata.findAll('team'): 67 | teamobj = xml2obj(teamdata.attrs, Team) 68 | if not session.query(Team).filter(Team.code == teamobj.code).all(): 69 | session.add(teamobj) 70 | session.flush() 71 | gameobj = xml2obj(boxscoredata.findAll('boxscore')[0].attrs + gamedata.findAll('game')[0].attrs, Game) 72 | gameobj.game_pk = int(gameobj.game_pk) 73 | gameobj.start_date = get_start_date(gamedirs) 74 | session.add(gameobj) 75 | session.flush() 76 | return gameobj 77 | 78 | @model.SessionManager.withsession 79 | def loadplayers(session, playersfile, gameobj): 80 | """Load players.xml""" 81 | ids = {} 82 | playersdata = BeautifulStoneSoup(open(playersfile)) 83 | for team in playersdata.findAll('team'): 84 | for playerdata in team.findAll('player'): 85 | if playerdata['id'] in ids: 86 | print >>errorlog, "ignoring duplicate playerdata: %s from file %s" % (str(playerdata.attrs), playersfile) 87 | continue 88 | ids[playerdata['id']] = playerdata 89 | if not session.query(Player).filter(Player.id == playerdata['id']).first(): 90 | playerobj = xml2obj(playerdata.attrs, Player) 91 | session.add(playerobj) 92 | else: 93 | pass 94 | 95 | class DuplicatePitchError(RuntimeError): 96 | pass 97 | class InningStructureError(RuntimeError): 98 | def __init__(self, offending_xml_file, inningdata, description): 99 | self.offending_xml_file = offending_xml_file 100 | self.inningdata = inningdata 101 | self.description = description 102 | class MissingFileError(RuntimeError): 103 | def __init__(self, directory, missing_file): 104 | self.directory = directory 105 | self.missing_file = missing_file 106 | self.value = "Looking for %s in %s" % (missing_file, directory) 107 | def __str__(self): 108 | return self.value 109 | 110 | def check_innings(innings, pitchesfilename): 111 | """Takes a bunch of inning xml datasets. 112 | Sanity check. Does each inning have a top? 113 | Does each inning except possibly the last have a bottom? 114 | Does each atbat have some pitches? 115 | Return a list of atbats for the whole game. 116 | """ 117 | for inningdata in innings: 118 | if len(inningdata.findAll('top')) != 1: 119 | raise InningStructureError(pitchesfilename, inningdata, "missing top of inning") 120 | for inningdata in innings[:-1]: 121 | if len(inningdata.findAll('bottom')) != 1: 122 | raise InningStructureError(pitchesfilename, inningdata, "missing bottom of inning") 123 | for inningdata in innings: 124 | for half in 'top', 'bottom': 125 | halfdata = inningdata.findAll(half) 126 | if halfdata: 127 | for atbatdata in halfdata[0].findAll('atbat'): 128 | if not atbatdata.findAll('pitch'): 129 | #FIXME: an atbat will be skipped if a runner is picked off to end the inning before the first pitch. 130 | # Is that desired behavior? 131 | # Other cases where this could happen? 132 | print >>errorlog, "skipping atbat %s because no pitches (dir %s)" % (atbatdata['num'], pitchesfilename) 133 | else: 134 | atbatdata.attrs.append(('inning', inningdata['num'])) # I like to refer to inning # in the same structure 135 | yield atbatdata 136 | 137 | 138 | def by_pitchcount(pitchdata1, pitchdata2): 139 | # Not all pitches have this field. Is id reliable? Or not sorting at all? 140 | return cmp(pitchdata1['tfs'], pitchdata2['tfs']) 141 | 142 | @model.SessionManager.withsession 143 | def loadpitches(session, pitchesfile, gameobj): 144 | """Load at-bats and individual pitches and runner events 145 | from inning_all.xml""" 146 | 147 | def makepitchobj(pitchdata, count): 148 | pitchobj = xml2obj(pitchdata.attrs, Pitch) 149 | pitchobj.atbatnum = int(atbatdata['num']) 150 | pitchobj.batter = int(atbatdata['batter']) 151 | pitchobj.pitcher = int(atbatdata['pitcher']) 152 | pitchobj.game_pk = gameobj.game_pk 153 | pitchobj.payoff = False 154 | call = pitchdata['type'] 155 | pitchobj.balls = count['balls'] 156 | pitchobj.strikes = count['strikes'] 157 | if call == 'B': count['balls'] += 1 158 | if call == 'S': count['strikes'] += 1 159 | return pitchobj 160 | 161 | filedata = BeautifulStoneSoup(open(pitchesfile)) 162 | innings = filedata.findAll('inning') 163 | for atbatdata in check_innings(innings, pitchesfile): 164 | if verbose: print "\t", atbatdata['des'] 165 | atbatobj = xml2obj(atbatdata.attrs, AtBat) 166 | atbatobj.game_pk = gameobj.game_pk 167 | atbatobj.date = parser.parse(atbatdata['start_tfs_zulu']).date().ctime() 168 | session.add(atbatobj) 169 | session.flush() 170 | pitch_datas = atbatdata.findAll('pitch') 171 | # pitch_datas.sort(by_pitchcount) # Make sure these are in order because... 172 | count = {'balls': 0, 'strikes': 0} 173 | for pitchdata in pitch_datas[:-1]: 174 | pitchobj = makepitchobj(pitchdata, count) # ...balls and strikes are counted as we go and... 175 | session.add(pitchobj) 176 | try: 177 | session.flush() 178 | except IntegrityError as e: 179 | print "failed on pitchobj.game_pk = {0} pitchobj.atbatnum = {1} but last atbat added was {2}, {3}".format(pitchobj.game_pk, pitchobj.atbatnum, atbatobj.game_pk, atbatobj.num) 180 | raise 181 | 182 | pitchdata = pitch_datas[-1] # ... and last one gets special treatment 183 | pitchobj = makepitchobj(pitchdata, count) 184 | pitchobj.payoff = True 185 | session.add(pitchobj) 186 | # for runnerdata in atbatdata.findAll('runner'): 187 | # runnerobj = xml2obj(runnerdata.attrs, Runner) 188 | # runnerobj.atbatnum = atbatobj.num 189 | # runnerobj.game_pk = gameobj.game_pk 190 | # session.add(runnerobj) 191 | 192 | 193 | 194 | @model.SessionManager.withsession 195 | def load_game_data(session, game_pk, gamedirs): 196 | """Check for files in gamedir and then load game metadata 197 | (By calling loadbox and loadplayers) 198 | Then load atbats and pitches. 199 | """ 200 | gameobj = loadbox(gamedirs) 201 | print gameobj.date, gameobj.away_team_code, gameobj.home_team_code 202 | for gamedir in gamedirs: 203 | loadplayers(os.path.join(gamedir, "players.xml"), gameobj) 204 | if verbose: print "loaded game metadata: ", gamedir 205 | load_atbats(gamedirs, gameobj) 206 | 207 | def load_atbats(gamedirs, gameobj): 208 | game_pk = gameobj.game_pk 209 | for gamedir in gamedirs: 210 | loadpitches(os.path.join(gamedir, "inning", "inning_all.xml"), gameobj) 211 | if verbose: print "loaded at-bats and pitches: ", gamedirs 212 | 213 | def get_keys_and_dirs(gamedirs_file): 214 | """Takes the name of a json file mapping game primary keys to lists of directories for those games. 215 | 216 | Yields pairs of (key, [list of dirs]) 217 | """ 218 | for key, dirs in json.load(open(gamedirs_file)).iteritems(): 219 | yield key, dirs 220 | 221 | @model.SessionManager.withsession 222 | def load(session, statuses=frozenset(['final', 'maybe_partial'])): 223 | finals = session.query(GameDir).filter(GameDir.status.in_(statuses)).filter(GameDir.loaded == False).all() 224 | for final in finals: 225 | print final.path 226 | load_game_data(final.game_pk, [final.path]) 227 | final.loaded = True 228 | 229 | -------------------------------------------------------------------------------- /pypitches/model/__init__.py: -------------------------------------------------------------------------------- 1 | from classes import * 2 | 3 | # default Session with default params. For others 4 | # (e.g. testing database), call it yerself 5 | from session import * 6 | 7 | 8 | -------------------------------------------------------------------------------- /pypitches/model/classes.py: -------------------------------------------------------------------------------- 1 | from sqlalchemy.ext.declarative import declarative_base 2 | from sqlalchemy import Integer, String, Column, DateTime 3 | from sqlalchemy import Float, Boolean, Text, CHAR, Date, func 4 | from sqlalchemy import ForeignKey, and_ 5 | from sqlalchemy.orm import relationship, backref 6 | 7 | Base = declarative_base() 8 | 9 | class Pitch(Base): 10 | __tablename__ = "pitch" 11 | 12 | des = Column(String) 13 | type = Column(CHAR(1)) 14 | id = Column(Integer, primary_key=True) #INTEGER, 15 | x = Column(Float) #FLOAT, 16 | y = Column(Float) #FLOAT, 17 | sv_id = Column(String) #VARCHAR(128), 18 | start_speed = Column(Float) 19 | end_speed = Column(Float) 20 | sz_top = Column(Float) 21 | sz_bot = Column(Float) 22 | pfx_x = Column(Float) 23 | pfx_z = Column(Float) 24 | px = Column(Float) 25 | pz = Column(Float) 26 | x0 = Column(Float) 27 | y0 = Column(Float) 28 | z0 = Column(Float) 29 | vx0 = Column(Float) 30 | vy0 = Column(Float) 31 | vz0 = Column(Float) 32 | ax = Column(Float) 33 | ay = Column(Float) 34 | az = Column(Float) 35 | break_y = Column(Float) 36 | break_angle = Column(Float) 37 | break_length = Column(Float) 38 | pitch_type = Column(String) 39 | type_confidence = Column(Float) 40 | spin_dir = Column(Float) 41 | spin_rate = Column(Float) 42 | nasty = Column(Integer) 43 | on_1b = Column(Integer, ForeignKey('player.id')) 44 | on_2b = Column(Integer, ForeignKey('player.id')) 45 | on_3b = Column(Integer, ForeignKey('player.id')) 46 | payoff = Column(Boolean) 47 | balls = Column(Integer) 48 | strikes = Column(Integer) 49 | 50 | game_pk = Column(Integer, ForeignKey('game.game_pk'), primary_key=True ) 51 | pitcher = Column(Integer, ForeignKey('player.id')) 52 | batter = Column(Integer, ForeignKey('player.id')) 53 | atbatnum = Column(Integer) 54 | pitchedby = relationship("Player", primaryjoin="Pitch.pitcher==Player.id") 55 | seenby = relationship("Player", primaryjoin="Pitch.batter==Player.id") 56 | # atbat = relationship("AtBat", primaryjoin=and_("Pitch.game_pk == AtBat.game_pk", "Pitch.atbatnum==AtBat.num")) 57 | game = relationship("Game", primaryjoin="Pitch.game_pk == Game.game_pk") 58 | 59 | 60 | class Game(Base): 61 | __tablename__ = "game" 62 | game_pk = Column(Integer, primary_key=True) 63 | away_team_code = Column(CHAR(3), ForeignKey('team.code')) 64 | home_team_code = Column(CHAR(3), ForeignKey('team.code')) 65 | away_fname = Column(Text) 66 | home_fname = Column(Text) 67 | away_sname = Column(Text) 68 | home_sname = Column(Text) 69 | stadium = Column(Integer, ForeignKey('stadium.id')) 70 | date = Column(Date) 71 | 72 | 73 | class Team(Base): 74 | __tablename__ = "team" 75 | id = Column(Integer) 76 | code = Column(CHAR(3), primary_key=True) 77 | name = Column(Text) 78 | name_full = Column(Text) 79 | name_brief = Column(Text) 80 | class Stadium(Base): 81 | __tablename__ = "stadium" 82 | 83 | id = Column(Integer, primary_key=True) 84 | name = Column(Text) 85 | location = Column(Text) 86 | class Player(Base): 87 | __tablename__ = "player" 88 | id = Column(Integer, primary_key=True) 89 | first = Column(Text) 90 | last = Column(Text) 91 | boxname = Column(Text) 92 | rl = Column(CHAR(1)) 93 | class PlayerInGame(Base): 94 | __tablename__ = "playeringame" 95 | id = Column(Integer, ForeignKey('player.id'), primary_key=True) 96 | game_pk = Column(Integer, ForeignKey('game.game_pk'), primary_key=True) 97 | num = Column(Integer) 98 | position = Column(CHAR(2)) #starting position? 99 | bat_order = Column(Integer) 100 | game_position = Column(CHAR(2)) #wtf? 101 | avg = Column(Float) 102 | era = Column(Float) 103 | hr = Column(Integer) 104 | rbi = Column(Integer) 105 | wins = Column(Integer) 106 | wins = Column(Integer) 107 | wins = Column(Integer) 108 | losses = Column(Integer) 109 | 110 | class AtBat(Base): 111 | __tablename__ = "atbat" 112 | 113 | 114 | inning = Column(Integer) 115 | num = Column(Integer, primary_key=True) 116 | 117 | game_pk = Column(Integer, ForeignKey('game.game_pk'), primary_key=True) 118 | b = Column(Integer) 119 | s = Column(Integer) 120 | stand = Column(CHAR(1)) 121 | p_throws = Column(CHAR(1)) 122 | inning = Column(Integer) 123 | batter = Column(Integer, ForeignKey('player.id')) 124 | pitcher = Column(Integer, ForeignKey('player.id')) 125 | b_height = Column(Text) 126 | des = Column(Text) 127 | event = Column(Text) 128 | brief_event = Column(Text) 129 | date = Column(Date) 130 | 131 | game = relationship("Game", backref=backref("atbats", order_by=num)) 132 | pitchedby = relationship("Player", primaryjoin="AtBat.pitcher==Player.id") 133 | wasbatter = relationship("Player", primaryjoin="AtBat.batter==Player.id") 134 | 135 | 136 | class Runner(Base): 137 | __tablename__ = "runner" 138 | runner_pk = Column(Integer, primary_key=True) 139 | atbatnum = Column(Integer, ForeignKey('atbat.num')) 140 | game_pk = Column(Integer, ForeignKey('game.game_pk')) 141 | id = Column(Integer, ForeignKey('player.id')) 142 | start = Column(Text) 143 | end = Column(Text) 144 | score = Column(CHAR(1)) 145 | rbi = Column(CHAR(1)) 146 | earned = Column(CHAR(1)) 147 | event = Column(Text) 148 | class GameDir(Base): 149 | __tablename__ = "gamedir" 150 | 151 | id = Column(Integer, primary_key=True) 152 | local_copy = Column(Boolean) 153 | url = Column(Text) 154 | path = Column(Text) 155 | status = Column(Text) 156 | status_long = Column(Text) 157 | loaded = Column(Boolean, default=False) 158 | game_pk = Column(Integer) 159 | atbats = Column(Integer) 160 | innings = Column(Integer) 161 | downloaded_time = Column(DateTime) 162 | loaded_time = Column(DateTime) 163 | date_scheduled = Column(Date) 164 | classified_time = Column(DateTime, server_default=func.now()) 165 | 166 | def __init_(self, url=None, path=None, status='not examined', local_copy=True): 167 | self.url = url 168 | self.path =path 169 | self.status= status 170 | self.local_copy = local_copy 171 | 172 | Pitch.atbat = relationship('AtBat', 173 | primaryjoin=(Pitch.atbatnum == AtBat.num) & (Pitch.game_pk == AtBat.game_pk), 174 | foreign_keys=[Pitch.atbatnum, Pitch.game_pk], 175 | uselist=False, 176 | ) 177 | -------------------------------------------------------------------------------- /pypitches/model/session.py: -------------------------------------------------------------------------------- 1 | from sqlalchemy import create_engine 2 | from sqlalchemy.orm import scoped_session, sessionmaker 3 | from settings import postgres_db, postgres_user, postgres_password 4 | from contextlib import contextmanager 5 | import sys 6 | 7 | class ExistingSession(object): 8 | pass 9 | 10 | class NewSession(object): 11 | pass 12 | 13 | class SessionManager(object): 14 | _engine = None 15 | _sessionmaker = None 16 | _managed_sessions = [] 17 | 18 | @classmethod 19 | def destroy_all(cls): 20 | cls._engine = None 21 | cls._sessionmaker = None 22 | for session in cls._managed_sessions: 23 | session.rollback() 24 | session.close() 25 | cls._managed_sessions = [] 26 | 27 | @classmethod 28 | def create(cls, db=None, user=None, password=None): 29 | if not cls._engine: 30 | cls._engine, cls._sessionmaker = cls.create_engine(db, user, password) 31 | 32 | if cls._managed_sessions: 33 | cls._managed_sessions[-1].begin_nested() 34 | cls._managed_sessions += cls._managed_sessions[-1:] # add same session to the list again 35 | # it expects another .commit() now 36 | else: 37 | new_session = scoped_session(cls._sessionmaker) 38 | cls._managed_sessions += [new_session] 39 | return cls._managed_sessions[-1] 40 | 41 | @classmethod 42 | def get(cls): 43 | if cls._managed_sessions: 44 | return cls._managed_sessions[-1] 45 | else: 46 | return cls.create() 47 | 48 | @classmethod 49 | def commit(cls): 50 | return cls.get().commit() 51 | 52 | @classmethod 53 | def create_engine(cls, db, user, password): 54 | if None in (db, user, password): 55 | raise ValueError, "SessionManager.create_engine got no database connection parameters. Call create(db, user, password) first." 56 | engine = create_engine("postgres://%s:%s@localhost/%s" 57 | % (user, password, db), 58 | echo=False) 59 | return engine, sessionmaker(engine) 60 | 61 | ## This many be unnecessary when SA gives us begin_nested() as a context manager 62 | # @classmethod 63 | # @contextmanager 64 | # def context(cls): 65 | # session = cls.create(db, user, password) 66 | # try: 67 | # yield session 68 | # session.commit() 69 | # session.close() 70 | # cls._managed_sessions.pop() 71 | # except: 72 | # session.rollback() 73 | # session.close() 74 | # cls._managed_sessions.pop() 75 | # raise 76 | 77 | @classmethod 78 | def withsession(cls, fn, opt=ExistingSession): 79 | """Decorator. 80 | Given a function that requires a session as its first arg, 81 | returns a function that takes a session as an optional kwarg. 82 | If ExistingSession is given, use the most recent session. 83 | If NewSession is given, create a new one and add it to the list. 84 | """ 85 | if opt == NewSession: 86 | def new_fn(*args, **kwargs): 87 | with cls.context() as new_session: 88 | return fn(new_session, *args, **kwargs) 89 | elif opt == ExistingSession: 90 | def new_fn(*args, **kwargs): 91 | session = cls.get() 92 | return fn(session, *args, **kwargs) 93 | new_fn.__realname__ = fn.__name__ 94 | return new_fn 95 | 96 | 97 | -------------------------------------------------------------------------------- /pypitches/notes: -------------------------------------------------------------------------------- 1 | pg_ctl -D /usr/local/var/postgres -l /usr/local/var/postgres/server.log start -------------------------------------------------------------------------------- /pypitches/plot_pitch_locations.py: -------------------------------------------------------------------------------- 1 | # 'Quick' script to plot a pitcher's movement for each pitch type 2 | # driven by a yaml settings file 3 | 4 | from baseball_query import pitches, normalized_pitch_height, callcolors 5 | import matplotlib.pyplot as plt 6 | from matplotlib.path import Path 7 | from matplotlib.patches import PathPatch 8 | from collections import defaultdict 9 | import numpy 10 | import sys 11 | import yaml 12 | 13 | def do_plots(filename): 14 | with open(filename) as handle: 15 | plot_settings = yaml.load(handle) 16 | name = plot_settings['name'] 17 | plots = plot_settings['plots'] 18 | # first pass: plot one image for each pitch type 19 | for plot in plots: 20 | plot = defaultdict(lambda:None, plot) #if it's not mentioned in the yaml file, forget it 21 | pitchset = pitches(name=name, pitch_type = plot['pitch_type'], stand=plot['stand'], 22 | payoff=plot['payoff'], des=plot['des'], event=plot['event']) 23 | 24 | fig = plt.figure() 25 | ax = fig.add_subplot(111) 26 | 27 | for call in ('X', 'S', 'B'): 28 | subset = pitchset[pitchset['type'] == call] 29 | norm_pz = normalized_pitch_height(subset) 30 | plt.plot(subset['px'], 31 | norm_pz, c=plot_settings['colors'][call], marker=plot_settings['marker'], linestyle='None') 32 | 33 | #draw strike zone 34 | codes = [Path.MOVETO] + [Path.LINETO]*3 + [Path.CLOSEPOLY] 35 | vertices = [(-1,1.6), (1,1.6), (1, 3.65), (-1, 3.65), (0,0)] 36 | vertices = numpy.array(vertices, float) 37 | path = Path(vertices, codes) 38 | pathpatch = PathPatch(path, facecolor='None', edgecolor='black') 39 | ax.add_patch(pathpatch) 40 | 41 | plt.xlim([-3,3]) 42 | plt.ylim([0,6]) 43 | plt.savefig(plot['filename'], format='png') 44 | 45 | -------------------------------------------------------------------------------- /pypitches/plot_pitch_types.py: -------------------------------------------------------------------------------- 1 | # 'Quick' script to plot a pitcher's movement for each pitch type 2 | # driven by a yaml settings file 3 | 4 | import baseball 5 | from baseball_query import pitches, to_radian 6 | import matplotlib.pyplot as plt 7 | import matplotlib.projections 8 | from matplotlib import lines 9 | import sys 10 | import yaml 11 | 12 | 13 | if len(sys.argv) != 2: 14 | print "python plot_pitch_types.py wakefield.yaml" 15 | else: 16 | with open(sys.argv[1]) as handle: 17 | plot_settings = yaml.load(handle) 18 | 19 | db_settings, session = baseball.init() 20 | 21 | name = plot_settings['name'] 22 | plots = plot_settings['plots'] 23 | 24 | # first pass: plot one image for each pitch type 25 | for plot in plots: 26 | pitchset = pitches(session, name=name, pitch_type = plot['pitch_type']) 27 | 28 | fig = plt.figure() 29 | ax = fig.add_axes([0.1, 0.1, 0.8, 0.8], polar=True) 30 | ax.plot(map(to_radian, pitchset['break_angle']), 31 | pitchset['break_length'], c=plot['color'], marker=plot['marker'], linestyle='None') 32 | ax.set_rmax(18) 33 | plt.savefig(plot['filename'], format='png') 34 | 35 | # second pass: plot one image including all types, color-coded 36 | fig = plt.figure() 37 | ax = fig.add_axes([0.1, 0.1, 0.8, 0.8], polar=True) 38 | liness= [] 39 | namess= [] 40 | for plot in plots: 41 | pitchset = pitches(session, name=name, pitch_type = plot['pitch_type']) 42 | ax.plot(map(to_radian, pitchset['break_angle']), 43 | pitchset['break_length'], c=plot['color'], marker=plot['marker'], linestyle='None') 44 | liness.append(lines.Line2D([], [], color=plot['color'], marker=plot['marker'],lw=0)) 45 | namess.append(plot['filename'].split('.')[0]) 46 | 47 | plt.figlegend(liness,namess,'upper left') 48 | ax.set_rmax(18) 49 | plt.savefig(plot_settings['all_file'], dpi=200, format='png') 50 | -------------------------------------------------------------------------------- /pypitches/pypitches.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from IPython import embed 3 | import setup_postgres 4 | import settings 5 | from settings import postgres_password, postgres_user, postgres_db 6 | from os import path 7 | 8 | cmds = [ 9 | 'web', 10 | 'ipython', 11 | 'file', 12 | 'webtest', 13 | 'initdb', 14 | 'load', 15 | 'classify', 16 | ] 17 | 18 | def main(): 19 | try: 20 | cmd = sys.argv[1] 21 | assert(cmd in cmds) 22 | except: 23 | invocations = ["python pypitches.py {0}".format(cmd) for cmd in cmds] 24 | print "usage: " + "\n ".join(invocations) 25 | sys.exit() 26 | if cmd == 'initdb': 27 | setup_postgres.initdb(postgres_db, postgres_user, postgres_password) 28 | sys.exit() 29 | else: 30 | import model 31 | model.SessionManager.create(postgres_db, postgres_user, postgres_password) 32 | import load 33 | from web.app import app 34 | import select_gamedirs 35 | from plot_pitch_locations import do_plots 36 | 37 | if cmd == 'web': 38 | app.run() 39 | elif cmd == 'webtest': 40 | app.run('pypitchestest', 'pypitches', 'slider') 41 | elif cmd == 'ipython': 42 | embed() 43 | elif cmd == 'file': 44 | # will generate output by a config file 45 | # a la plot_pitch_locations.py 46 | assert len(sys.argv) > 2, "usage: python pypitches.py file file.yaml" 47 | do_plots(sys.argv[2]) 48 | 49 | elif cmd == 'download': 50 | # hit the MLBAM server and get it all 51 | pass 52 | elif cmd == 'classify': 53 | with model.SessionManager.get().begin_nested(): 54 | static_dir = sys.argv[2] 55 | select_gamedirs.classify_local_dirs_by_filesystem(static_dir) 56 | model.SessionManager.commit() 57 | elif cmd == 'load': 58 | statuses=set(sys.argv[2:]) or set(['final']) 59 | with model.SessionManager.get().begin_nested(): 60 | load.load(statuses) 61 | model.SessionManager.commit() 62 | 63 | 64 | 65 | 66 | if __name__ == "__main__": 67 | main() 68 | -------------------------------------------------------------------------------- /pypitches/scatterplot.py: -------------------------------------------------------------------------------- 1 | import baseball 2 | from baseball_query import pitches, normalized_pitch_height 3 | import matplotlib.pyplot as plt 4 | import numpy 5 | import sys 6 | import yaml 7 | 8 | 9 | import pdb 10 | 11 | 12 | if len(sys.argv) != 2: 13 | print "python plot_pitch_locations.py lester_break.yaml" 14 | else: 15 | with open(sys.argv[1]) as handle: 16 | plot_settings = yaml.load(handle) 17 | 18 | db_settings, session = baseball.init() 19 | 20 | name = plot_settings['name'] 21 | 22 | results = {} 23 | results['sw_strike'] = pitches(session, name=name, pitch_type = plot_settings['pitch_type'], 24 | des="Swinging Strike") 25 | results['foul'] = pitches(session, name=name, pitch_type = plot_settings['pitch_type'], 26 | des="Foul") 27 | results['groundout']= pitches(session, name=name, pitch_type = plot_settings['pitch_type'], 28 | event="Groundout", payoff=True) 29 | results['flyout']= pitches(session, name=name, pitch_type = plot_settings['pitch_type'], 30 | event="Flyout", payoff=True) 31 | results['lineout']= pitches(session, name=name, pitch_type = plot_settings['pitch_type'], 32 | event="Lineout", payoff=True) 33 | results['single'] = pitches(session, name=name, pitch_type = plot_settings['pitch_type'], 34 | event="Single", payoff=True) 35 | results['double'] = pitches(session, name=name, pitch_type = plot_settings['pitch_type'], 36 | event="Double", payoff=True) 37 | #results['triple'] = pitches(session, name=name, pitch_type = plot_settings['pitch_type'], 38 | # event="Triple", payoff=True) 39 | results['homerun'] = pitches(session, name=name, pitch_type = plot_settings['pitch_type'], 40 | event="Home Run", payoff=True) 41 | legend_lines = [] 42 | keys = [] 43 | for key, pitchset in results.iteritems(): 44 | #plt.scatter(norm_pz, indices) 45 | legend_lines.append( plt.scatter(pitchset['break_length'], pitchset['start_speed'], marker=plot_settings['outcomes'][key]['marker'], color=plot_settings['outcomes'][key]['color'])) 46 | keys.append( key) 47 | 48 | plt.legend(legend_lines, keys, 'lower left') 49 | plt.xlabel('break length') 50 | plt.ylabel('pitch starting speed') 51 | 52 | plt.savefig(plot_settings['filename'], format='png') 53 | 54 | -------------------------------------------------------------------------------- /pypitches/select_gamedirs.py: -------------------------------------------------------------------------------- 1 | # The downloaded data tends to be problematic for several reasons 2 | # Some directories are empty 3 | # Some directories have a game.xml but nothing else, due to rain postponement 4 | # Some games have an inning_all.xml but were still postponed 5 | # Some games could be broken across 2 dates because they were suspended 6 | 7 | # This script examines all directories. 8 | # It writes that list to the db 9 | 10 | 11 | import os 12 | from os.path import abspath 13 | import sys 14 | from BeautifulSoup import BeautifulStoneSoup 15 | from collections import defaultdict 16 | from model import GameDir, SessionManager 17 | import datetime 18 | 19 | @SessionManager.withsession 20 | def classify_dir(session, callback, gamedir, files): 21 | """Determine if a game was postponed by looking in its boxscore.xml and, if necessary, in its inning_all.xml 22 | 23 | Intended for use through os.path.walk, so first arg is a callback function. 24 | 25 | Handling of suspended games is complicated. 26 | The game may be restarted from the first inning even if an inning or two was played, 27 | but I don't want to throw out that data. 28 | """ 29 | 30 | print "classify_dir: ", gamedir 31 | if 'boxscore.xml' not in files: 32 | return #don't care about other dirs 33 | status_ind = BeautifulStoneSoup(open(os.path.join(gamedir, 'boxscore.xml'))).findAll('boxscore')[0]['status_ind'] 34 | game_pk = BeautifulStoneSoup(open(os.path.join(gamedir, 'game.xml'))).findAll('game')[0]['game_pk'] 35 | innings = len(BeautifulStoneSoup(open(os.path.join(gamedir, 'inning', 'inning_all.xml'))).findAll('inning')) 36 | date_str = BeautifulStoneSoup(open(os.path.join(gamedir, 'boxscore.xml'))).findAll('boxscore')[0]['date'] 37 | date = datetime.datetime.strptime(date_str, "%B %d, %Y").date() 38 | 39 | if status_ind == 'F': 40 | callback(gamedir, status='final', pk=game_pk, innings=innings, date=date) 41 | elif status_ind == 'P' or status_ind == 'PR': 42 | callback(gamedir, status='postponed', pk=game_pk, innings=innings, date=date) 43 | else: 44 | # Can't stop here. Check that at least one at-bat was actually played 45 | atbats = len(BeautifulStoneSoup(open(os.path.join(gamedir, 'inning/inning_all.xml'))).findAll('atbat')) 46 | if atbats == 0: 47 | #raise MissingAtbatsError(gamedir, "status_ind=%s but no plate appearances took place" % (status_ind,)) 48 | callback(gamedir, status='error', status_long="status_ind=%s but no plate appearances took place" % (status_ind,), 49 | pk=game_pk, innings=innings, atbats=atbats, date=date) 50 | else: 51 | callback(gamedir, status='maybe_partial', status_long='status_ind={0}'.format(status_ind), 52 | pk=game_pk, innings=innings, atbats=atbats, date=date) 53 | 54 | class GameDirError(RuntimeError): 55 | def __init__(self, gamedirs, descr): 56 | self.gamedirs = gamedirs 57 | self.value = descr 58 | def __str__(self): 59 | return self.value + str(self.gamedirs) 60 | 61 | class DuplicateGamesError(GameDirError): 62 | pass 63 | class MissingAtbatsError(GameDirError): 64 | pass 65 | 66 | @SessionManager.withsession 67 | def update_or_add_gamedir(session, path, status, innings=None, pk=None, status_long=None, atbats=None, date=None): 68 | maybe_gamedir = session.query(GameDir).filter(GameDir.path==path).all() 69 | if len(maybe_gamedir) == 1: 70 | gamedir = maybe_gamedir[0] 71 | elif len(maybe_gamedir) == 0: 72 | gamedir = GameDir() 73 | session.add(gamedir) 74 | else: 75 | raise ValueError, "Duplicate gamedir.path in database: {0}".format(path) 76 | gamedir.path = path 77 | gamedir.status = status 78 | gamedir.status_long = status_long 79 | gamedir.local_copy = True 80 | gamedir.game_pk = pk 81 | gamedir.innings = innings 82 | gamedir.atbats = atbats 83 | gamedir.date_scheduled = date 84 | 85 | 86 | @SessionManager.withsession 87 | def classify_local_dirs_by_filesystem(session, rootdir): 88 | os.path.walk(abspath(rootdir), classify_dir, update_or_add_gamedir) 89 | session.flush() 90 | 91 | @SessionManager.withsession 92 | def classify_local_dirs_by_database(session): 93 | for path, in session.query(GameDir.path).filter(GameDir.local_copy == True).filter(GameDir.path != None): 94 | classify_dir(update_or_add_gamedir, path, os.listdir(path)) 95 | session.flush() 96 | 97 | 98 | if __name__ == "__main__": 99 | db, user, password, start_dir = sys.argv[1:5] 100 | #classify_local_dirs_by_filesystem(start_dir) 101 | classify_local_dirs_by_database() 102 | 103 | 104 | 105 | 106 | 107 | 108 | -------------------------------------------------------------------------------- /pypitches/settings.py: -------------------------------------------------------------------------------- 1 | 2 | engine = "postgres" 3 | postgres_db = "pypitches" 4 | postgres_test_db = "pypitches_test" 5 | postgres_user = "pypitches" 6 | postgres_password = "slider" 7 | ignore_files = ["inning_scores.xml", "inning_hit.xml"] 8 | -------------------------------------------------------------------------------- /pypitches/setup_postgres.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import preprocess 3 | import psycopg2 4 | from os.path import split, join, dirname, abspath 5 | #from pypitches import pypitches 6 | 7 | 8 | 9 | # call(['dropdb', '-U', 'pypitches', db_name]) 10 | # call(['createdb', '-U', 'pypitches', db_name]) 11 | 12 | pypitches_root = split(dirname(abspath(__file__)))[0] 13 | sql_dir = join(pypitches_root, "sql") 14 | sql_file = join(sql_dir, "baseball.sql") 15 | 16 | _conn = None 17 | def get_cursor(db, user, password): 18 | if _conn and not _conn.closed: 19 | return _conn, _conn.cursor() 20 | else: 21 | try: 22 | print "new psycopg2 connection" 23 | conn = psycopg2.connect("dbname='%(postgres_db)s' user='%(postgres_user)s' host='localhost' password='%(postgres_password)s'" 24 | % dict(postgres_db=db, postgres_user=user, postgres_password=password)) 25 | except psycopg2.OperationalError as err: 26 | if 'password authentication failed' in err.args[0]: 27 | raise EnvironmentError, err.args[0] + "\n\n is the postgres user %s created?" % (user,) 28 | if 'does not exist' in err.args[0]: 29 | raise EnvironmentError, err.args[0] + "\n\n has the database been created?" 30 | raise 31 | 32 | cursor = conn.cursor() 33 | return conn, cursor 34 | 35 | def initdb(db, user, password, new_conn=True): 36 | if new_conn: 37 | global _conn 38 | _conn = None 39 | conn, cursor = get_cursor(db, user, password) 40 | 41 | with open(sql_file) as inhandle: 42 | ddl_string = "".join(list(inhandle)) 43 | cursor.execute(ddl_string) 44 | conn.commit() 45 | 46 | def destroydb(db, user, password): 47 | conn, cursor = get_cursor(db, user, password) 48 | conn.set_isolation_level(0) 49 | cursor.execute("DROP DATABASE %s" % (db,)) 50 | conn.commit() 51 | conn.close() 52 | 53 | 54 | 55 | if __name__ == "__main__": 56 | settings = dict(postgres_db='pypitches', postgres_user='pypitches', postgres_password='slider') 57 | initdb(settings) -------------------------------------------------------------------------------- /pypitches/shrink.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | with open("gamedirs.2011.json") as handle: 4 | bigd = json.load(handle) 5 | 6 | littled = dict([(key, val[0]) for key, val in bigd.iteritems() 7 | if 'bos' in val[0]]) 8 | 9 | with open("gamedirs.small.json", "w") as handle2: 10 | json.dump(littled, handle2) 11 | -------------------------------------------------------------------------------- /pypitches/test/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gregoryck/pypitches/4c3e5d6987a8e7b23fc25022a00af0c15318ccd0/pypitches/test/__init__.py -------------------------------------------------------------------------------- /pypitches/test/static/testdummy/year_2011/month_08/day_01/gid_2011_08_01_clemlb_bosmlb_1/boxscore.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | BATTING
2B: Saltalamacchia (15, Tomlin), Crawford (14, Tomlin), Ellsbury (30, Perez, C).
3B: Youkilis (2, Tomlin).
HR: Crawford (7, 3rd inning off Tomlin, 0 on, 2 out), Saltalamacchia (10, 6th inning off Tomlin, 1 on, 1 out).
TB: Ellsbury 2; Pedroia 2; Gonzalez, Ad; Youkilis 4; Crawford 6; Saltalamacchia 6; Reddick; Scutaro; Aviles.
RBI: Scutaro (21), Youkilis (75), Crawford (37), Saltalamacchia 2 (36), Ellsbury (63).
2-out RBI: Crawford; Ellsbury.
Runners left in scoring position, 2 out: Ortiz; Pedroia 2.
GIDP: Gonzalez, Ad.
Team RISP: 4-for-8.
Team LOB: 5.

FIELDING
E: Reddick (2, throw).

]]>
BATTING
2B: Brantley (20, Lackey), Kipnis (2, Lackey), LaPorta (15, Williams).
HR: Cabrera, A 2 (19, 6th inning off Lackey, 1 on, 1 out; 8th inning off Bard, 1 on, 0 out), Hafner (10, 6th inning off Lackey, 0 on, 1 out), Kipnis (2, 9th inning off Albers, 0 on, 1 out).
TB: Brantley 2; Kipnis 7; Cabrera, A 9; Hafner 4; Santana, C; Fukudome; LaPorta 2; Carrera 2.
RBI: Brantley (41), Fukudome (15), Cabrera, A 4 (65), Hafner (43), LaPorta (39), Kipnis (3).
2-out RBI: Brantley.
Runners left in scoring position, 2 out: Kipnis; Carrera 2.
Team RISP: 3-for-7.
Team LOB: 4.

BASERUNNING
CS: Fukudome (3, 2nd base by Lackey/Saltalamacchia).

FIELDING
DP: (LaPorta-Cabrera, A-LaPorta).

]]>
31 | Pitches-strikes: Tomlin 94-66, Perez, R 12-8, Pestano 10-7, Perez, C 23-17, Lackey 106-74, Morales, F 3-3, Bard 20-14, Williams 11-9, Albers 16-10.
Groundouts-flyouts: Tomlin 6-6, Perez, R 2-0, Pestano 1-0, Perez, C 1-0, Lackey 7-4, Morales, F 0-0, Bard 1-0, Williams 2-0, Albers 1-1.
Batters faced: Tomlin 27, Perez, R 3, Pestano 3, Perez, C 5, Lackey 27, Morales, F 1, Bard 4, Williams 4, Albers 4.
Inherited runners-scored: Morales, F 1-0, Williams 1-1.
Umpires: HP: Mike Estabrook. 1B: Todd Tichenor. 2B: Gerry Davis. 3B: Angel Hernandez.
Weather: 83 degrees, partly cloudy.
Wind: 13 mph, Out to CF.
T: 3:11.
Att: 37,943.
]]>
32 | -------------------------------------------------------------------------------- /pypitches/test/static/testdummy/year_2011/month_08/day_01/gid_2011_08_01_clemlb_bosmlb_1/game.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | -------------------------------------------------------------------------------- /pypitches/test/static/testdummy/year_2011/month_08/day_01/gid_2011_08_01_clemlb_bosmlb_1/inning/inning_all.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | -------------------------------------------------------------------------------- /pypitches/test/static/testdummy/year_2011/month_08/day_01/gid_2011_08_01_clemlb_bosmlb_1/players.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | -------------------------------------------------------------------------------- /pypitches/test/static/testdummy/year_2012/month_07/day_01/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gregoryck/pypitches/4c3e5d6987a8e7b23fc25022a00af0c15318ccd0/pypitches/test/static/testdummy/year_2012/month_07/day_01/.DS_Store -------------------------------------------------------------------------------- /pypitches/test/static/testdummy/year_2012/month_07/day_01/gid_2012_07_01_arimlb_milmlb_1/boxscore.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | a-Struck out for Rodriguez, Fr in the 8th.
1-Ran for Ramirez, Ar in the 9th. ]]> 35 |
36 | BATTING
2B: Aoki (13, Collmenter), Weeks (12, Collmenter).
TB: Ramirez, Ar; Weeks 2; Aoki 3.
RBI: Morgan (5).
Runners left in scoring position, 2 out: Maldonado 2.
Team RISP: 0-for-4.
Team LOB: 5.

BASERUNNING
SB: Gomez (9, 2nd base off Corbin/Montero).

FIELDING
E: Weeks 2 (9, fielding, throw).
DP: (Ransom-Weeks-Hart, C).

]]> 37 |
38 |
39 | 40 | 41 | 42 | (BS, 3) 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | a-Walked for Collmenter in the 7th. b-Struck out for Hernandez, Da in the 9th. ]]> 61 | 62 | BATTING
HR: Kubel (12, 8th inning off Rodriguez, Fr, 0 on, 0 out).
TB: Parra, G; Bloomquist; Hill, A; Kubel 5.
RBI: Kubel (51).
Runners left in scoring position, 2 out: Parra, G; Bloomquist; Hill, A; Upton, J 2.
SAC: Collmenter.
GIDP: Hill, A.
Team RISP: 0-for-4.
Team LOB: 9.

BASERUNNING
CS: Bloomquist (8, 2nd base by Axford/Maldonado).

FIELDING
E: Montero (5, throw), Parra, G (2, throw).

]]> 63 |
64 |
65 | WP: Collmenter, Gallardo.
Pitches-strikes: Collmenter 93-66, Shaw 18-9, Hernandez, Da 17-10, Corbin 12-4, Gallardo 117-66, Veras 6-4, Rodriguez, Fr 18-11, Axford 17-11.
Groundouts-flyouts: Collmenter 4-7, Shaw 1-2, Hernandez, Da 0-1, Corbin 0-1, Gallardo 9-6, Veras 0-0, Rodriguez, Fr 2-0, Axford 0-0.
Batters faced: Collmenter 21, Shaw 5, Hernandez, Da 4, Corbin 2, Gallardo 29, Veras 1, Rodriguez, Fr 4, Axford 3.
Inherited runners-scored: Veras 3-0.
Umpires: HP: Ed Hickox. 1B: Mark Carlson. 2B: Angel Hernandez. 3B: Chris Conroy.
Weather: 82 degrees, cloudy.
Wind: 5 mph, In from LF.
T: 3:14.
Att: 38,605.
]]> 66 |
67 |
-------------------------------------------------------------------------------- /pypitches/test/static/testdummy/year_2012/month_07/day_01/gid_2012_07_01_arimlb_milmlb_1/game.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | -------------------------------------------------------------------------------- /pypitches/test/static/testdummy/year_2012/month_07/day_01/gid_2012_07_01_arimlb_milmlb_1/players.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | -------------------------------------------------------------------------------- /pypitches/test/static/testdummy/year_2012/month_07/day_01/gid_2012_07_01_bosmlb_seamlb_1/boxscore.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 1-Ran for Smoak in the 9th. ]]> 37 | 38 | BATTING
2B: Smoak (4, Padilla).
TB: Figgins; Ryan 2; Smoak 2.
RBI: Suzuki, I (27).
Runners left in scoring position, 2 out: Montero; Figgins; Ryan.
SF: Suzuki, I.
GIDP: Montero.
Team RISP: 1-for-7.
Team LOB: 8.

BASERUNNING
SB: Figgins (4, 2nd base off Doubront/Shoppach), Ryan 2 (7, 2nd base off Doubront/Shoppach, 3rd base off Doubront/Shoppach).
CS: Ackley (2, 2nd base by Albers/Shoppach).

]]> 39 |
40 |
41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | a-Doubled for Lillibridge in the 10th. ]]> 65 | 66 | BATTING
2B: Kalish (2, League).
HR: Pedroia (6, 8th inning off Vargas, 0 on, 1 out).
TB: Kalish 2; Ross, C 2; Gonzalez, Ad; Pedroia 5; Punto.
RBI: Pedroia (32), Ortiz (54).
Runners left in scoring position, 2 out: Punto 2; Shoppach; Gonzalez, Ad.
SF: Ortiz.
Team RISP: 1-for-6.
Team LOB: 9.

BASERUNNING
SB: Middlebrooks (3, 2nd base off Wilhelmsen/Montero), Pedroia (5, 2nd base off Luetge/Montero).

FIELDING
E: Shoppach (2, throw).
DP: (Middlebrooks-Gonzalez, Ad).

]]> 67 |
68 |
69 | IBB: Ross, C (by Luetge).
HBP: Seager (by Doubront).
Pitches-strikes: Doubront 103-61, Albers 15-7, Atchison 21-15, Padilla 16-6, Aceves 5-3, Vargas 117-74, Wilhelmsen 17-9, League 8-6, Luetge 10-3.
Groundouts-flyouts: Doubront 2-6, Albers 3-0, Atchison 5-0, Padilla 0-2, Aceves 0-2, Vargas 5-5, Wilhelmsen 1-1, League 1-0, Luetge 0-2.
Batters faced: Doubront 21, Albers 4, Atchison 6, Padilla 5, Aceves 3, Vargas 31, Wilhelmsen 4, League 3, Luetge 3.
Inherited runners-scored: Albers 3-0, Luetge 2-1.
Umpires: HP: Fieldin Culbreth. 1B: Adrian Johnson. 2B: Gary Cederstrom. 3B: Lance Barksdale.
Weather: 62 degrees, cloudy.
Wind: 6 mph, In from CF.
T: 3:21.
Att: 34,065.
]]> 70 |
71 |
72 | -------------------------------------------------------------------------------- /pypitches/test/static/testdummy/year_2012/month_07/day_01/gid_2012_07_01_bosmlb_seamlb_1/game.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | -------------------------------------------------------------------------------- /pypitches/test/static/testdummy/year_2012/month_07/day_01/gid_2012_07_01_bosmlb_seamlb_1/players.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | -------------------------------------------------------------------------------- /pypitches/test/static/testdummy/year_2012/month_07/day_01/gid_2012_07_01_chamlb_nyamlb_1/boxscore.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | BATTING
HR: Chavez, Er (6, 2nd inning off Floyd, 1 on, 0 out), Cano (20, 3rd inning off Floyd, 1 on, 1 out).
TB: Cano 5; Chavez, Er 5; Ibanez; Jeter; Swisher; Granderson.
RBI: Chavez, Er 2 (15), Cano 2 (46).
Runners left in scoring position, 2 out: Swisher 2; Martin 2; Teixeira.
GIDP: Cano.
Team RISP: 1-for-7.
Team LOB: 9.

BASERUNNING
SB: Ibanez (3, 2nd base off Floyd/Pierzynski).

]]> 31 |
32 |
33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | BATTING
2B: De Aza (15, Hughes, P), Rios (15, Hughes, P).
TB: Konerko; Pierzynski; Rios 3; De Aza 2; Youkilis.
RBI: Youkilis (18), Rios (42).
2-out RBI: Rios.
Runners left in scoring position, 2 out: De Aza; Pierzynski 2.
Team RISP: 2-for-7.
Team LOB: 6.

BASERUNNING
SB: Ramirez, Al (10, 2nd base off Hughes, P/Martin).

FIELDING
E: Youkilis (4, fielding).
DP: (Floyd-Pierzynski-Konerko).

]]> 51 |
52 |
53 | WP: Floyd.
Pitches-strikes: Floyd 122-70, Septimo 19-15, Jones, N 5-4, Hughes, P 106-74, Soriano, R 18-11.
Groundouts-flyouts: Floyd 7-5, Septimo 3-2, Jones, N 0-0, Hughes, P 5-3, Soriano, R 0-1.
Batters faced: Floyd 28, Septimo 8, Jones, N 1, Hughes, P 31, Soriano, R 4.
Inherited runners-scored: Septimo 1-0, Jones, N 1-0.
Umpires: HP: Eric Cooper. 1B: Marty Foster. 2B: Tim Timmons. 3B: Jeff Kellogg.
Weather: 95 degrees, sunny.
Wind: 15 mph, R to L.
T: 2:48.
Att: 48,324.
]]> 54 |
55 |
-------------------------------------------------------------------------------- /pypitches/test/static/testdummy/year_2012/month_07/day_01/gid_2012_07_01_chamlb_nyamlb_1/game.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | -------------------------------------------------------------------------------- /pypitches/test/static/testdummy/year_2012/month_07/day_01/gid_2012_07_01_chamlb_nyamlb_1/players.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | -------------------------------------------------------------------------------- /pypitches/test/test_basics.py: -------------------------------------------------------------------------------- 1 | import nose 2 | import sys 3 | from os.path import split, join, abspath, curdir, dirname 4 | from subprocess import call 5 | from unittest import TestCase 6 | from sqlalchemy.exc import IntegrityError 7 | 8 | parent_dir = split(dirname(__file__))[0] 9 | pypitches_root = split(split(dirname(abspath(__file__)))[0])[0] 10 | sys.path = [parent_dir] + sys.path 11 | 12 | # this can get imported under different names, 13 | # which requires a hack 14 | import model 15 | print model.__name__ 16 | sys.modules['pypitches.model.session'] = sys.modules[model.__name__] 17 | sys.modules['model.session'] = sys.modules[model.__name__] 18 | 19 | from model import GameDir, Team, Game, Pitch 20 | from settings import postgres_password, postgres_user, postgres_test_db 21 | from setup_postgres import initdb, get_cursor 22 | 23 | static_dir = join(dirname(abspath(__file__)), 'static', 'testdummy') 24 | 25 | 26 | class TestBasics(TestCase): 27 | def setUp(self): 28 | # Destroy/Create test database 29 | initdb(postgres_test_db, postgres_user, postgres_password) 30 | self.session = model.SessionManager.create(postgres_test_db, postgres_user, postgres_password) 31 | 32 | def tearDown(self): 33 | self.session.rollback() 34 | self.session.close() 35 | model.SessionManager.destroy_all() 36 | 37 | def test_basics(self): 38 | team = Team() 39 | team.id = 1 40 | team.code = 'MAR' 41 | team.name = "Martian War Machines" 42 | team.name_full = "Martian War Machines" 43 | team.name_brief = "Mars" 44 | self.session.add(team) 45 | 46 | team2 = Team() 47 | team2.id = 2 48 | team2.code = 'VEN' 49 | team2.name = "Venusian Pressure Cookers" 50 | team2.name_full = "Venusian Pressure Cookers" 51 | team2.name_brief = "Venus" 52 | self.session.add(team2) 53 | self.session.flush() 54 | 55 | def test_classify(self): 56 | from pypitches import select_gamedirs 57 | select_gamedirs.classify_local_dirs_by_filesystem(static_dir) 58 | self.assertEqual(self.session.query(GameDir).count(), 4) 59 | self.assertEqual(self.session.query(GameDir).filter(GameDir.status=='postponed').count(), 1) 60 | 61 | def test_load(self): 62 | from pypitches import select_gamedirs 63 | from pypitches import load 64 | select_gamedirs.classify_local_dirs_by_filesystem(static_dir) 65 | load.load() 66 | self.assertEqual(self.session.query(Game).count(), 3) 67 | self.session.commit() 68 | 69 | class TestWeb(TestCase): 70 | def test_import(self): 71 | import web 72 | from web import app 73 | 74 | -------------------------------------------------------------------------------- /pypitches/test/test_plots.py: -------------------------------------------------------------------------------- 1 | import nose 2 | import sys 3 | from os.path import split, join, abspath, curdir, dirname 4 | from subprocess import call 5 | from unittest import TestCase 6 | from sqlalchemy.exc import IntegrityError 7 | 8 | parent_dir = split(dirname(__file__))[0] 9 | pypitches_root = split(split(dirname(abspath(__file__)))[0])[0] 10 | sys.path = [parent_dir] + sys.path 11 | 12 | # this can get imported under different names, 13 | # which requires a hack 14 | import model 15 | print model.__name__ 16 | sys.modules['pypitches.model.session'] = sys.modules[model.__name__] 17 | sys.modules['model.session'] = sys.modules[model.__name__] 18 | 19 | from model import GameDir, Team, Game, Pitch 20 | # sys.modules[''] 21 | from settings import postgres_password, postgres_user, postgres_test_db 22 | from setup_postgres import initdb, get_cursor 23 | 24 | static_dir = join(dirname(abspath(__file__)), 'static', 'testdummy') 25 | 26 | class TestPlots(TestCase): 27 | def setUp(self): 28 | initdb(postgres_test_db, postgres_user, postgres_password) 29 | self.session = model.SessionManager.create(postgres_test_db, postgres_user, postgres_password) 30 | 31 | from pypitches import select_gamedirs 32 | from pypitches import load 33 | select_gamedirs.classify_local_dirs_by_filesystem(static_dir) 34 | try: 35 | load.load() 36 | except IntegrityError: 37 | import pdb 38 | pdb.set_trace() 39 | self.assertEqual(self.session.query(Game).count(), 3) 40 | 41 | def tearDown(self): 42 | self.session.rollback() 43 | self.session.close() 44 | model.SessionManager.destroy_all() 45 | 46 | def test_atbat(self): 47 | pitch = self.session.query(Pitch).filter().all()[0] 48 | atbat = pitch.atbat 49 | self.assertEqual(atbat.game, pitch.game) 50 | 51 | # class TestDB(TestCase): 52 | # def setUp(self): 53 | # self.game = createGame() 54 | # self.atbat = createAtBat(self.game) 55 | # self.pitch = createPitch(self.atbat) 56 | 57 | # game_pk = 1 58 | # def createGame(): 59 | # conn, cursor = get_cursor(postgres_test_db, postgres_user, postgres_password) 60 | # sql = """ 61 | # INSERT INTO game 62 | # (game_pk, away_team_code, home_team_code, away_fname, home_fname, away_sname, home_sname, date) 63 | # VALUES 64 | # (%d, 'BOS', 'CHA', 'Boston Red Sox', 'Chicago White Sox', '', '', '2013-04-01') 65 | # """ 66 | # cursor.execute(sql, game_pk ) 67 | # game_pk += 1 68 | # return game_pk - 1 69 | 70 | # playerid = 1 71 | # def createPlayer(): 72 | # sql = """ 73 | # INSERT INTO player 74 | # (id, first, last) 75 | # VALUES 76 | # (%d, 'Testy', 'Testerson') 77 | # """ 78 | # conn, cursor = get_cursor(postgres_test_db, postgres_user, postgres_password) 79 | # cursor.execute(sql, playerid) 80 | # playerid += 1 81 | # return playerid - 1 82 | 83 | # atbatnum = 1 84 | # def createAtBat(game, pitcher, batter): 85 | # sql = """ 86 | # INSERT INTO atbat 87 | # (inning, num, b, s, batter, stand, p_throws, pitcher, des, event, brief_event, game_pk, date) 88 | # VALUES 89 | # (1, %d, 1, 1, %d, 'R', 'R', %d, 'batted ball hits a dove', 'single', 'single', %d, '2013-04-01') 90 | # """ 91 | # conn, cursor = get_cursor(postgres_test_db, postgres_user, postgres_password) 92 | # cursor.execute(sql, batter.id, pitcher.id, game.game_pk) 93 | # atbatnum += 1 94 | # return atbatnum - 1 95 | 96 | 97 | 98 | if __name__ == "__main__": 99 | nose.main() 100 | -------------------------------------------------------------------------------- /pypitches/wakefield.yaml: -------------------------------------------------------------------------------- 1 | name: [Tim, Wakefield] 2 | all_file: wakefield_all.png 3 | plots: 4 | - 5 | 'pitch_type': 'FF' 6 | 'filename': 'wake_fourseam.png' 7 | 'marker': 'o' 8 | 'color': '#CC0000' 9 | - 10 | 'pitch_type': 'KN' 11 | 'filename': 'wake_knuckle.png' 12 | 'marker': 'p' 13 | 'color': '#DD00DD' 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /pypitches/web/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gregoryck/pypitches/4c3e5d6987a8e7b23fc25022a00af0c15318ccd0/pypitches/web/__init__.py -------------------------------------------------------------------------------- /pypitches/web/app.py: -------------------------------------------------------------------------------- 1 | from flask import Flask, request, session, url_for, render_template, flash, send_from_directory, jsonify 2 | import load 3 | import traceback 4 | from os.path import join 5 | from model import SessionManager, GameDir 6 | 7 | 8 | app = Flask(__name__) 9 | Session = None 10 | 11 | @app.route('/pypitches/player') 12 | def player(): 13 | return render_template('player.html') 14 | 15 | @app.route('/pypitches/_player_name_completion') 16 | def player_name_completion(): 17 | return jsonify({'error': 'NotImplemented'}) 18 | 19 | @app.route('/pypitches/_player_charts') 20 | def player_charts(): 21 | return jsonify({'error': 'NotImplemented'}) 22 | 23 | @app.route('/pypitches/status') 24 | def status(): 25 | return render_template("status.html") 26 | 27 | @app.route('/pypitches/controls') 28 | def controls(): 29 | return render_template("controls.html") 30 | 31 | @app.route('/pypitches/load', methods=['POST']) 32 | def load(): 33 | gamedir_id = request.form['gamedir_id'] 34 | 35 | @app.route('/pypitches/_gamedata') 36 | def gamedirs(): 37 | start = int(request.args['iDisplayStart']) 38 | length = int(request.args['iDisplayLength']) 39 | columns = int(request.args['iColumns']) 40 | echo = int(request.args['sEcho']) 41 | print start, length, columns, echo 42 | try: 43 | query = Session.query(GameDir.path, GameDir.url, GameDir.downloaded_time, GameDir.loaded_time, GameDir.date_scheduled) 44 | count = query.count() 45 | rows = [fmt_row(row, ident, ident, str, str, str) for row in query.all()] 46 | return jsonify({ 47 | 'iTotalRecords': count, 48 | 'iTotalDisplayRecords': count, 49 | 'sEcho': echo, 50 | 'aaData': rows, 51 | 'DT_RowClass': 'any_row', 52 | }) 53 | except: 54 | print traceback.format_exc() 55 | Session.rollback() 56 | return "
{0}
".format(traceback.format_exc()) 57 | 58 | def ident(x): 59 | return x 60 | 61 | 62 | def fmt_row(row, *types): 63 | return [ ty(col) for ty, col in zip(types, row)] 64 | 65 | 66 | @app.route('/static/') 67 | def send_foo(filename): 68 | print "hi!", filename 69 | print app.root_path 70 | try: 71 | return send_from_directory(join(app.root_path, 'static'), filename) 72 | except: 73 | print "hey!" 74 | traceback.print_exc() 75 | raise 76 | -------------------------------------------------------------------------------- /pypitches/web/static/countries.json: -------------------------------------------------------------------------------- 1 | { 2 | identifier: 'id', 3 | label: 'name', 4 | items: [ 5 | { id: 'AF', name:'Africa', type:'continent', population:'900 million', area: '30,221,532 sq km', 6 | timezone: '-1 UTC to +4 UTC', 7 | children:[{_reference:'EG'}, {_reference:'KE'}, {_reference:'SD'}] }, 8 | { id: 'EG', name:'Egypt', type:'country' }, 9 | { id: 'KE', name:'Kenya', type:'country', 10 | children:[{_reference:'Nairobi'}, {_reference:'Mombasa'}] }, 11 | { id: 'Nairobi', name:'Nairobi', type:'city' }, 12 | { id: 'Mombasa', name:'Mombasa', type:'city' }, 13 | { id: 'SD', name:'Sudan', type:'country', 14 | children:{_reference:'Khartoum'} }, 15 | { id: 'Khartoum', name:'Khartoum', type:'city' }, 16 | { id: 'AS', name:'Asia', type:'continent', 17 | children:[{_reference:'CN'}, {_reference:'IN'}, {_reference:'RU'}, {_reference:'MN'}] }, 18 | { id: 'CN', name:'China', type:'country' }, 19 | { id: 'IN', name:'India', type:'country' }, 20 | { id: 'RU', name:'Russia', type:'country' }, 21 | { id: 'MN', name:'Mongolia', type:'country' }, 22 | { id: 'OC', name:'Oceania', type:'continent', population:'21 million', 23 | children:{_reference:'AU'}}, 24 | { id: 'AU', name:'Australia', type:'country', population:'21 million'}, 25 | { id: 'EU', name:'Europe', type:'continent', 26 | children:[{_reference:'DE'}, {_reference:'FR'}, {_reference:'ES'}, {_reference:'IT'}] }, 27 | { id: 'DE', name:'Germany', type:'country' }, 28 | { id: 'FR', name:'France', type:'country' }, 29 | { id: 'ES', name:'Spain', type:'country' }, 30 | { id: 'IT', name:'Italy', type:'country' }, 31 | { id: 'NA', name:'North America', type:'continent', 32 | children:[{_reference:'MX'}, {_reference:'CA'}, {_reference:'US'}] }, 33 | { id: 'MX', name:'Mexico', type:'country', population:'108 million', area:'1,972,550 sq km', 34 | children:[{_reference:'Mexico City'}, {_reference:'Guadalajara'}] }, 35 | { id: 'Mexico City', name:'Mexico City', type:'city', population:'19 million', timezone:'-6 UTC'}, 36 | { id: 'Guadalajara', name:'Guadalajara', type:'city', population:'4 million', timezone:'-6 UTC' }, 37 | { id: 'CA', name:'Canada', type:'country', population:'33 million', area:'9,984,670 sq km', 38 | children:[{_reference:'Ottawa'}, {_reference:'Toronto'}] }, 39 | { id: 'Ottawa', name:'Ottawa', type:'city', population:'0.9 million', timezone:'-5 UTC'}, 40 | { id: 'Toronto', name:'Toronto', type:'city', population:'2.5 million', timezone:'-5 UTC' }, 41 | { id: 'US', name:'United States of America', type:'country' }, 42 | { id: 'SA', name:'South America', type:'continent', 43 | children:[{_reference:'BR'}, {_reference:'AR'}] }, 44 | { id: 'BR', name:'Brazil', type:'country', population:'186 million' }, 45 | { id: 'AR', name:'Argentina', type:'country', population:'40 million' } 46 | ]} 47 | -------------------------------------------------------------------------------- /pypitches/web/static/datatables.css: -------------------------------------------------------------------------------- 1 | /* 2 | * Colour dictionary: 3 | * 4 | * Table control elements: #719ba7 5 | * Header cells: #66A9BD 6 | * Body header cells: #91c5d4 7 | * Body content cells: #d5eaf0 8 | * Body content cells (alt): #bcd9e1 9 | * Footer header: #b0cc7f 10 | * Footer content: #d7e1c5 11 | */ 12 | 13 | 14 | /* 15 | * Page setup styles 16 | */ 17 | body { 18 | font: 80%/1.45em Arial, Verdana, Helvetica, sans-serif; 19 | margin: 0; 20 | padding: 0; 21 | color: #111; 22 | background-color: #fff; 23 | } 24 | 25 | #container { 26 | margin: 0 auto; 27 | width: 960px 28 | } 29 | 30 | h1 { 31 | text-align: center; 32 | font-size: 1.2em; 33 | font-weight: bold; 34 | margin: 1em 0; 35 | } 36 | 37 | 38 | /* 39 | * DataTables framework 40 | */ 41 | div.dataTables_wrapper { 42 | background-color: #D9D4D6; 43 | } 44 | 45 | div.dataTables_length { 46 | float: left; 47 | } 48 | 49 | div.dataTables_filter { 50 | float: right; 51 | } 52 | 53 | div.dataTables_info { 54 | padding: 9px 6px 6px 6px; 55 | float: left; 56 | } 57 | 58 | div.dataTables_paginate { 59 | float: right; 60 | } 61 | 62 | div.dataTables_length, 63 | div.dataTables_filter, 64 | div.dataTables_paginate { 65 | padding: 6px; 66 | } 67 | 68 | /* Self clearing - http://www.webtoolkit.info/css-clearfix.html */ 69 | .dataTables_wrapper:after { 70 | content: "."; 71 | display: block; 72 | clear: both; 73 | visibility: hidden; 74 | line-height: 0; 75 | height: 0; 76 | } 77 | 78 | html[xmlns] .dataTables_wrapper { 79 | display: block; 80 | } 81 | 82 | * html .dataTables_wrapper { 83 | height: 1%; 84 | } 85 | 86 | 87 | /* 88 | * Table styles 89 | */ 90 | table.pretty { 91 | width: 100%; 92 | clear: both; 93 | } 94 | 95 | table.pretty td, 96 | table.pretty th { 97 | padding: 5px; 98 | border: 1px solid #fff; 99 | } 100 | 101 | /* Header cells */ 102 | table.pretty thead th { 103 | text-align: center; 104 | background: #E3E1E2; 105 | } 106 | 107 | /* Body cells */ 108 | table.pretty tbody th { 109 | text-align: left; 110 | background: #E3E1E2; 111 | } 112 | 113 | table.pretty tbody td { 114 | text-align: center; 115 | background: #CCCACB; 116 | } 117 | 118 | table.pretty tbody tr.odd td { 119 | background: #E3E1E2; 120 | } 121 | 122 | table.pretty tbody tr.row_selected td { 123 | background: #B3041B; 124 | } 125 | 126 | /* Footer cells */ 127 | table.pretty tfoot th { 128 | background: #E3E1E2; 129 | text-align: left; 130 | } 131 | 132 | table.pretty tfoot td { 133 | background: #E3E1E2; 134 | text-align: center; 135 | font-weight: bold; 136 | } 137 | 138 | 139 | 140 | /* 141 | * Pagination 142 | */ 143 | a.paginate_button, 144 | a.paginate_active { 145 | display: inline-block; 146 | background-color: #D5D9F5; 147 | padding: 2px 6px; 148 | margin-left: 2px; 149 | cursor: pointer; 150 | *cursor: hand; 151 | } 152 | 153 | a.paginate_active { 154 | background-color: transparent; 155 | border: 1px solid black; 156 | } 157 | 158 | a.paginate_button_disabled { 159 | color: #111111; 160 | } 161 | .paging_full_numbers a:active { 162 | outline: none 163 | } 164 | .paging_full_numbers a:hover { 165 | text-decoration: none; 166 | } 167 | 168 | div.dataTables_paginate span>a { 169 | width: 15px; 170 | text-align: center; 171 | } 172 | 173 | 174 | /* 175 | * Sorting 176 | */ 177 | table.pretty thead th.sorting_asc { 178 | background: #CCCFE3 url('images/sort_asc.png') no-repeat right center; 179 | } 180 | 181 | table.pretty thead th.sorting_desc { 182 | background: #CCCFE3 url('images/sort_desc.png') no-repeat right center; 183 | } 184 | 185 | table.pretty thead th.sorting { 186 | background: #CCCFE3 url('images/sort_both.png') no-repeat right center; 187 | } 188 | 189 | -------------------------------------------------------------------------------- /pypitches/web/static/fixed.css: -------------------------------------------------------------------------------- 1 | html,body{margin:0;padding:0} 2 | body{font: 76% arial,sans-serif;text-align:center} 3 | p{margin:0 10px 10px} 4 | a{display:block;color: #981793;padding:10px} 5 | div#header h1{height:80px;line-height:80px;margin:0; 6 | padding-left:10px;background: #EEE;color: #79B30B} 7 | div#container{text-align:left} 8 | div#content p{line-height:1.4} 9 | div#navigation{background:#B9CAFF} 10 | div#extra{background:#FF8539} 11 | div#footer{background: #333;color: #FFF} 12 | div#footer p{margin:0;padding:5px 10px} 13 | 14 | div#container{width:900px;margin:0 auto} 15 | div#content{float:right;width:700px} 16 | div#navigation{float:left;width:200px} 17 | div#extra{float:left;clear:left;width:200px} 18 | div#footer{clear:both;width:100%} 19 | -------------------------------------------------------------------------------- /pypitches/web/templates/controls.html: -------------------------------------------------------------------------------- 1 | {% extends "layout.html" %} 2 | {% block body %} 3 | No. 4 | {% endblock %} 5 | -------------------------------------------------------------------------------- /pypitches/web/templates/layout.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | {% block title %}PyPitches{% endblock %} 5 | 6 | 7 | 8 | {% block scripts %}{% endblock %} 9 | 10 | 11 | {% with flashes = get_flashed_messages() %} 12 | {% if flashes %} 13 | 18 | {% endif %} 19 | {% endwith %} 20 |
21 | 22 |
23 |
24 | {% block content %}{% endblock %} 25 |
26 |
27 | 35 |
36 |
37 |
38 | 39 | 40 | -------------------------------------------------------------------------------- /pypitches/web/templates/login.html: -------------------------------------------------------------------------------- 1 | {% extends "layout.html" %} 2 | {% block title %}Sign In{% endblock %} 3 | {% block body %} 4 |

Sign In

5 | {% if error %}
Error: {{ error }}
{% endif %} 6 |
7 |
8 |
Username: 9 |
10 |
Password: 11 |
12 |
13 |
14 |
15 | {% endblock %} 16 | 17 | -------------------------------------------------------------------------------- /pypitches/web/templates/register.html: -------------------------------------------------------------------------------- 1 | {% extends "layout.html" %} 2 | {% block title %}Sign Up{% endblock %} 3 | {% block body %} 4 |

Sign Up

5 | {% if error %}
Error: {{ error }}
{% endif %} 6 |
7 |
8 |
Username: 9 |
10 |
E-Mail: 11 |
12 |
Password: 13 |
14 |
Password (repeat): 15 |
16 |
17 |
18 |
19 | {% endblock %} 20 | -------------------------------------------------------------------------------- /pypitches/web/templates/status.html: -------------------------------------------------------------------------------- 1 | {% extends "layout.html" %} 2 | {% block title %} 3 | Status 4 | {% endblock %} 5 | {% block scripts %} 6 | 8 | 10 | 38 | 39 | {% endblock %} 40 | 41 | {% block content %} 42 |

{{ self.title() }}

43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 |
PathURLDownloadedLoadedDate Scheduled
54 | {% endblock %} 55 | -------------------------------------------------------------------------------- /pypitches/web/templates/timeline.html: -------------------------------------------------------------------------------- 1 | {% extends "layout.html" %} 2 | {% block title %} 3 | {% if request.endpoint == 'public_timeline' %} 4 | Public Timeline 5 | {% elif request.endpoint == 'user_timeline' %} 6 | {{ profile_user.username }}'s Timeline 7 | {% else %} 8 | My Timeline 9 | {% endif %} 10 | {% endblock %} 11 | {% block body %} 12 |

{{ self.title() }}

13 | {% if g.user %} 14 | {% if request.endpoint == 'user_timeline' %} 15 |
16 | {% if g.user.user_id == profile_user.user_id %} 17 | This is you! 18 | {% elif followed %} 19 | You are currently following this user. 20 | Unfollow user. 22 | {% else %} 23 | You are not yet following this user. 24 | . 26 | {% endif %} 27 |
28 | {% elif request.endpoint == 'timeline' %} 29 |
30 |

What's on your mind {{ g.user.username }}?

31 |
32 |

34 |

35 |
36 | {% endif %} 37 | {% endif %} 38 | 49 | {% endblock %} 50 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | BeautifulSoup==3.2.1 2 | Flask==0.9 3 | Jinja2==2.6 4 | PyYAML==3.10 5 | SQLAlchemy==0.7.10 6 | Werkzeug==0.8.3 7 | argparse==1.2.1 8 | ipython==0.13.1 9 | psycopg2==2.4.6 10 | python-dateutil==2.1 11 | six==1.2.0 12 | wsgiref==0.1.2 13 | matplotlib==1.2.0 14 | numpy==1.6.2 15 | nose==1.2.1 16 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | 3 | setup ( 4 | name = "PyPitches", 5 | version = "0.1.0", 6 | author = "Greg Kettler", 7 | author_email = "gkettler@gmail.com", 8 | packages=['pypitches', 'pypitches.test'], 9 | #scripts=[], 10 | url="http://thefamilyatomics.com/pypitches", 11 | description="Analyzing Major League Baseball pitch data from PITCHf/x", 12 | long_description=open('README.rst').read(), 13 | requires=[ 14 | "SQLAlchemy (>=0.7)", 15 | "BeautifulSoup (>=3.2.0)", 16 | "matplotlib (>=1.0.0)", 17 | "numpy (>=1.6.1)", 18 | "flask (>=0.8)", 19 | "ipython (>=0.12)", 20 | "PyYAML (>=3.10)", 21 | "nose (>=1.1.2)", 22 | "coverage (>=3.5.2)", 23 | "Jinja2 (>=2.6)", 24 | "flask (>=0.9)", 25 | "psycopg2 (>=2.4.4)", 26 | ], 27 | ) 28 | 29 | -------------------------------------------------------------------------------- /sql/baseball.sql: -------------------------------------------------------------------------------- 1 | begin; 2 | DROP VIEW IF EXISTS ranges; 3 | DROP TABLE IF EXISTS Pitch; 4 | DROP TABLE IF EXISTS Runner; 5 | DROP TABLE IF EXISTS Atbat; 6 | DROP TABLE IF EXISTS Playeringame; 7 | DROP TABLE IF EXISTS Game; 8 | DROP TABLE IF EXISTS Stadium; 9 | DROP TABLE IF EXISTS Player; 10 | DROP TABLE IF EXISTS Team; 11 | DROP TABLE IF EXISTS gamedir; 12 | 13 | CREATE TABLE stadium ( 14 | id INTEGER PRIMARY KEY, 15 | name VARCHAR(128), 16 | location VARCHAR(128) 17 | ); 18 | 19 | CREATE TABLE team ( 20 | id INTEGER, 21 | code CHAR(3) PRIMARY KEY, 22 | name VARCHAR(64), 23 | name_full VARCHAR(128), 24 | name_brief VARCHAR(64) 25 | ); 26 | 27 | CREATE TABLE player ( 28 | id INTEGER PRIMARY KEY, 29 | first VARCHAR(64), 30 | last VARCHAR(64), 31 | boxname VARCHAR(64), 32 | rl CHAR(1) -- does this change? 33 | ); 34 | 35 | CREATE TABLE game ( 36 | game_pk INTEGER PRIMARY KEY, 37 | type CHAR(1), 38 | 39 | away_team_code CHAR(3), 40 | home_team_code CHAR(3), 41 | away_fname VARCHAR(42), 42 | home_fname VARCHAR(42), 43 | away_sname VARCHAR(16), 44 | home_sname VARCHAR(16), 45 | stadium INTEGER, 46 | date VARCHAR(32), 47 | 48 | FOREIGN KEY (away_team_code) REFERENCES team (code), 49 | FOREIGN KEY (home_team_code) REFERENCES team (code), 50 | FOREIGN KEY (stadium) REFERENCES stadium (id) 51 | ); 52 | 53 | CREATE TABLE playeringame ( 54 | id INTEGER NOT NULL, 55 | num INTEGER, -- this changes 56 | position CHAR(2), 57 | bat_order INTEGER, 58 | game_position CHAR(2), -- the difference? 59 | avg FLOAT, 60 | hr INTEGER, 61 | rbi INTEGER, 62 | wins INTEGER, 63 | losses INTEGER, 64 | era FLOAT, 65 | 66 | game_pk INTEGER NOT NULL, 67 | 68 | FOREIGN KEY (id) REFERENCES player (id), 69 | FOREIGN KEY (game_pk) REFERENCES game (game_pk), 70 | PRIMARY KEY (id, game_pk) 71 | ); 72 | 73 | CREATE TABLE atbat ( 74 | 75 | inning INTEGER, 76 | num INTEGER NOT NULL, 77 | b INTEGER, 78 | s INTEGER, 79 | batter INTEGER, 80 | stand CHAR(1), 81 | p_throws CHAR(1), 82 | b_height VARCHAR(32), 83 | pitcher INTEGER, 84 | des VARCHAR(512), 85 | event VARCHAR(128), 86 | brief_event VARCHAR(128), 87 | 88 | game_pk INTEGER NOT NULL, 89 | date VARCHAR(32), 90 | 91 | FOREIGN KEY (game_pk) REFERENCES game (game_pk), 92 | FOREIGN KEY (batter) REFERENCES player (id), 93 | FOREIGN KEY (pitcher) REFERENCES player (id), 94 | PRIMARY KEY (game_pk, num) 95 | 96 | ); 97 | 98 | --- 99 | ---CREATE SEQUENCE runner_runner_pk_seq --:POSTGRES 100 | --- INCREMENT BY 1 --:POSTGRES 101 | --- NO MAXVALUE --:POSTGRES 102 | --- NO MINVALUE --:POSTGRES 103 | --- CACHE 1; --:POSTGRES 104 | --- 105 | 106 | 107 | CREATE TABLE runner ( 108 | 109 | runner_pk SERIAL PRIMARY KEY, 110 | 111 | 112 | atbatnum INTEGER, 113 | game_pk INTEGER, 114 | 115 | id INTEGER, 116 | "start" VARCHAR(4), 117 | "end" VARCHAR(4), 118 | score CHAR(1), 119 | rbi CHAR(1), 120 | earned CHAR(1), 121 | event VARCHAR(128), 122 | 123 | FOREIGN KEY (atbatnum, game_pk) REFERENCES atbat (num, game_pk), 124 | FOREIGN KEY (game_pk) REFERENCES game (game_pk), 125 | FOREIGN KEY (id) REFERENCES player (id) 126 | ); 127 | 128 | CREATE TABLE pitch ( 129 | des VARCHAR(256), 130 | type CHAR(1), 131 | id INTEGER, 132 | x FLOAT, 133 | y FLOAT, 134 | sv_id VARCHAR(128), 135 | start_speed float, 136 | end_speed FLOAT, 137 | sz_top FLOAT, 138 | sz_bot FLOAT, 139 | pfx_x FLOAT, 140 | pfx_z FLOAT, 141 | px FLOAT, 142 | pz FLOAT, 143 | x0 FLOAT, 144 | y0 FLOAT, 145 | z0 FLOAT, 146 | vx0 FLOAT, 147 | vy0 FLOAT, 148 | vz0 FLOAT, 149 | ax FLOAT, 150 | ay FLOAT, 151 | az FLOAT, 152 | break_y FLOAT, 153 | break_angle FLOAT, 154 | break_length FLOAT, 155 | pitch_type VARCHAR(4), 156 | type_confidence FLOAT, 157 | spin_dir FLOAT, 158 | spin_rate FLOAT, 159 | nasty INTEGER, 160 | on_1b INTEGER, 161 | on_2b INTEGER, 162 | on_3b INTEGER, 163 | 164 | payoff BOOLEAN, --POSTGRES 165 | 166 | balls INTEGER, 167 | strikes INTEGER, 168 | 169 | game_pk INTEGER, 170 | pitcher INTEGER, 171 | batter INTEGER, 172 | atbatnum INTEGER, 173 | FOREIGN KEY (pitcher) REFERENCES player (id), 174 | FOREIGN KEY (batter) REFERENCES player (id), 175 | FOREIGN KEY (on_1b) REFERENCES player (id), 176 | FOREIGN KEY (on_2b) REFERENCES player (id), 177 | FOREIGN KEY (on_3b) REFERENCES player (id), 178 | FOREIGN KEY (game_pk, atbatnum) REFERENCES atbat (game_pk, num), 179 | 180 | PRIMARY KEY (game_pk, id) 181 | ); 182 | 183 | CREATE INDEX atbat_game on atbat (game_pk); 184 | CREATE INDEX atbat_batter on atbat (batter); 185 | CREATE INDEX atbat_pitcher on atbat (pitcher); 186 | CREATE INDEX playeringame_player on playeringame (id); 187 | CREATE INDEX playeringame_game on playeringame (game_pk); 188 | CREATE INDEX game_away on game (away_team_code); 189 | CREATE INDEX game_home on game (home_team_code); 190 | CREATE INDEX pitch_pitcher on pitch (pitcher); 191 | CREATE INDEX pitch_batter on pitch (batter); 192 | CREATE INDEX pitch_atbat on pitch (game_pk, atbatnum); 193 | 194 | 195 | 196 | 197 | -- administration tables_file 198 | CREATE TABLE gamedir ( 199 | id SERIAL PRIMARY KEY, 200 | local_copy BOOLEAN NOT NULL DEFAULT FALSE, 201 | url TEXT UNIQUE, 202 | path TEXT UNIQUE, 203 | status TEXT , -- or enum? final, postponed, error, what else? 204 | -- NULL means I haven't looked yet 205 | status_long TEXT, -- exactly what's the problem officer 206 | loaded BOOLEAN NOT NULL DEFAULT FALSE, 207 | game_pk INTEGER, 208 | atbats INTEGER, 209 | innings INTEGER, 210 | downloaded_time TIMESTAMP, 211 | loaded_time TIMESTAMP, 212 | classified_time TIMESTAMP NOT NULL DEFAULT now(), 213 | date_scheduled DATE, 214 | 215 | -- no foreign key! I fill this table before loading games "for real" 216 | --FOREIGN KEY (game_pk) REFERENCES game(game_pk), 217 | UNIQUE(url, path) 218 | ); 219 | 220 | create or replace language plpgsql; 221 | 222 | --create or replace aggregate avg(float) 223 | --( 224 | --sfunc = accu 225 | --); 226 | 227 | --create or replace aggregate normalize(float) 228 | --( 229 | -- sfunc=array_ap 230 | 231 | -- want to normalize... 232 | -- start_speed 233 | -- pfx_x (flip for sinisters) 234 | -- pfx_z 235 | -- release point? 236 | 237 | CREATE OR REPLACE FUNCTION normalized_speed(get_game_pk INTEGER, get_num INTEGER) RETURNS FLOAT AS 238 | $normalized_speed$ 239 | -- takes the primary key of a pitch. 240 | -- returns that pitch's speed normalized to the range [0,1.0] 241 | -- where 0 is the slowest and 1.0 the fastest pitch 242 | -- thrown by that pitcher 243 | 244 | -- using the very slowest pitch is bad because over the course of a season, 245 | -- that's probably an intentional ball: very slow indeed. 246 | -- What's the slowest "in anger" pitch? 247 | -- want to take 10th percentile probably 248 | 249 | declare 250 | start_speed_to_normalize float; 251 | pitcher_pk INTEGER; 252 | fastest float; 253 | slowest float; 254 | begin 255 | 256 | select start_speed, pitcher from pitch where game_pk=get_game_pk 257 | and id = get_num into start_speed_to_normalize, pitcher_pk; 258 | select avg(start_speed) + 2 * stddev(start_speed) from pitch where pitcher = pitcher_pk 259 | into fastest; 260 | select avg(start_speed) - 2*stddev(start_speed) from pitch where pitcher = pitcher_pk 261 | into slowest; 262 | return 2* (start_speed_to_normalize - slowest) / (fastest - slowest) - 0.5 ; 263 | end; 264 | $normalized_speed$ language 'plpgsql'; 265 | 266 | CREATE OR REPLACE FUNCTION lefty(p_throws char) 267 | RETURNS INTEGER AS 268 | $lefty$ 269 | BEGIN 270 | IF p_throws = 'L' THEN 271 | RETURN 1; 272 | ELSE 273 | RETURN -1; 274 | END IF; 275 | END; 276 | $lefty$ LANGUAGE 'plpgsql'; 277 | 278 | 279 | 280 | 281 | --CREATE TABLE ranges ( 282 | -- low_speed FLOAT, 283 | -- high_speed FLOAT, 284 | -- low_pfx_x FLOAT, 285 | -- high_pfx_x FLOAT, 286 | -- low_pfx_x FLOAT, 287 | -- high_pfx_x FLOAT, 288 | -- pitcher_id INTEGER, 289 | -- 290 | -- PRIMARY KEY (pitcher_id), 291 | -- FOREIGN KEY (pitcher_id) REFERENCES player (id) 292 | --); 293 | -- 294 | 295 | 296 | 297 | 298 | -- FOREIGN KEY (pitcher) references player (id), 299 | -- primary key (pitcher) 300 | CREATE VIEW ranges AS 301 | SELECT pitch.pitcher, 302 | avg(start_speed) - 2*stddev(start_speed) as low_speed, 303 | avg(start_speed) + 2 * stddev(start_speed) as high_speed, 304 | (avg(pfx_x * lefty(p_throws)) - 2*stddev(pfx_x * lefty(p_throws))) as low_pfx_x, 305 | (avg(pfx_x * lefty(p_throws)) + 2*stddev(pfx_x * lefty(p_throws))) as high_pfx_x, 306 | avg(pfx_z) - 2*stddev(pfx_z) as low_pfx_z, 307 | avg(pfx_z) + 2*stddev(pfx_z) as high_pfx_z 308 | FROM pitch 309 | join atbat on pitch.atbatnum = atbat.num and pitch.game_pk = atbat.game_pk 310 | GROUP BY pitch.pitcher; 311 | 312 | 313 | 314 | 315 | 316 | commit; --------------------------------------------------------------------------------