├── README.md ├── r_scripts ├── markov_chain_viz.r ├── career_length_survival_analysis.r ├── hof_factor_analysis.r ├── similarity_data_viz.r ├── anomaly_detection_royals_stats.r ├── arrieta_no_hitter_analysis.r ├── chris_sale_lca.r ├── smoltz_text_analysis.r ├── multidimensional_scaling_2016_teams.r ├── historical_team_clustering.r ├── hosmer_statcast_analysis.r ├── mlb_attendance_analysis.r ├── rookie_all_star_predictions.Rmd └── association_rules_2016_games.r ├── python_scripts ├── royals_monte_carlo.py ├── pitchfx_scraper.py ├── all_star_network_analysis.py ├── survival_ingestion_wrangling.py ├── home_runs.py ├── pitching_markov_chain.py ├── pitcher_similarity.py └── match_up_simulations.py └── csv_outputs └── historical_team_clustering_results.csv /README.md: -------------------------------------------------------------------------------- 1 | # baseballdatascience 2 | 3 | This repository contains R and Python scripts for projects on baseballdatascience.com. 4 | -------------------------------------------------------------------------------- /r_scripts/markov_chain_viz.r: -------------------------------------------------------------------------------- 1 | # Library imports 2 | library(ggplot2) 3 | library(ggpubr) 4 | 5 | # Read in data 6 | transitions <- read.csv('scherzer_transitions.csv') 7 | pitch_counts <- read.csv('pitches_in_counts.csv') 8 | 9 | # Transition Probabilities Plot 10 | ggdotchart(transitions, x = "Transition", y = "Probability", 11 | color = "Probability", 12 | sorting = "descending", 13 | rotate = TRUE, 14 | dot.size = 2, 15 | y.text.col = TRUE, 16 | ggtheme = theme_pubr() 17 | )+ 18 | theme_cleveland() 19 | 20 | # Pitch Counts Plot 21 | ggdotchart(pitch_counts, x = "pitch", y = "pitch_percentage", 22 | color = "count", 23 | palette = c("#FF0000", "#00FF00", "#0000FF", 24 | "#00FFFF", "#800080", "#FF7F50", 25 | "#FF8C00", "#008000", "#00FA9A", 26 | "#4169E1", "#00FFFF", "#FFD700"), 27 | 28 | sorting = "ascending", 29 | add = "segments", 30 | 
ggtheme = theme_pubr() 31 | ) 32 | 33 | 34 | 35 | 36 | 37 | -------------------------------------------------------------------------------- /r_scripts/career_length_survival_analysis.r: -------------------------------------------------------------------------------- 1 | # Career Length Survival Analysis 2 | setwd("C:/Users/Micah/Desktop/Baseball Data Science/Career Length Survival Analysis") 3 | 4 | # Import libraries 5 | library(reshape) 6 | library(dplyr) 7 | library(tidyverse) 8 | library(survival) 9 | library(survminer) 10 | 11 | # Read in data 12 | df <- read.csv('player_data_for_survival_analysis.csv') 13 | 14 | # Survival Analysis 15 | s <- Surv(df$time_in_mlb, df$status) 16 | class(s) 17 | 18 | # Survival analysis that doesn't consider any groupings 19 | survfit(s~1) 20 | survfit(Surv(time_in_mlb, status)~1, data=df) 21 | sfit <- survfit(Surv(time_in_mlb, status)~1, data=df) 22 | ggsurvplot(sfit) 23 | 24 | # Survival analysis with hits 25 | sfit1 <- survfit(Surv(time_in_mlb, status) ~ hits, 26 | data=df) 27 | 28 | summary(sfit1) 29 | summary(sfit1, times=seq(0, 8000, 500)) 30 | plot(sfit1) 31 | ggsurvplot(sfit1) 32 | 33 | # Survival analysis with hits and throws 34 | sfit2 <- survfit(Surv(time_in_mlb, status) ~ hits + throws, 35 | data=df) 36 | 37 | ggsurvplot(sfit2) 38 | 39 | # Survival analysis with birth_country 40 | sfit3 <- survfit(Surv(time_in_mlb, status) ~ birth_country, 41 | data=df) 42 | 43 | ggsurvplot(sfit3) 44 | 45 | # Survival analysis with height 46 | sfit4 <- survfit(Surv(time_in_mlb, status) ~ height, 47 | data=df) 48 | 49 | ggsurvplot(sfit4) 50 | 51 | # Cox regression 52 | fit <- coxph(Surv(time_in_mlb, status) ~ average_salary + 53 | birth_country + weight + height + hits + 54 | throws + age_at_debut, data = df) 55 | 56 | fit 57 | 58 | 59 | -------------------------------------------------------------------------------- /r_scripts/hof_factor_analysis.r: -------------------------------------------------------------------------------- 1 | 
# Exploratory factor analysis of Hall of Fame offensive statistics,
# followed by the identical workflow for pitching statistics.

# Library imports
library(nFactors)
library(gplots)
library(RColorBrewer)
library(semPlot)

# ---- Hitting ----

# Read in data
hof <- read.csv("hof_hitting_stats.csv")

# Inspect scree results and eigenvalues to choose the number of factors
nScree(hof)
eigen(cor(hof))

# Three factors look optimal, so fit a three-factor model
hitting_fa <- factanal(hof, factors = 3)
print(hitting_fa)

# Heatmap of the factor loadings
heatmap.2(hitting_fa$loadings,
          col = brewer.pal(9, "Greens"), trace = "none",
          key = FALSE, dend = "none", Colv = FALSE, cexCol = 1.2,
          main = "\n\n\n\n\nFactor Loadings for HoF Hitting Stats")

# SEM-style path diagram of the factor solution
semPaths(hitting_fa, what = "est", residuals = FALSE, cut = 0.4,
         posCol = c("white", "darkgreen"),
         negCol = c("white", "red"),
         edge.label.cex = 0.60, nCharNodes = 7)

# ---- Pitching ----

# Read in data
hof_pitch <- read.csv("hof_pitching_stats.csv")

# Inspect scree results and eigenvalues again
nScree(hof_pitch)
eigen(cor(hof_pitch))

# Two factors looked optimal here, but factanal does not converge on this
# data with two factors, so we fit three instead
pitching_fa <- factanal(hof_pitch, factors = 3)
print(pitching_fa)

# Heatmap of the factor loadings
heatmap.2(pitching_fa$loadings,
          col = brewer.pal(9, "Greens"), trace = "none",
          key = FALSE, dend = "none", Colv = FALSE, cexCol = 1.2,
          main = "\n\n\n\n\nFactor Loadings for HoF Pitching Stats")

# SEM-style path diagram of the factor solution
semPaths(pitching_fa, what = "est", residuals = FALSE, cut = 0.4,
         posCol = c("white", "darkgreen"),
         negCol = c("white", "red"),
         edge.label.cex = 0.60, nCharNodes = 7)
-------------------------------------------------------------------------------- /r_scripts/similarity_data_viz.r: --------------------------------------------------------------------------------
# Visualizations of pitcher-similarity data

# Library imports
library(ggplot2)
library(MASS)
library(colorRamps)
library(FactoMineR)
library(factoextra)
library(gplots)

# Parallel coordinates for Pedro
pedro <- read.csv('pedro_sims.csv')
pedro <- pedro[-c(1)]  # drop the leading index column

# FIX: the originals were named `c` and `r`, shadowing base::c() and
# base::r-related names — renamed to `pal` and `bins`
pal <- blue2red(15)
bins <- cut(pedro$Counter, 15)
parcoord(pedro, col = pal[as.numeric(bins)])

# Jitter plot for Clemens
clemens <- read.csv('clemens1997.csv')
clemens <- clemens[order(clemens$Similarity), ]

ggplot(clemens, aes(x = Pitcher_and_Year, y = Similarity)) +
  geom_jitter(alpha = 0.5, position = position_jitter(width = 0.1)) +
  ggtitle("Pitchers Most Similar to 1997 Roger Clemens") +
  labs(y = "Similarity", x = 'Player and Year') +
  theme(axis.text.x = element_text(angle = 60, hjust = 1)) +
  theme(plot.title = element_text(hjust = 0.5))

# Circular bar chart for Johnson
johnson <- read.csv('johnson2002.csv')
johnson <- johnson[order(johnson$Similarity), ]

ggplot(johnson, aes(x = Pitcher_and_Year, y = Similarity,
                    fill = Pitcher_and_Year)) +
  geom_bar(width = 0.85, stat = "identity") +

  # Use a polar plot rather than a basic barplot
  coord_polar(theta = "y") +

  # Remove axis labels
  xlab("") + ylab("") +

  # Increase ylim to avoid having a complete circle
  ylim(c(0, 1.5)) +

  # Add group labels close to the bars
  geom_text(data = johnson, hjust = 1, size = 3,
            aes(x = Pitcher_and_Year, y = 0, label = Pitcher_and_Year)) +

  # Remove the legend, y-axis ticks, and y-axis text
  theme(legend.position = "none", axis.text.y = element_blank(),
        axis.ticks = element_blank())

# Balloon plot for Greinke
# (the right-hand side of this assignment continues on the next line of the dump)
greinke <-
read.csv('greinke_sims.csv') 56 | rownames(greinke) <- greinke[,1] 57 | greinke <- greinke[-c(1)] 58 | 59 | dt <- as.table(as.matrix(greinke)) 60 | balloonplot(t(dt), main='Pitching Stats', xlab="", ylab="", 61 | label=TRUE, show.margins=FALSE) 62 | -------------------------------------------------------------------------------- /python_scripts/royals_monte_carlo.py: -------------------------------------------------------------------------------- 1 | # Library imports 2 | import pandas as pd 3 | import random 4 | 5 | 6 | # Define batting average class 7 | class WARSimulation: 8 | def __init__(self, df, name): 9 | self.df = df 10 | self.name = name 11 | 12 | def monte_carlo(self): 13 | mean = self.df['war'].mean() 14 | std = self.df['war'].std() 15 | selections = random.normalvariate(mean, std) 16 | return selections 17 | 18 | def run_the_simulation(self): 19 | x = 0 20 | selection = [] 21 | while x < 100: 22 | selection.append(self.monte_carlo()) 23 | x += 1 24 | 25 | data = pd.DataFrame({'war': selection}) 26 | data['war'] = data['war'].round(decimals=2) 27 | data.to_csv(self.name + '_results.csv', index=False) 28 | return 29 | 30 | 31 | if __name__ == "__main__": 32 | # Define dataframes 33 | hosmer_df = pd.DataFrame({'war': [1.5, -0.4, 3.5, 0.8, 3.6, 1.0]}) 34 | cain_df = pd.DataFrame({'war': [2.0, 3.2, 5.1, 7.2, 2.9]}) 35 | perez_df = pd.DataFrame({'war': [2.9, 4.1, 3.4, 2.3, 2.7]}) 36 | escobar_df = pd.DataFrame({'war': [0.5, 2.7, 3.4, 0.3, 2.5, 0.6, 0.3]}) 37 | gordon_df = pd.DataFrame({'war': [2.0, 2.8, -0.5, 7.2, 6.3, 4.2, 6.6, 2.8, 0.8]}) 38 | moustakes_df = pd.DataFrame({'war': [1.1, 3.1, -0.1, 0.4, 4.4]}) 39 | 40 | # Hosmer - simulation 41 | hosmer_selections = WARSimulation(hosmer_df, 'hosmer') 42 | hosmer_selections.run_the_simulation() 43 | 44 | # Cain - simulation 45 | cain_selections = WARSimulation(cain_df, 'cain') 46 | cain_selections.run_the_simulation() 47 | 48 | # Perez - simulation 49 | perez_selections = WARSimulation(perez_df, 'perez') 50 | 
perez_selections.run_the_simulation() 51 | 52 | # Escobar - simulation 53 | escobar_selections = WARSimulation(escobar_df, 'escobar') 54 | escobar_selections.monte_carlo() 55 | escobar_selections.run_the_simulation() 56 | 57 | # Gordan - simulation 58 | gordon_selections = WARSimulation(gordon_df, 'gordon') 59 | gordon_selections.run_the_simulation() 60 | 61 | # Moustakes - simulation 62 | moustakes_selections = WARSimulation(moustakes_df, 'moustakes') 63 | moustakes_selections.run_the_simulation() 64 | -------------------------------------------------------------------------------- /python_scripts/pitchfx_scraper.py: -------------------------------------------------------------------------------- 1 | # Import libraries 2 | from bs4 import BeautifulSoup 3 | import requests 4 | import pandas as pd 5 | 6 | 7 | def pitch_fx_scraper(base_url): 8 | # Isolate the text data 9 | data = base_url.text 10 | 11 | # Create beautiful soup object from the data 12 | soup = BeautifulSoup(data) 13 | 14 | # Put all the links in a list 15 | results = [] 16 | for gid in soup.find_all('a'): 17 | results.append(gid.get('href')) 18 | 19 | # Delete unnecessary elements of the list 20 | del results[0:21] 21 | 22 | # Extract the Game ID from each link 23 | results1 = [i.split('&prevGame=', 1)[1] for i in results] 24 | 25 | # Concatenate strings to create URLs 26 | results2 = list(map('http://www.brooksbaseball.net/pfxVB/tabdel_expanded.php?pitchSel=477132&game={0}'.format, results1)) 27 | 28 | # Scrape each link 29 | results3 = [] 30 | for i in results2: 31 | results3.append(requests.get(i)) 32 | 33 | # Grab the text for each link 34 | results4 = [] 35 | for i in results3: 36 | results4.append(i.text) 37 | 38 | # Make each a beautiful soup object 39 | results5 = [] 40 | for i in results4: 41 | results5.append(BeautifulSoup(i)) 42 | 43 | # Extract column headers 44 | sample = results5[1] 45 | column_headers = [th.getText() for th in 46 | sample.findAll('tr', limit=2)[0].findAll('th')] 47 | 48 
| # Define data rows 49 | results6 = [] 50 | for i in results5: 51 | results6.append(i.findAll('tr')) 52 | 53 | # Get data from table 54 | results7 = [] 55 | for j in results6: 56 | results7.append([[td.getText() for td in j[i].findAll('td')] 57 | for i in range(len(j))]) 58 | 59 | # Convert to dataframe 60 | results8 = [] 61 | for i in results7: 62 | results8.append(pd.DataFrame(i)) 63 | 64 | for i in results8: 65 | i.columns = column_headers 66 | 67 | return 68 | 69 | 70 | if __name__ == "__main__": 71 | # Get the URL that we'll use to construct game IDs 72 | r = requests.get("http://www.brooksbaseball.net/tabs.php?player=477132&p_hand=1&ppos=1&cn=200&compType=none&gFilt=&time=month&minmax=ci&var=gl&s_type=2&startDate=03/30/2007&endDate=10/22/2016&balls=1&strikes=1&b_hand=1") 73 | pitch_fx_scraper(r) -------------------------------------------------------------------------------- /python_scripts/all_star_network_analysis.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import os 4 | import networkx as nx 5 | import community 6 | 7 | 8 | # Inspired by Complex Network Analysis in Python (Dmitry Zinoviev) 9 | def ingest_and_create_dataframe(): 10 | # Read in files from Lahman database 11 | all_star_df = pd.read_csv('AllStarFull.csv') 12 | people_df = pd.read_csv('People.csv') 13 | 14 | all_star_df = all_star_df.loc[all_star_df['yearID'] >= 1970] 15 | all_star_df['yearID'] = all_star_df['yearID'].astype('str') 16 | all_star_df['yearID'] = all_star_df['yearID'].str[2:4] 17 | all_star_df = all_star_df[['playerID', 'yearID']] 18 | 19 | people_df['name'] = people_df['nameFirst'] + ' ' + people_df['nameLast'] 20 | people_df = people_df[['playerID', 'name']] 21 | 22 | all_star_df = pd.merge(all_star_df, people_df, how='inner', on='playerID') 23 | all_star_df.drop('playerID', 1, inplace=True) 24 | all_star_df.columns = ['yearID', 'name'] 25 | 26 | all_game_ids = 
list(set(all_star_df['yearID'].tolist())) 27 | base_df = pd.DataFrame({'yearID': all_game_ids}) 28 | all_names = list(set(all_star_df['name'].tolist())) 29 | 30 | for name in all_names: 31 | print(name) 32 | temp_df = all_star_df.loc[all_star_df['name'] == name] 33 | temp_df['name'] = 1 34 | temp_df.rename(columns={'name': name}, inplace=True) 35 | base_df = pd.merge(base_df, temp_df, how='left', on='yearID') 36 | 37 | base_df.fillna(value=0, inplace=True) 38 | base_df.to_csv('all_star_df.csv', index=False) 39 | return base_df 40 | 41 | 42 | def prepare_network(df): 43 | df.set_index('yearID', inplace=True) 44 | 45 | # Create co-occurrence matrix 46 | cooc = df.dot(df.T) * (1 - np.eye(df.shape[0])) 47 | cooc.to_csv('cooc.csv') 48 | 49 | slicing = 3 50 | weights = cooc[cooc >= slicing] 51 | weights = weights.stack() 52 | weights = weights / weights.max() 53 | cd_network = weights.to_dict() 54 | cd_network = {key: float(value) for key, value in cd_network.items()} 55 | 56 | player_network = nx.Graph() 57 | player_network.add_edges_from(cd_network) 58 | nx.set_edge_attributes(player_network, 'weight', cd_network) 59 | 60 | partition = community.best_partition(player_network) 61 | nx.set_node_attributes(player_network, 'part', partition) 62 | 63 | if not os.path.isdir('results'): 64 | os.mkdir('results') 65 | 66 | with open('results/player_network.graphml', 'wb') as ofile: 67 | nx.write_graphml(player_network, ofile) 68 | return 69 | 70 | 71 | if __name__ == "__main__": 72 | network_df = ingest_and_create_dataframe() 73 | prepare_network(network_df) 74 | -------------------------------------------------------------------------------- /r_scripts/anomaly_detection_royals_stats.r: -------------------------------------------------------------------------------- 1 | #Load libraries and read in data 2 | library(AnomalyDetection) 3 | library(ggplot2) 4 | library(XML) 5 | 6 | royals.parse <-htmlParse("http://www.baseball-reference.com/teams/tgl.cgi?team=KCR&t=b&year=2016") 7 | 
royals.tab<-readHTMLTable(royals.parse, stringsAsFactors=FALSE) 8 | royals.df<-royals.tab[[1]] 9 | write.csv(royals.df, file = "royals2016.csv") 10 | str(royals.df) 11 | 12 | #The dataframe includes a couple of month headers; let's delete those 13 | #They are rows 24, 53, 81, 108, 138, 14 | royals <- royals.df[-c(24, 53, 81, 108, 138), ] 15 | 16 | #R read in all columns as characters; we'll need to covert the 17 | #appropriate columns to numeric values 18 | cols.num <- royals[c(1:2, 7:30)] 19 | royals1 <- data.frame(sapply(cols.num, as.numeric)) 20 | 21 | royals_factors <- data.frame(cbind(royals$Date, royals$X, royals$Opp, 22 | royals$Rslt,royals$Thr)) 23 | 24 | royals_final <- data.frame(royals1, royals_factors) 25 | names(royals_final)[27]<-"Date" 26 | names(royals_final)[28]<-"Opponent" 27 | names(royals_final)[29]<-"Result" 28 | names(royals_final)[30]<-"Pitcher_Throws" 29 | str(royals_final) 30 | 31 | #Anomaly detection on different offensive statistics 32 | 33 | #Runs 34 | anomr = AnomalyDetectionVec(royals_final$R, max_anoms=0.02, direction="both", 35 | period = 7, plot = TRUE) 36 | anomr$plot 37 | anomr 38 | 39 | #Home runs 40 | anomhr = AnomalyDetectionVec(royals_final$HR, max_anoms=0.02, direction="both", 41 | period = 7, plot = TRUE) 42 | anomhr$plot 43 | anomhr 44 | 45 | #Hits 46 | anomh = AnomalyDetectionVec(royals_final$H, max_anoms=0.02, direction="both", 47 | period = 7, plot = TRUE) 48 | anomh$plot 49 | anomh 50 | 51 | #Base on Balls 52 | anombb = AnomalyDetectionVec(royals_final$BB, max_anoms=0.02, direction="both", 53 | period = 7, plot = TRUE) 54 | anombb$plot 55 | anombb 56 | 57 | #Strikeouts 58 | anomso = AnomalyDetectionVec(royals_final$SO, max_anoms=0.02, direction="both", 59 | period = 7, plot = TRUE) 60 | anomso$plot 61 | anomso 62 | 63 | #Stolen Bases 64 | anomsb = AnomalyDetectionVec(royals_final$SB, max_anoms=0.02, direction="both", 65 | period = 7, plot = TRUE) 66 | anomsb$plot 67 | anomsb 68 | 69 | 70 | #Left on base 71 | anomlob = 
AnomalyDetectionVec(royals_final$LOB, max_anoms=0.02, direction="both", 72 | period = 7, plot = TRUE) 73 | anomlob$plot 74 | anomlob 75 | 76 | #Ground into double play 77 | anomgdp = AnomalyDetectionVec(royals_final$GDP, max_anoms=0.02, direction="both", 78 | period = 7, plot = TRUE) 79 | anomgdp$plot 80 | anomgdp 81 | 82 | #Hit by pitch 83 | anomhbp = AnomalyDetectionVec(royals_final$HBP, max_anoms=0.02, direction="both", 84 | period = 7, plot = TRUE) 85 | anomhbp$plot 86 | anomhbp 87 | 88 | 89 | 90 | 91 | 92 | -------------------------------------------------------------------------------- /python_scripts/survival_ingestion_wrangling.py: -------------------------------------------------------------------------------- 1 | # Citation: https://www.cscu.cornell.edu/news/statnews/stnews67.pdf 2 | 3 | # Library imports 4 | import pymysql.cursors 5 | import pandas as pd 6 | from datetime import datetime 7 | 8 | 9 | # Database connection 10 | connection = pymysql.connect(host='localhost', 11 | user='xxxxx', 12 | password='xxxxx', 13 | db='lahman2016', 14 | charset='utf8mb4', 15 | cursorclass=pymysql.cursors.DictCursor) 16 | 17 | 18 | # Data ingestion 19 | def ingest_data(): 20 | master_query = ''' 21 | SELECT * from master;''' 22 | 23 | pitching_query = ''' 24 | SELECT sum(GS), playerID from pitching group by playerID;''' 25 | 26 | salaries_query = ''' 27 | SELECT yearID, salary, playerID from salaries;''' 28 | 29 | master = pd.read_sql(master_query, connection) 30 | pitching = pd.read_sql(pitching_query, connection) 31 | salaries = pd.read_sql(salaries_query, connection) 32 | return master, pitching, salaries 33 | 34 | 35 | def data_cleaning(master, pitching, salaries): 36 | # Put salaries in 2016 dollars 37 | inflation = pd.read_csv('inflation_conversion.csv') 38 | salaries = pd.merge(salaries, inflation, how = 'left', on = 'yearID') 39 | salaries.fillna(value = 1, inplace = True) 40 | 41 | salaries['adjusted_salary'] = salaries['salary'] / salaries['CF'] 42 | 43 | # 
Get each players average salary 44 | salaries = salaries.groupby(['playerID'])['adjusted_salary'].mean() 45 | salaries = pd.DataFrame(salaries) 46 | salaries.reset_index(inplace = True) 47 | 48 | current = pd.read_csv('http://seamheads.com/baseballgauge/downloads/events.csv') 49 | current.rename(columns={' BAT_ID':'retroID'}, inplace=True) 50 | current = current[['GAME_ID', 'retroID']] 51 | current.drop_duplicates(subset = 'retroID', inplace = True) 52 | 53 | # Merge the data 54 | df = pd.merge(salaries, master, how = 'left', on = 'playerID') 55 | df = pd.merge(df, pitching, how = 'left', on = 'playerID') 56 | df = pd.merge(df, current, how = 'left', on = 'retroID') 57 | 58 | # Keep only position players 59 | df = df.loc[df['sum(GS)'] == 0] 60 | return df 61 | 62 | 63 | # Mark current players as being censored 64 | def prep_data_for_survival_analysis(df): 65 | df.rename(columns={'GAME_ID': 'Censored'}, inplace=True) 66 | df['Censored'].fillna(value = 2, inplace = True) 67 | 68 | def label_censor (row): 69 | if row['Censored'] == 2: 70 | return 2 71 | else: 72 | return 1 73 | 74 | df['Censored'] = df.apply (lambda row: label_censor(row), axis=1) 75 | 76 | # Age at time of debut 77 | # Time being in the MLB 78 | df['birthYear'] = df['birthYear'].astype('str') 79 | df['birthYear'] = df['birthYear'].str[:4] 80 | 81 | df['birthMonth'] = df['birthMonth'].astype('str') 82 | 83 | numbers_map = {'1.0': '01', '2.0': '02', '3.0': '03', '4.0': '04', '5.0': '05', 84 | '6.0': '06', '7.0': '07', '8.0': '08', '9.0': '09', '10.0': '10', 85 | '11.0': '11', '12.0': '12'} 86 | 87 | df['birthMonth'] = df['birthMonth'].map(numbers_map) 88 | 89 | df['birthDay'] = df['birthDay'].astype('str') 90 | df['birthDay'] = df['birthDay'].str[:-2] 91 | 92 | df['birthday'] = df['birthYear'] + '-' + df['birthMonth'] + '-' + df['birthDay'] 93 | 94 | df['birthday'] = pd.to_datetime(df['birthday']) 95 | df['debut'] = pd.to_datetime(df['debut']) 96 | df['finalGame'] = pd.to_datetime(df['finalGame']) 97 | 
98 | df['age_at_debut'] = df['debut'] - df['birthday'] 99 | df['time_in_mlb'] = df['finalGame'] - df['debut'] 100 | 101 | # Select columns for analysis 102 | df = df[['adjusted_salary', 'birthCountry', 'weight', 'height', 'bats', 103 | 'throws', 'debut', 'finalGame', 'age_at_debut', 104 | 'time_in_mlb', 'Censored']] 105 | 106 | df.columns = ['average_salary', 'birth_country', 'weight', 'height', 'hits', 107 | 'throws', 'debut', 'final_game', 'age_at_debut', 108 | 'time_in_mlb', 'status'] 109 | return df 110 | 111 | 112 | if __name__ == "__main__": 113 | master, pitching, salaries = ingest_data() 114 | df = data_cleaning(master, pitching, salaries) 115 | df = prep_data_for_survival_analysis(df) 116 | df.to_csv('player_data_for_survival_analysis.csv', index = False) 117 | 118 | -------------------------------------------------------------------------------- /r_scripts/arrieta_no_hitter_analysis.r: -------------------------------------------------------------------------------- 1 | #Create simple web scraper to retrieve data from brooksbaseball.net 2 | library(XML) 3 | arrieta <-htmlParse("http://www.brooksbaseball.net/pfxVB/tabdel_expanded.php?pitchSel=453562&game=gid_2016_04_21_chnmlb_cinmlb_1/&s_type=3&h_size=700&v_size=50") 4 | arrieta.tab<-readHTMLTable(arrieta, stringsAsFactors=FALSE) 5 | arrieta.df<-arrieta.tab[[1]] 6 | write.csv(arrieta.df, file = "arrieta.csv") 7 | 8 | #Inspect the data 9 | str(arrieta.df) 10 | #unfortunately, everything has been read in as characters 11 | 12 | #Instead, let's read in the CSV of the data, which should eliminate this issue 13 | arrieta <- read.csv("arrieta.csv") 14 | summary(arrieta) 15 | 16 | #Let's start by looking at Arrietta's pitch speed during the night 17 | plot(ecdf(arrieta$start_speed), 18 | main = "Cumulative Distribution of Pitch Speed", 19 | ylab = "Cumulative Proportion", 20 | xlab = "Pitch Speed", 21 | yaxt = "n") 22 | axis (side=2, at=seq(0, 1, by=0.1), las=1, labels=paste(seq(0, 100, by=10), 23 | "%", sep=" ")) 
# Reference lines marking the 90th percentile of pitch speed on the ECDF plot
abline(h = 0.9, lty = 3)
# FIX: spell out `probs` instead of relying on partial matching of `pr`
abline(v = quantile(arrieta$start_speed, probs = 0.9), lty = 3)

# Now take a look at horizontal and vertical movement of his pitches
library(ggplot2)
p <- ggplot(arrieta, aes(x = pfx_x, y = pfx_z))
# FIX: corrected the "Hortizontal" typo in the displayed plot title
p + geom_point() + stat_density2d() +
  ggtitle("Density of Vertical and Horizontal Pitch Movement")

# Visualize the differences in Arrieta's pitches based on pitch velocity,
# movement, and spin
ggplot(arrieta, aes(start_speed, fill = mlbam_pitch_name)) +
  geom_histogram(binwidth = 1) + facet_wrap(~ mlbam_pitch_name) +
  ggtitle("Pitch Speed Histogram by Pitch Type")

ggplot(arrieta, aes(x = pfx_x, y = pfx_z)) +
  geom_point(shape = 19) + facet_wrap(~ mlbam_pitch_name) +
  geom_smooth() + ggtitle("Vertical and Horizontal Movement by Pitch")

# Drop the changeups before plotting spin densities.
# FIX: filter by pitch type rather than hard-coded row positions (69, 90),
# which silently select the wrong rows if the scrape order ever changes.
# %in% is used so NA pitch names are excluded rather than producing NA rows.
without_ch <- arrieta[!(arrieta$mlbam_pitch_name %in% "CH"), ]
ggplot(without_ch, aes(spin, fill = mlbam_pitch_name)) +
  geom_density() + facet_wrap(~ mlbam_pitch_name) +
  ggtitle("Spin by Pitch Type")

# Subset the data by play result to see if we can deduce any insights
ball <- subset(arrieta, pdes == "Ball")
called_strike <- subset(arrieta, pdes == "Called Strike")
foul <- subset(arrieta, pdes == "Foul")
in_play_outs <- subset(arrieta, pdes == "In play, out(s)")
swinging_strike <- subset(arrieta, pdes == "Swinging Strike")

summary(ball)
summary(called_strike)
summary(foul)
summary(in_play_outs)
summary(swinging_strike)

# Look at Arrieta's performance throughout the game
mean(arrieta$start_speed)
aggregate(start_speed ~ inning + mlbam_pitch_name, data = arrieta, mean)

p1 <- ggplot(arrieta, aes(x = inning, y = mlbam_pitch_name, fill = start_speed))
p1 + geom_tile() +
  scale_fill_gradient2(midpoint = 92, low = "blue", high = "red") +
  ggtitle("Pitch Speed by Inning") +
  scale_x_continuous(breaks = c(1, 2, 3, 4, 5, 6, 7, 8, 9))
# Compare early, middle, and late innings
library(dplyr)
early <- filter(arrieta, inning %in% c(1, 2, 3))
mid <- filter(arrieta, inning %in% c(4, 5, 6))
late <- filter(arrieta, inning %in% c(7, 8, 9))

summary(early)
prop.table(table(early$mlbam_pitch_name))
mean(early$start_speed)

summary(mid)
prop.table(table(mid$mlbam_pitch_name))
mean(mid$start_speed)

summary(late)
# FIX: removed a duplicated summary(mid) here — copy-paste slip in the
# late-innings section
prop.table(table(late$mlbam_pitch_name))
mean(late$start_speed)

# Can we predict which pitch Arrieta will throw next?

# Read in new dataset
prediction <- read.csv("arrieta_prediction.csv")
summary(prediction)
str(prediction)

# Ball/strike counts are categories, not quantities
prediction$strikes <- as.factor(prediction$strikes)
prediction$balls <- as.factor(prediction$balls)

# Create training and test sets
library(caret)

# FIX: seed the RNG so the partition, the forest, and the reported
# confusion matrices are reproducible across runs
set.seed(42)
inTrain <- createDataPartition(y = prediction$pitch_type, p = 0.75, list = FALSE)
training <- prediction[inTrain, ]
testing <- prediction[-inTrain, ]

# Decision Tree and Random Forest
library(rpart)
library(rattle)
tree <- rpart(pitch_type ~ ., method = "class", data = training)
printcp(tree)
print(tree)
fancyRpartPlot(tree)

library(randomForest)
fit_rf <- randomForest(pitch_type ~ ., data = training)
print(fit_rf)

head(fit_rf$votes)
importance(fit_rf)
barplot(fit_rf$importance[, 1],
        main = "Importance of Variables in Random Forest",
        cex.names = 0.5)

# FIX: renamed `table`/`table2` so base::table() (used above for the
# pitch-mix proportions) is not shadowed by a data frame
tree_predictions <- predict(tree, testing, type = "class")
tree_results <- data.frame(tree_predictions, testing$pitch_type)
tree_results
confusionMatrix(tree_predictions, testing$pitch_type)

rf_predictions <- predict(fit_rf, testing, type = "class")
rf_results <- data.frame(rf_predictions, testing$pitch_type)
rf_results
confusionMatrix(rf_predictions, testing$pitch_type)
| rf_predictions2 <- predict(fit_rf, testing, type="prob") 133 | table3 <- data.frame(rf_predictions2, testing$pitch_type) 134 | table3 135 | -------------------------------------------------------------------------------- /r_scripts/chris_sale_lca.r: -------------------------------------------------------------------------------- 1 | #Can Latent Class Analysis identify Chris Sale's pitches? 2 | 3 | #Libraries 4 | library(XML) 5 | library(ggplot2) 6 | library(poLCA) 7 | library(dplyr) 8 | library(MASS) 9 | library(colorRamps) 10 | 11 | #Create simple web scraper to retrieve data from brooksbaseball.net 12 | #Let's scrape the last five games of the 2016 regular season 13 | 14 | #Game 1 15 | game1 <-htmlParse("http://www.brooksbaseball.net/pfxVB/tabdel_expanded.php?pitchSel=519242&game=gid_2016_10_02_minmlb_chamlb_1/&s_type=3&h_size=700&v_size=500") 16 | game1.tab <-readHTMLTable(game1, stringsAsFactors=FALSE) 17 | game1.df <- game1.tab[[1]] 18 | 19 | #Game 2 20 | game2 <-htmlParse("http://www.brooksbaseball.net/pfxVB/tabdel_expanded.php?pitchSel=519242&game=gid_2016_09_27_tbamlb_chamlb_1/&s_type=3&h_size=700&v_size=500") 21 | game2.tab <-readHTMLTable(game2, stringsAsFactors=FALSE) 22 | game2.df <- game2.tab[[1]] 23 | 24 | #Game 3 25 | game3 <-htmlParse("http://www.brooksbaseball.net/pfxVB/tabdel_expanded.php?pitchSel=519242&game=gid_2016_09_21_chamlb_phimlb_1/&s_type=3&h_size=700&v_size=500") 26 | game3.tab <-readHTMLTable(game3, stringsAsFactors=FALSE) 27 | game3.df <- game3.tab[[1]] 28 | 29 | #Game 4 30 | game4 <-htmlParse("http://www.brooksbaseball.net/pfxVB/tabdel_expanded.php?pitchSel=519242&game=gid_2016_09_16_chamlb_kcamlb_1/&s_type=3&h_size=700&v_size=500") 31 | game4.tab <-readHTMLTable(game4, stringsAsFactors=FALSE) 32 | game4.df <- game4.tab[[1]] 33 | 34 | #Game 5 35 | game5 <-htmlParse("http://www.brooksbaseball.net/pfxVB/tabdel_expanded.php?pitchSel=519242&game=gid_2016_09_11_kcamlb_chamlb_1/&s_type=3&h_size=700&v_size=500") 36 | game5.tab 
<-readHTMLTable(game5, stringsAsFactors=FALSE) 37 | game5.df <- game5.tab[[1]] 38 | 39 | #Combine all the dataframes 40 | sale <- rbind(game1.df, game2.df, game3.df, game4.df, game5.df) 41 | 42 | #All the columns were read in as characters, which isn't what we want 43 | #Let's write the file to a CSV and read it back in, which should fix the issue 44 | write.csv(sale, "sale.csv") 45 | sale <- read.csv("sale.csv") 46 | 47 | #Alright, it looks like everything read in correctly 48 | 49 | #Let's create a pie chart of Sale's different pitches 50 | ggplot(sale, aes(x = factor(1), fill = factor(mlbam_pitch_name))) + 51 | geom_bar(width = 1) + coord_polar(theta = "y") + 52 | ggtitle("Pitches Thrown in Final 5 Games of 2016") + ylab(" ") + 53 | xlab(" ") + scale_y_continuous(breaks = sale$mlbam_pitch_names, 54 | labels=sale$mlbam_pitch_name) 55 | 56 | #Let's run multiple latent class models to see how they perform 57 | #We'll use the following variables in the model: 58 | #spin, pfx_x, pfx_z, vx0, vy0, vz0, ax, ay, az, start_speed 59 | #https://fastballs.wordpress.com/2007/08/02/glossary-of-the-gameday-pitch-fields/ 60 | 61 | #First, though, we need to convert the numeric data into 62 | #categorical data; let's split the variables into quartiles 63 | sale.sub <- sale[c("spin", "pfx_x", "pfx_z", "vx0", "vy0", "vz0", 64 | "ax", "ay", "az", "start_speed")] 65 | 66 | quartile <- function(x) { 67 | ntile(x, 4) 68 | } 69 | 70 | sale.sub <- apply(sale.sub, 2, quartile) 71 | sale.sub <- data.frame(sale.sub) 72 | sale.sub <- data.frame(sapply(sale.sub, as.factor)) 73 | summary(sale.sub) 74 | 75 | #Sale throws three pitches, so we're most interested in the lc3 model 76 | f <- cbind(spin, pfx_x, pfx_z, vx0, vy0, vz0, ax, ay, az, start_speed)~1 77 | set.seed(200) 78 | lc2 <- poLCA(f, sale.sub, nclass=2, graph = TRUE) 79 | lc3 <- poLCA(f, sale.sub, nclass=3, graph = TRUE) 80 | lc4 <- poLCA(f, sale.sub, nclass=4, graph = TRUE) 81 | 82 | #Since we know Sale throws three pitches, it 
doesn't make sense to 83 | #run more models, though the lc4 had a better AIC than the lc3 84 | #Let's dive deeper in the lc3 results 85 | 86 | #Look at predictions and probabilities for each observation 87 | probs <- lc3$posterior 88 | head(probs) 89 | 90 | preds <- lc3$predclass 91 | head(preds) 92 | 93 | #Create a dataframe of predictions and probabilities assigned to each observation 94 | prediction_frame <- data.frame(preds, probs) 95 | 96 | #Create a data frame of the the original numeric data and the declared pitch type 97 | sale.original <- sale[c("spin", "pfx_x", "pfx_z", "vx0", "vy0", "vz0", 98 | "ax", "ay", "az", "start_speed", "mlbam_pitch_name")] 99 | 100 | #Bind the data frames 101 | sale.final <- data.frame(sale.original, prediction_frame) 102 | 103 | #Clean the names of the columns from the prediction frame 104 | names(sale.final)[12] <- "Predicted_Class" 105 | names(sale.final)[13] <- "Class1_Prob" 106 | names(sale.final)[14] <- "Class2_Prob" 107 | names(sale.final)[15] <- "Class3_Prob" 108 | 109 | #Change the predicted class to a factor 110 | sale.final$Predicted_Class <- as.factor(sale.final$Predicted_Class) 111 | 112 | #Get summaries of the data 113 | sale_lca1 <- subset(sale.final, Predicted_Class=="1") 114 | summary(sale_lca1) 115 | 116 | sale_lca2 <- subset(sale.final, Predicted_Class=="2") 117 | summary(sale_lca2) 118 | 119 | sale_lca3 <- subset(sale.final, Predicted_Class=="3") 120 | summary(sale_lca3) 121 | 122 | #Parallel coordinates plot to view the classes 123 | r <- (sale.final$Predicted_Class) 124 | parcoord(sale.final[1:10], col=r) 125 | -------------------------------------------------------------------------------- /python_scripts/home_runs.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | from scipy.stats import norm 4 | import requests 5 | from bs4 import BeautifulSoup 6 | from time import sleep 7 | from sklearn.preprocessing import MinMaxScaler 
def create_aggregate_file(df):
    """Collapse a Lahman Batting table to one row per season.

    Args:
        df: DataFrame with at least 'yearID' and 'HR' columns.

    Returns:
        DataFrame with integer columns ['year', 'home_runs'], where
        home_runs is the league-wide total for that season.
    """
    grouped = pd.DataFrame(df.groupby('yearID')['HR'].sum())
    grouped.reset_index(inplace=True)
    grouped.columns = ['year', 'home_runs']
    grouped[['year', 'home_runs']] = grouped[['year', 'home_runs']].astype('int')
    return grouped


def find_mean_and_variance(df, year, rolling_period, excluded_years):
    """Mean and standard deviation of HR totals over a trailing window.

    The window covers [year - rolling_period, year). Seasons listed in
    excluded_years (e.g. strike years) are dropped, and for each dropped
    season one earlier season is pulled in so the window size stays fixed.

    Returns:
        (mean, std) tuple. Note: despite the original local name 'var',
        the second element is the standard deviation, not the variance.
    """
    cutoff = year - rolling_period
    # Backfill candidates in case excluded seasons shrink the window
    df_lag1 = df.loc[df['year'] == cutoff - 1]
    df_lag2 = df.loc[df['year'] == cutoff - 2]

    df = df.loc[(df['year'] >= cutoff) & (df['year'] < year)]
    len_1 = len(df)
    df = df.loc[~df['year'].isin(excluded_years)]
    len_2 = len(df)

    diff = len_1 - len_2
    if diff == 1:
        df = pd.concat([df_lag1, df])
    elif diff == 2:
        df = pd.concat([df_lag2, df])

    mean = df['home_runs'].mean()
    std = df['home_runs'].std()
    return mean, std


def scrape_2019_home_runs():
    """Scrape 2019 batting stats from foxsports.com and cache them to CSV."""
    master_df = pd.DataFrame()
    # page_num replaces the original loop variable 'page', which was
    # immediately shadowed by the requests.get() response object
    for page_num in range(1, 27):
        url = 'https://www.foxsports.com/mlb/stats?season=2019&category=BATTING&group=1&sort=7&time=0&pos=0&qual=1&' \
              'sortOrder=0&splitType=0&page={0}&statID=0'.format(page_num)
        page = requests.get(url)
        # Name the parser explicitly; bare BeautifulSoup(text) emits a warning
        soup = BeautifulSoup(page.text, 'html.parser')
        table = soup.findAll('tr')
        data = ([[td.getText() for td in table[i].findAll('td')] for i in range(len(table))])
        df = pd.DataFrame(data)
        # pandas 2.x fix: DataFrame.append() was removed -- use pd.concat
        master_df = pd.concat([master_df, df])
        sleep(5)  # be polite to the server
    master_df.to_csv('2019_hr.csv')
    return master_df


def fit_pdf_and_cdf():
    """Score each season's HR total against a rolling normal distribution."""
    df_2019 = pd.read_csv('2019_hr.csv')
    hr_total_2019 = df_2019['8'].sum()  # column '8' holds HR in the scraped table
    batting_df = pd.read_csv('baseballdatabank-2019.2/core/Batting.csv')
    batting_df = create_aggregate_file(batting_df)
    df_2019 = pd.DataFrame({'home_runs': [hr_total_2019], 'year': [2019]})
    batting_df = pd.concat([batting_df, df_2019], axis=0)
    batting_df.to_csv('full_data_df.csv', index=False)

    pdf_df = pd.DataFrame()
for year in range(1980, 2020): 66 | temp_items = find_mean_and_variance(batting_df, year, 20, excluded_years=[1981, 1994]) 67 | temp_batting_df = (batting_df.loc[batting_df['year'] == year]).reset_index(drop=True) 68 | temp_hr_value = temp_batting_df['home_runs'][0] 69 | temp_pdf = norm(loc=temp_items[0], scale=temp_items[1]).pdf(temp_hr_value) 70 | temp_cdf = norm(loc=temp_items[0], scale=temp_items[1]).cdf(temp_hr_value) 71 | temp_pdf_df = pd.DataFrame({ 72 | 'year': [year], 73 | 'home_runs': [temp_hr_value], 74 | 'rolling_mean_hr': temp_items[0], 75 | 'rolling_std': temp_items[1], 76 | 'pdf': [temp_pdf * 100], 77 | 'cdf': [temp_cdf] 78 | }) 79 | pdf_df = pdf_df.append(temp_pdf_df) 80 | 81 | pdf_df['var_from_average'] = abs(pdf_df['home_runs'] - pdf_df['rolling_mean_hr']) 82 | pdf_df.to_csv('pdf_df.csv', index=False) 83 | return 84 | 85 | 86 | def calculate_summary_stats(): 87 | batting_df = pd.read_csv('baseballdatabank-2019.2/core/Batting.csv') 88 | batting_df = batting_df[['yearID', 'HR', 'AB']] 89 | batting_df.columns = ['year', 'home_runs', 'at_bats'] 90 | 91 | df_2019 = pd.read_csv('2019_hr.csv') 92 | df_2019 = df_2019[['8', '3']] 93 | df_2019['year'] = '2019' 94 | df_2019.rename(columns={'8': 'home_runs', '3': 'at_bats'}, inplace=True) 95 | df_2019.dropna(inplace=True) 96 | 97 | master_df = pd.concat([batting_df, df_2019], axis=0) 98 | master_df.to_csv('all_home_runs.csv', index=False) 99 | 100 | master_df = master_df.loc[master_df['at_bats'] >= 150] 101 | grouped = master_df.groupby('year').agg({'home_runs': ['mean', 'median', 'std']}) 102 | grouped.reset_index(inplace=True) 103 | grouped.columns = grouped.columns.droplevel() 104 | grouped.columns = ['year', 'mean', 'median', 'std'] 105 | 106 | scalar = MinMaxScaler() 107 | grouped['mean_scaled'] = scalar.fit_transform(grouped['mean'].values.reshape(-1, 1)) 108 | grouped['median_scaled'] = scalar.fit_transform(grouped['median'].values.reshape(-1, 1)) 109 | grouped['std_scaled'] = 
scalar.fit_transform(grouped['std'].values.reshape(-1, 1)) 110 | 111 | q = pd.DataFrame(master_df.groupby('year')['home_runs'].quantile(q=np.linspace(.10, .90, 9))) 112 | q.reset_index(inplace=True) 113 | q.columns = ['year', 'quantile', 'home_runs'] 114 | q = q.pivot(index='year', columns='quantile', values='home_runs') 115 | q.reset_index(inplace=True) 116 | 117 | q.columns = ['year', 'quantile_0.1', 'quantile_0.2', 'quantile_0.3', 'quantile_0.4', 'quantile_0.5', 118 | 'quantile_0.6', 'quantile_0.7', 'quantile_0.8', 'quantile_0.9'] 119 | 120 | grouped = pd.merge(grouped, q, how='left', on='year') 121 | grouped.to_csv('yearly_summary.csv', index=False) 122 | return 123 | 124 | 125 | if __name__ == "__main__": 126 | scrape_2019_home_runs() 127 | calculate_summary_stats() 128 | fit_pdf_and_cdf() 129 | -------------------------------------------------------------------------------- /r_scripts/smoltz_text_analysis.r: -------------------------------------------------------------------------------- 1 | # HoF text mining 2 | # Load libraries 3 | library(quanteda) 4 | library(tm) 5 | library(data.table) 6 | library(topicmodels) 7 | library(ggplot2) 8 | 9 | #############Text Analysis############# 10 | # Read in data 11 | smoltz <- read.csv('smoltz_speech.csv') 12 | 13 | #Convert the tweet text into a corpus, the object R needs for text analysis 14 | masterCorpus <- Corpus(VectorSource(smoltz$text)) 15 | masterCorpus 16 | 17 | #Remove punctuation and numbers from the text; convert all text to lower case 18 | masterCorpus <- tm_map(masterCorpus, removePunctuation) 19 | masterCorpus <- tm_map(masterCorpus, removeNumbers) 20 | masterCorpus <- tm_map(masterCorpus, tolower) 21 | 22 | #Remove common stop words as well as bespoke list of words 23 | masterCorpus <- tm_map(masterCorpus, removeWords, stopwords("english")) 24 | masterCorpus <- tm_map(masterCorpus, removeWords, c("how", "why", "be", "here", "there", "via", 25 | "amp", "there", "will", "can", "see", "new", 26 | "sap", 
"help", "find", "get", "make", "watch", 27 | "take", "learn", "need", "one", "now", "just", 28 | "like", "cant", "got", "much", "say", "way", 29 | "going", "dont", "said", "ever", "doesnt", 30 | "come", "since","wont","saying","didnt", 31 | "every", "hes","youre", "still", "ive", 32 | "use", "even", "u", "ut")) 33 | 34 | #Stem the document to have R read items like "analyze" and "analyzed" as one term 35 | #Stripe white space created by removed words and treat the corpus as plain text 36 | masterCorpus <- tm_map(masterCorpus, stemDocument) 37 | masterCorpus <- tm_map(masterCorpus, stripWhitespace) 38 | masterCorpus <- tm_map(masterCorpus, PlainTextDocument) 39 | 40 | #create document term matrix, which tells us which words were used in each document (i.e. a tweet) 41 | dtm <- DocumentTermMatrix(masterCorpus) 42 | dtm 43 | 44 | #Sum the number of times each word was used 45 | freq <- colSums(as.matrix(dtm)) 46 | 47 | #Order the number of times by how often they were used 48 | ord <- order(freq) 49 | freq[tail(ord)] 50 | 51 | #Convert the list of words to a dataframe, which will be easier to work with 52 | wf <- data.frame(word=names(freq), freq=freq) 53 | write.csv(wf, file = "smoltz_unigrams.csv") 54 | 55 | # Create chart of most used words 56 | p <- ggplot(subset(wf, freq > 10), aes(word, freq, fill = "blue")) 57 | p <- p + geom_bar(stat="identity") + ggtitle('Smoltz Most Used Words') 58 | p <- p + theme(axis.text.x=element_text(angle=45, hjust=1))+ 59 | theme(plot.title = element_text(hjust = 0.5)) + theme(legend.position = "none") 60 | p 61 | 62 | #Create a Quanteda corpus from the TM corpus 63 | smoltz$text <- as.character(smoltz$text) 64 | corpus <- corpus(smoltz$text) 65 | 66 | #Create a document term matrix of bi-grams, groups of two words, and sort the results by popularity 67 | dfm.bi <- dfm(corpus, ignoredFeatures = stopwords("english"), stem = TRUE, ngrams = 2, verbose = FALSE) 68 | dfm.bi.freq <- colSums(dfm.bi) 69 | dfm.bi.freq <- sort(dfm.bi.freq, 
decreasing=TRUE) 70 | 71 | #Take out bi-grams used fewer than ten times 72 | dfm.bi.freq.prune <- as.numeric() 73 | for (i in 1:length(dfm.bi.freq)) { 74 | if (dfm.bi.freq[i] > 2) { 75 | dfm.bi.freq.prune <- c(dfm.bi.freq.prune, dfm.bi.freq[i]) } 76 | } 77 | 78 | #Convert results to a data frame 79 | bigrams <- data.frame(dfm.bi.freq.prune) 80 | 81 | #Change rownames to a column and rename columns 82 | setDT(bigrams, keep.rownames = TRUE)[] 83 | names(bigrams)[1] <- "word" 84 | names(bigrams)[2] <- "freq" 85 | 86 | write.csv(bigrams, file = "smoltz_bigrams.csv") 87 | 88 | p <- ggplot(subset(bigrams, freq > 3), aes(word, freq, fill = "blue")) 89 | p <- p + geom_bar(stat="identity") + ggtitle('Smoltz Most Used Bigramss') 90 | p <- p + theme(axis.text.x=element_text(angle=45, hjust=1))+ 91 | theme(plot.title = element_text(hjust = 0.5)) + theme(legend.position = "none") 92 | p 93 | 94 | #Remove sparse terms from the document term matrix 95 | dtms <- removeSparseTerms(dtm, 0.98) 96 | dtms 97 | 98 | #Cluster words that often appear together 99 | d <- dist(t(dtms), method="euclidian") 100 | kfit <- kmeans(d, 10) 101 | clusplot(as.matrix(d), kfit$cluster, color=T, shade=T, labels=2, lines=0) 102 | clusters <- data.frame(kfit$cluster) 103 | print(clusters) 104 | 105 | #Create a document term matrix for topic modeling 106 | dfm.uni <- dfm(corpus, ignoredFeatures = stopwords("english"), stem = TRUE, verbose = FALSE) 107 | 108 | #Run LDA topic model 109 | burnin <- 4000 110 | iter <- 2000 111 | thin <- 500 112 | seed <-list(2003,5,63,100001,765) 113 | nstart <- 5 114 | best <- TRUE 115 | k <- 5 116 | 117 | ldaOut <-LDA(dtm,k, method='Gibbs', control=list(nstart=nstart, 118 | seed = seed, 119 | best=best, 120 | burnin = burnin, 121 | iter = iter, thin=thin)) 122 | 123 | ldaOut.topics <- as.matrix(terms(ldaOut, 6)) 124 | 125 | -------------------------------------------------------------------------------- /r_scripts/multidimensional_scaling_2016_teams.r: 
--------------------------------------------------------------------------------
#Multidimensional scaling and hierarchical clustering on 2016 offensive statistics

#Load libraries
library(XML)
library(plyr)
library(dplyr)
library(stringr)
library(Hmisc)
library(MASS)
library(cluster)

#Scrape a dataset that includes each team's abbreviation
#We'll isolate the names and put them in a list, which will be used to scrape
#the data we really want
abbreviations <- htmlParse("http://www.baseball-reference.com/leagues/MLB/2016.shtml")
abbreviations.tab <- readHTMLTable(abbreviations, stringsAsFactors = FALSE)
abbreviations.df <- abbreviations.tab[[2]]

#Create list of teams
teams <- list(abbreviations.df$Tm)
teams <- sapply(teams, "[", c(1:30))

#Per-game offensive statistics scraper.
#table_index generalizes the original fetch_offense/fetch_offense2 pair:
#some team pages return the game log as the first table, others as the second.
fetch_offense <- function(team, table_index = 1) {
  url <- paste0("http://www.baseball-reference.com/teams/tgl.cgi?team=", team, "&t=b&year=2016.com")
  data <- readHTMLTable(url, stringsAsFactors = FALSE)
  data <- data[[table_index]]
  data$team <- team
  data
}

#Kept for backward compatibility with the original script
fetch_offense2 <- function(team) {
  fetch_offense(team, table_index = 2)
}

#Teams whose game log is the first table on the page
first_table_teams <- c("ARI", "ATL", "BAL", "CHW", "CIN", "COL", "DET", "HOU",
                       "KCR", "LAA", "MIA", "MIL", "MIN", "NYM", "NYY", "OAK",
                       "PHI", "PIT", "SDP", "SEA", "SFG", "STL", "TBR", "TOR")

#Teams whose game log is the second table on the page
second_table_teams <- c("TEX", "WSN", "LAD", "CLE", "BOS", "CHC")

#ldply iterates over each vector and row-binds the results, replacing the
#thirty copy-pasted per-team calls. Row order differs from the original rbind,
#but every downstream statistic is aggregated by team, so results are unchanged.
offense <- rbind(
  ldply(first_table_teams, fetch_offense, .progress = "text"),
  ldply(second_table_teams, fetch_offense2, .progress = "text")
)

#Remove monthly headers in the dataframe
offense <- offense[!grepl("PA", offense$Opp), ]

#Select only the columns we need and convert to the correct data type
offense_num <- subset(offense, select = c(7:24, 29))
offense_cat <- offense[33]

offense_cat$team <- as.factor(offense_cat$team)
offense_num <- data.frame(sapply(offense_num, as.numeric))

offense_sub <- cbind(offense_cat, offense_num)

#Aggregate the stats by team
totals <- aggregate(. ~ team, offense_sub, sum)

#Calculate BA, OBP, and SLG for each team
totals$BA <- totals$H / totals$AB

totals$OBP <- (totals$H + totals$BB + totals$HBP) / (totals$AB + totals$BB + totals$HBP +
                                                       totals$SF)

totals$X1B <- totals$H - (totals$X2B + totals$X3B + totals$HR)
totals$SLG <- (totals$X1B + totals$X2B * 2 + totals$X3B * 3 + totals$HR * 4) / totals$AB

#Apply multidimensional scaling to the data
rownames(totals) <- totals[, 1]
totals <- totals[2:24]
totals <- scale(totals)

team.dist <- dist(totals)
team.mds <- cmdscale(team.dist)

plot(team.mds, type = "n")
text(team.mds, row.names(team.mds))

#Apply hierarchical clustering to the data
set.seed(100)
teams_hclust <- totals
dm <- dist(teams_hclust, method = "euclidean")
hclust_teams <- hclust(dm, method = "complete")
plot(hclust_teams)
--------------------------------------------------------------------------------
/r_scripts/historical_team_clustering.r:
--------------------------------------------------------------------------------
#Read in and inspect data
library(Lahman) #https://cran.r-project.org/web/packages/Lahman/Lahman.pdf
data(Teams)
#http://rpackages.ianhowson.com/rforge/Lahman/man/Teams.html
summary(Teams)

#Isolate selected numeric variables
#We'll also create a reference data frame that will just have team name and year;
#the reference set will be handy later.
#(The original script had an earlier teams_subset assignment here that was
#immediately overwritten by the line below -- the dead assignment was removed.)
teams_subset <- Teams[c(7, 15:23, 27:28, 30:32, 34:38)]
teams_reference <- Teams[c(1, 4, 7, 15:23, 27:28, 30:32, 33:38)]

#Remove teams with missing values
teams_subset <- na.omit(teams_subset)
teams_reference <- na.omit(teams_reference)

#On the reference set, drop everything but year and franchise IDs
teams_reference <- teams_reference[c(1:2)]

#Make each variable per-game rather than an aggregate
#(column 1 of teams_subset is games played)
teams_final <- sweep(teams_subset, 1, unlist(teams_subset[, 1]), "/")
summary(teams_final)

#Drop the games column
teams_final <- teams_final[(-1)]

#Create a visual of correlations among variables
library(corrplot)
library(gplots)

correlations <- cor(teams_final)
correlations <- round(correlations, digits = 2)

corrplot(correlations)
corrplot(correlations, method = "shade", shade.col = NA, tl.col = "black")

#Look at relationships between selected variables
#To avoid over-plotting, we'll use the hexbin package
library(hexbin)
library(ggplot2)
p <- ggplot(teams_final, aes(x = E, y = RA))
p + stat_binhex() +
  scale_fill_gradient(low = "lightblue", high = "red") +
  ggtitle("Relationship Between Errors Per Game \n and Runs Allowed Per Game")

p1 <- ggplot(teams_final, aes(x = HR, y = R))
p1 + stat_binhex() +
  scale_fill_gradient(low = "lightblue", high = "red") +
  ggtitle("Relationship Between HR Per Game \n and Runs Per Game")

#Develop parallel coordinates plot of variables
library(MASS)
library(colorRamps)

c <- blue2red(100)
r <- cut(teams_final$SHO, 100)
parcoord(teams_final, col = c[as.numeric(r)])

h <- cut(teams_final$HR, 100)
parcoord(teams_final, col = c[as.numeric(h)])

#Conduct k-means cluster
teams_scaled <- scale(teams_final)
wss <- (nrow(teams_scaled) - 1) * sum(apply(teams_scaled, 2, var))
for (i in 2:15) wss[i] <- sum(kmeans(teams_scaled,
                                     centers = i)$withinss)
plot(1:15, wss, type = "b", xlab = "Number of Clusters",
     ylab = "Within groups sum of squares", main = "Elbow Plot for No. of Clusters")

set.seed(500)
#BUG FIX: the elbow plot above and the clusplot below both use the scaled
#matrix, but the original fit k-means on the unscaled data frame. Cluster on
#the same scaled data so all pieces of the analysis agree (note: this changes
#the cluster assignments relative to the originally published results).
fit1 <- kmeans(teams_scaled, 6, nstart = 25)

library(cluster)
set.seed(500)
clusplot(teams_scaled, fit1$cluster, color = TRUE, shade = TRUE,
         labels = 2, lines = 0, main = "PCA Plot of K-Means Cluster")

#Append the cluster assignments and inspect each cluster
teams_final <- data.frame(teams_final, fit1$cluster)
cluster1 <- teams_final[which(teams_final$fit1.cluster == '1'), ]
cluster2 <- teams_final[which(teams_final$fit1.cluster == '2'), ]
cluster3 <- teams_final[which(teams_final$fit1.cluster == '3'), ]
cluster4 <- teams_final[which(teams_final$fit1.cluster == '4'), ]
cluster5 <- teams_final[which(teams_final$fit1.cluster == '5'), ]
cluster6 <- teams_final[which(teams_final$fit1.cluster == '6'), ]

summary(cluster1)
summary(cluster2)
summary(cluster3)
summary(cluster4)
summary(cluster5)
summary(cluster6)

#Merge clusters with reference dataset of teams and years
teams_reference <- data.frame(teams_reference, fit1$cluster)
teams1 <- teams_reference[which(teams_reference$fit1.cluster == '1'), ]
teams2 <- teams_reference[which(teams_reference$fit1.cluster == '2'), ]
teams3 <- teams_reference[which(teams_reference$fit1.cluster == '3'), ]
teams4 <- teams_reference[which(teams_reference$fit1.cluster == '4'), ]
teams5 <- teams_reference[which(teams_reference$fit1.cluster == '5'), ]
teams6 <- teams_reference[which(teams_reference$fit1.cluster == '6'), ]
team_clusters <- rbind(teams1, teams2, teams3, teams4,
teams5, teams6) 103 | write.csv(team_clusters, file = "Historical Team Clustering Results.csv") 104 | 105 | print(teams1) 106 | teams1$yearID <- as.factor(as.integer(teams1$yearID)) 107 | summary(teams1, 50) 108 | 109 | print(teams2) 110 | teams2$yearID <- as.factor(as.integer(teams2$yearID)) 111 | summary(teams2, 50) 112 | 113 | print(teams3) 114 | teams3$yearID <- as.factor(as.integer(teams3$yearID)) 115 | summary(teams3, 50) 116 | 117 | print(teams4) 118 | teams4$yearID <- as.factor(as.integer(teams4$yearID)) 119 | summary(teams4, 50) 120 | 121 | print(teams5) 122 | teams5$yearID <- as.factor(as.integer(teams5$yearID)) 123 | summary(teams5, 50) 124 | 125 | print(teams6) 126 | teams6$yearID <- as.factor(as.integer(teams6$yearID)) 127 | summary(teams6, 50) 128 | 129 | #Conduct hierarchical cluster 130 | library(cluster) 131 | set.seed(100) 132 | teams_hclust <- teams_scaled 133 | dm = dist(teams_hclust,method="euclidean") 134 | hclust_teams <- hclust(dm, method="complete") 135 | plot(hclust_teams) 136 | 137 | plot(cut(as.dendrogram(hclust_teams), h=8)$lower[[2]]) 138 | teams_hclust[c(307, 324), ] 139 | teams_reference[c(307, 324), ] 140 | teams_hclust[c(307, 304), ] 141 | teams_reference[c(307, 304), ] 142 | 143 | plot(cut(as.dendrogram(hclust_teams), h=4)$lower[[30]]) 144 | teams_hclust[c(768, 944), ] 145 | teams_reference[c(768, 944), ] 146 | teams_hclust[c(944, 771), ] 147 | teams_reference[c(944, 771), ] 148 | 149 | #Conduct PCA on the data 150 | library("factoextra") 151 | library("FactoMineR") 152 | teams_final2 <- teams_final[c(-20:-21)] 153 | 154 | teams_pca <- prcomp(teams_final2, scale = TRUE) 155 | summary(teams_pca) 156 | fviz_screeplot(teams_pca, ncp=10) 157 | 158 | pca.var <- get_pca_var(teams_pca) 159 | pca.var 160 | pca.var$contrib 161 | pca.var$coord 162 | 163 | fviz_contrib(teams_pca, choice = "var", axes = 1) 164 | fviz_contrib(teams_pca, choice = "var", axes = 2) 165 | 166 | fviz_pca_var(teams_pca) 167 | fviz_pca_var(teams_pca, 
col.var="contrib") 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | -------------------------------------------------------------------------------- /r_scripts/hosmer_statcast_analysis.r: -------------------------------------------------------------------------------- 1 | ##Read in CSV and inspect data 2 | hosmer <- read.csv("hosmer_stats_updated.csv") 3 | summary(hosmer) 4 | 5 | ##Bar charts of selected variables 6 | library(ggplot2) 7 | qplot(factor(pitch_type), data=hosmer, geom="bar", fill=factor(pitch_type)) + 8 | ggtitle("Types of Pitches Hit in Play") 9 | 10 | qplot(factor(events), data=hosmer, geom="bar", fill=factor(events)) + 11 | ggtitle("Result of Balls in Play") + coord_flip() 12 | 13 | ##Pitch type analysis 14 | #Let's run a chi-squared test to see if hit location depends on pitch type 15 | table1 <- table(hosmer$pitch_type, hosmer$hit_location) 16 | table1 17 | chisq.test(table1) 18 | 19 | ##Let's look at hit distance, speed, and angle 20 | #First, let's inspect means and standard deviations by pitch type for hit distance, speed, and angle 21 | tapply(hosmer$hit_distance_sc, hosmer$pitch_type, mean) 22 | tapply(hosmer$hit_distance_sc, hosmer$pitch_type, sd) 23 | tapply(hosmer$hit_speed, hosmer$pitch_type, mean) 24 | tapply(hosmer$hit_speed, hosmer$pitch_type, sd) 25 | tapply(hosmer$hit_angle, hosmer$pitch_type, mean) 26 | tapply(hosmer$hit_angle, hosmer$pitch_type, sd) 27 | 28 | #Plot lowess lines to explore the relationship between variables 29 | ggplot(hosmer, aes(x=hit_speed, y=hit_angle)) + 30 | geom_point() + 31 | geom_smooth() + ggtitle("Relationship Between Hit Angle and Hit Speed") 32 | 33 | ggplot(hosmer, aes(x=hit_angle, y=hit_distance_sc)) + 34 | geom_point(shape=19) + 35 | geom_smooth() + ggtitle("Relationship Between Hit Distance and Hit Angle") 36 | 37 | ggplot(hosmer, aes(x=hit_speed, y=hit_distance_sc)) + 38 | geom_point(shape=19) + 39 | geom_smooth() + ggtitle("Relationship Between Hit 
Distance and Hit Speed")

#Let's look at different styles of density plots for the result of balls put in play
ggplot(hosmer, aes(hit_speed, colour = description)) +
  geom_density() + ggtitle("Density by Result of Hit")

ggplot(hosmer, aes(hit_speed, fill = description)) +
  geom_density(position = "stack") + ggtitle("Density by Result of Hit")

ggplot(hosmer, aes(hit_speed, fill = description)) +
  geom_density(position = "fill") + ggtitle("Density by Result of Hit")

#Violin plot of hit speed by type of pitch
g <- ggplot(hosmer, aes(x = pitch_type, y = hit_speed))
g + geom_violin(alpha = 0.5, color = "gray") + geom_jitter(alpha = 0.5, aes(color = pitch_type),
                                                           position = position_jitter(width = 0.1)) +
  coord_flip() + ggtitle("Hit Speed by Pitch Type")

##Segment pitch break length into quartiles and see how Hosmer handles pitches with greater break
library(data.table)
setDT(hosmer)
#right = FALSE (was the unsafe shorthand F) makes the intervals left-closed
hosmer[, quartile := cut(break_length,
                         breaks = quantile(break_length, probs = seq(0, 1, by = 1/4)),
                         labels = 1:4, right = FALSE)]

#Hit speed histograms faceted by break length
ggplot(hosmer, aes(hit_speed, fill = quartile)) +
  geom_histogram(binwidth = 10) + facet_wrap(~ quartile) +
  ggtitle("Hit Speed by Quartile of Pitch Break Length")

#Let's look at some specific scenarios
aggregate(hit_distance_sc ~ pitch_type + inning, data = hosmer, mean)
aggregate(hit_distance_sc ~ pitch_type + inning, data = hosmer, length)

hosmer$outs_when_up <- as.factor(as.numeric(hosmer$outs_when_up))
aggregate(hit_distance_sc ~ pitch_type + inning + outs_when_up, data = hosmer, mean)
aggregate(hit_distance_sc ~ pitch_type + inning + outs_when_up, data = hosmer, length)

#Let's look at the impact of the count
hosmer$balls <- as.factor(as.integer(hosmer$balls))
hosmer$strikes <- as.factor(as.integer(hosmer$strikes))

ggplot(hosmer, aes(hit_speed, fill =
balls)) + 82 | geom_density(position="fill") + ggtitle("Hit Speed by Number of Balls") 83 | 84 | ggplot(hosmer, aes(hit_speed, fill = strikes)) + 85 | geom_density(position="fill") + ggtitle("Hit Speed by Number of Strikes") 86 | 87 | ggplot(hosmer, aes(hit_angle, fill = balls)) + 88 | geom_density(position="fill") + ggtitle("Hit Angle by Number of Balls") 89 | 90 | ggplot(hosmer, aes(hit_angle, fill = strikes)) + 91 | geom_density(position="fill") + ggtitle("Hit Angle by Number of Strikes") 92 | 93 | ggplot(hosmer, aes(hit_distance_sc, fill = balls)) + 94 | geom_density(position="fill") + ggtitle("Hit Distance by Number of Balls") 95 | 96 | ggplot(hosmer, aes(hit_distance_sc, fill = strikes)) + 97 | geom_density(position="fill") + ggtitle("Hit Distance by Number of Strikes") 98 | 99 | aggregate(hit_distance_sc ~ strikes + inning + outs_when_up, data = hosmer, mean) 100 | aggregate(hit_distance_sc ~ strikes + inning + outs_when_up, data = hosmer, length) 101 | 102 | #Lastly, let's do a k-means cluster of hit distance, speed, and angle 103 | #Start by selecting the desired columns and scaling the data 104 | library(dplyr) 105 | hosmer1 <- subset(hosmer, select = c(54, 55, 56)) 106 | 107 | hosmer2 <- scale(hosmer1) 108 | head(hosmer2) 109 | 110 | #Elbow plot to determine the number of clusters 111 | wss <- (nrow(hosmer2)-1)*sum(apply(hosmer2,2,var)) 112 | for (i in 2:15) wss[i] <- sum(kmeans(hosmer2, 113 | centers=i)$withinss) 114 | plot(1:15, wss, type="b", xlab="Number of Clusters", 115 | ylab="Within groups sum of squares", main = "Elbow Plot for No. 
of Clusters") 116 | 117 | #K-Means cluster with k=3 118 | set.seed(600) 119 | fit1 <- kmeans(hosmer2, 3) 120 | hosmer3 <- data.frame(hosmer1, fit1$cluster) 121 | hosmer <- data.frame(hosmer, fit1$cluster) 122 | head(hosmer3, 5) 123 | 124 | #Look at appended clusters in full and stripped down datasets 125 | cluster1 <- hosmer3[which(hosmer3$fit1.cluster=='1'),] 126 | cluster2 <- hosmer3[which(hosmer3$fit1.cluster=='2'),] 127 | cluster3 <- hosmer3[which(hosmer3$fit1.cluster=='3'),] 128 | summary(cluster1) 129 | summary(cluster2) 130 | summary(cluster3) 131 | 132 | cluster1a <- hosmer[which(hosmer$fit1.cluster=='1'),] 133 | cluster2a <- hosmer[which(hosmer$fit1.cluster=='2'),] 134 | cluster3a <- hosmer[which(hosmer$fit1.cluster=='3'),] 135 | summary(cluster1a) 136 | summary(cluster2a) 137 | summary(cluster3a) 138 | 139 | #PCA plot of clusters 140 | library(cluster) 141 | set.seed(500) 142 | clusplot(hosmer2, fit1$cluster, color=TRUE, shade=TRUE, 143 | labels=2, lines=0, main = "PCA Plot of K-Means Cluster") 144 | -------------------------------------------------------------------------------- /python_scripts/pitching_markov_chain.py: -------------------------------------------------------------------------------- 1 | # Citation: http://iacs-courses.seas.harvard.edu/courses/am207/blog/lecture-18.html 2 | 3 | # Library imports 4 | import pandas as pd 5 | import random 6 | import csv 7 | 8 | 9 | def run_pitching_markov_chain(): 10 | 11 | # Read in data 12 | df = pd.read_csv('scherzer_pitches.csv') 13 | 14 | # Change pitch names 15 | df.rename(columns={'15': 'pitch'}, inplace=True) 16 | 17 | pitch_dict = {'CH': 'CH', 'CU': 'CU', 'FA': 'OT', 18 | 'FC': 'OT', 'FF': 'FF', 'FT': 'OT', 19 | 'IN': 'OT', 'PO': 'OT', 'SL': 'SL', 20 | 'UN': 'OT'} 21 | 22 | df['pitch'] = df['pitch'].map(pitch_dict) 23 | 24 | # Transition Matrix 25 | transitions = {} 26 | row_sums = {} 27 | 28 | for line in open('scherzer_pitch_sequences.csv'): 29 | s, e = line.rstrip().split(',') 30 | 
transitions[(s, e)] = transitions.get((s, e), 0.) + 1 31 | row_sums[s] = row_sums.get(s, 0.) + 1 32 | 33 | for k, v in transitions.iteritems(): 34 | s, e = k 35 | transitions[k] = v / row_sums[s] 36 | 37 | with open('scherzer_transitions.csv', 'wb') as f: 38 | w = csv.DictWriter(f, transitions.keys()) 39 | w.writeheader() 40 | w.writerow(transitions) 41 | 42 | # Emission probability calculations 43 | def calculate_emission_probabilities(df): 44 | df.rename(columns={'19': 'strikes'}, inplace=True) 45 | df.rename(columns={'20': 'balls'}, inplace=True) 46 | 47 | df['strikes'] = df['strikes'].astype('str') 48 | df['balls'] = df['balls'].astype('str') 49 | df['count'] = df['balls'] + '-' + df['strikes'] 50 | 51 | pitch_totals = df['pitch'].groupby(df['pitch']).count() 52 | pitch_totals = pd.DataFrame(pitch_totals) 53 | pitch_totals.rename(columns={'pitch': 'pitch_total'}, inplace=True) 54 | pitch_totals.reset_index(inplace = True) 55 | 56 | pitches_in_counts = df['pitch'].groupby([df['count'], df['pitch']]).count() 57 | pitches_in_counts = pd.DataFrame(pitches_in_counts) 58 | pitches_in_counts.rename(columns={'pitch': 'pitch_situations'}, inplace=True) 59 | pitches_in_counts.reset_index(inplace = True) 60 | 61 | pitches_in_counts = pd.merge(pitches_in_counts, pitch_totals, 62 | how = 'inner', on = 'pitch') 63 | 64 | pitches_in_counts['pitch_percentage'] = pitches_in_counts['pitch_situations'] /\ 65 | pitches_in_counts['pitch_total'] 66 | 67 | return pitches_in_counts 68 | 69 | pitches_in_counts = calculate_emission_probabilities(df) 70 | 71 | count_dict = {'0.0-0.0': "'0-0'", '0.0-1.0': "'0-1'", '0.0-2.0': "'0-2'", '1.0-0.0': "'1-0'", 72 | '1.0-1.0': "'1-1'", '1.0-2.0': "'1-2'", '2.0-0.0': "'2-0'", '2.0-1.0': "'2-1'", 73 | '2.0-2.0': "'2-2'", '3.0-0.0': "'3-0'", '3.0-1.0': "'3-1'", '3.0-2.0': "'3-2'"} 74 | 75 | pitches_in_counts['count'] = pitches_in_counts['count'].map(count_dict) 76 | pitches_in_counts.to_csv('pitches_in_counts.csv', index = False) 77 | 78 | # Set up 
states and probabilities 79 | states = ('Fourseam', 'Change', 'Slider', 'Curve', 'Other') 80 | 81 | observations = ('0-0', '0-1', '0-2', '1-0', '1-1', '1-2', '2-0', '2-1', 82 | '2-2', '3-0', '3-1', '3-2') 83 | 84 | start_probability = {'Fourseam': 0.50, 'Change': 0.20, 'Slider': 0.20, 85 | 'Curve': 0.05, 'Other': 0.05} 86 | 87 | transition_probability = { 88 | 'Fourseam' : {'Fourseam': 0.57, 'Change': 0.19, 'Slider': 0.17, 89 | 'Curve': 0.05, 'Other': 0.02}, 90 | 91 | 'Change' : {'Fourseam': 0.61, 'Change': 0.21, 'Slider': 0.12, 92 | 'Curve': 0.04, 'Other': 0.02}, 93 | 94 | 'Slider' : {'Fourseam': 0.58, 'Change': 0.09, 'Slider': 0.27, 95 | 'Curve': 0.03, 'Other': 0.03}, 96 | 97 | 'Curve' : {'Fourseam': 0.61, 'Change': 0.21, 'Slider': 0.09, 98 | 'Curve': 0.08, 'Other': 0.01}, 99 | 100 | 'Other' : {'Fourseam': 0.34, 'Change': 0.18, 'Slider': 0.15, 101 | 'Curve': 0.27, 'Other': 0.06}, 102 | } 103 | 104 | emission_probability = { 105 | 'Fourseam' : {'0-0': 0.27, '0-1': 0.13, '0-2': 0.07, '1-0': 0.09, 106 | '1-1': 0.10, '1-2': 0.10, '2-0': 0.04, '2-1': 0.05, 107 | '2-2': 0.08, '3-0': 0.01, '3-1': 0.02, '3-2': 0.05}, 108 | 109 | 'Change' : {'0-0': 0.17, '0-1': 0.12, '0-2': 0.07, '1-0': 0.11, 110 | '1-1': 0.12, '1-2': 0.15, '2-0': 0.02, '2-1': 0.05, 111 | '2-2': 0.13, '3-0': 0.0, '3-1': 0.0, '3-2': 0.05}, 112 | 113 | 'Slider' : {'0-0': 0.25, '0-1': 0.16, '0-2': 0.10, '1-0': 0.08, 114 | '1-1': 0.10, '1-2': 0.14, '2-0': 0.0, '2-1': 0.03, 115 | '2-2': 0.10, '3-0': 0.0, '3-1': 0.0, '3-2': 0.03}, 116 | 117 | 'Curve' : {'0-0': 0.34, '0-1': 0.18, '0-2': 0.10, '1-0': 0.05, 118 | '1-1': 0.10, '1-2': 0.12, '2-0': 0.0, '2-1': 0.01, 119 | '2-2': 0.09, '3-0': 0.0, '3-1': 0.0, '3-2': 0.02}, 120 | 121 | 'Other' : {'0-0': 0.25, '0-1': 0.14, '0-2': 0.07, '1-0': 0.10, 122 | '1-1': 0.10, '1-2': 0.09, '2-0': 0.03, '2-1': 0.05, 123 | '2-2': 0.08, '3-0': 0.02, '3-1': 0.02, '3-2': 0.04} 124 | } 125 | 126 | # A HMM is created from the above matices for 100 of Scherzer's pitches 127 | # Both 
hidden and visible states are generated 128 | N = 100 129 | hidden = [] 130 | visible = [] 131 | 132 | if random.random() < start_probability[states[0]]: 133 | hidden.append(states[0]) 134 | else: 135 | hidden.append(states[1]) 136 | 137 | for i in xrange(N): 138 | current_state = hidden[i] 139 | if random.random() < transition_probability[current_state][states[0]]: 140 | hidden.append(states[0]) 141 | else: 142 | hidden.append(states[1]) 143 | r = random.random() 144 | prev = 0 145 | for observation in observations: 146 | prev += emission_probability[current_state][observation] 147 | if r < prev: 148 | visible.append(observation) 149 | break 150 | 151 | hidden.pop() 152 | 153 | # Run the Viterbi algorithm 154 | def viterbi(obs, states, start_p, trans_p, emit_p): 155 | V = [{}] 156 | path = {} 157 | 158 | for y in states: 159 | V[0][y] = start_p[y] * emit_p[y][obs[0]] 160 | path[y] = [y] 161 | 162 | for t in range(1, len(obs)): 163 | V.append({}) 164 | newpath = {} 165 | 166 | for y in states: 167 | (prob, state) = max((V[t-1][y0] * trans_p[y0][y] * emit_p[y][obs[t]], y0) for y0 in states) 168 | V[t][y] = prob 169 | newpath[y] = path[state] + [y] 170 | 171 | path = newpath 172 | 173 | (prob, state) = max((V[t][y], y) for y in states) 174 | return (prob, path[state]) 175 | 176 | # Input the generated markov model 177 | def example_model(): 178 | return viterbi(visible, 179 | states, 180 | start_probability, 181 | transition_probability, 182 | emission_probability) 183 | 184 | (prob, p_hidden) = example_model() 185 | 186 | # Assess accuracy of the model 187 | wrong= 0 188 | for i in range(len(hidden)): 189 | if hidden[i] != p_hidden[i]: 190 | wrong = wrong + 1 191 | print "accuracy: " + str(1-float(wrong)/N) 192 | return 193 | 194 | 195 | if __name__ == "__main__": 196 | run_pitching_markov_chain() 197 | -------------------------------------------------------------------------------- /python_scripts/pitcher_similarity.py: 
-------------------------------------------------------------------------------- 1 | # Citation: Programming Collective Intelligence by Toby Segaran 2 | # Import Libraries 3 | import os 4 | 5 | import pandas as pd 6 | import pymysql.cursors 7 | from math import sqrt 8 | from sklearn.preprocessing import MinMaxScaler 9 | 10 | 11 | ## Connect to the database 12 | connection = pymysql.connect(host='localhost', 13 | user='root', 14 | password='xxxxx', 15 | db='xxxxx', 16 | charset='utf8mb4', 17 | cursorclass=pymysql.cursors.DictCursor) 18 | 19 | 20 | def ingest_data(): 21 | pitchers_query = ''' 22 | select 23 | concat(master.nameFirst, ' ', master.nameLast) as "Name", 24 | pitching.yearID as "Year", 25 | pitching.W as "Wins", 26 | pitching.L as "Losses", 27 | pitching.G as "Appearances", 28 | pitching.GS as "Games_Started", 29 | pitching.CG as "Complete_Games", 30 | pitching.SHO as "Shutouts", 31 | pitching.SV as "Saves", 32 | pitching.IPouts as "Outs_Recorded", 33 | pitching.H as "Hits_Surrendered", 34 | pitching.ER as "Earned_Runs", 35 | pitching.HR as "Home_Runs_Surrendered", 36 | pitching.BB as "Walks_Surrendered", 37 | pitching.SO as "Strikeouts", 38 | pitching.BAOpp as "Opponent_Batting_Average", 39 | pitching.ERA as "ERA", 40 | pitching.R as "Runs_Surrendered" 41 | 42 | from pitching 43 | inner join master on pitching.playerID = master.playerID 44 | 45 | where pitching.YearID >= 1900;''' 46 | 47 | pitchers = pd.read_sql(pitchers_query, connection) 48 | return pitchers 49 | 50 | def clean_data(pitchers): 51 | pitchers = pitchers.dropna() 52 | 53 | pitchers['Year'] = pitchers['Year'].astype('str') 54 | pitchers['Player_and_Year'] = pitchers['Name'] + ' ' + pitchers['Year'] 55 | 56 | pitchers['Decisions'] = pitchers['Wins'] + pitchers['Losses'] 57 | pitchers['Wins_Over_Decisions'] = pitchers['Wins'] / pitchers['Decisions'] 58 | pitchers['Wins_Over_Starts'] = pitchers['Wins'] / pitchers['Games_Started'] 59 | 60 | pitchers['Relief_Appearances'] = 
pitchers['Appearances']\ 61 | - pitchers['Games_Started'] 62 | 63 | pitchers['Shutout_Percentage'] = pitchers['Shutouts']\ 64 | / pitchers['Games_Started'] 65 | 66 | pitchers['Outs_Recorded_Per_Appearance'] = pitchers['Outs_Recorded']\ 67 | /pitchers['Appearances'] 68 | 69 | pitchers['Hits_Allowed_Per_Appearance'] = pitchers['Hits_Surrendered']\ 70 | /pitchers['Appearances'] 71 | 72 | pitchers['Earned_Runs_Per_Appearance'] = pitchers['Earned_Runs']\ 73 | /pitchers['Appearances'] 74 | 75 | pitchers['Runs_Per_Appearance'] = pitchers['Runs_Surrendered']\ 76 | /pitchers['Appearances'] 77 | 78 | pitchers['Home_Runs_Per_Appearance'] = pitchers['Home_Runs_Surrendered']\ 79 | /pitchers['Appearances'] 80 | 81 | pitchers['Walks_Per_Appearance'] = pitchers['Walks_Surrendered']\ 82 | /pitchers['Appearances'] 83 | 84 | pitchers['Strikeouts_Per_Appearance'] = pitchers['Strikeouts']\ 85 | /pitchers['Appearances'] 86 | 87 | pitchers = pitchers[['Player_and_Year', 'Decisions', 'Wins_Over_Decisions', 88 | 'Wins_Over_Starts', 'Relief_Appearances', 89 | 'Shutout_Percentage', 'Outs_Recorded_Per_Appearance', 90 | 'Hits_Allowed_Per_Appearance', 'Earned_Runs_Per_Appearance', 91 | 'Runs_Per_Appearance', 'Home_Runs_Per_Appearance', 92 | 'Walks_Per_Appearance', 'Strikeouts_Per_Appearance', 93 | 'ERA']] 94 | 95 | pitchers = pitchers.fillna(value=0) 96 | pitchers['Wins_Over_Starts'] = pitchers['Wins_Over_Starts'].astype('str') 97 | pitchers['Wins_Over_Starts'] = pitchers['Wins_Over_Starts'].str.replace('inf', '0') 98 | pitchers['Wins_Over_Starts'] = pitchers['Wins_Over_Starts'].astype('float') 99 | pitchers['Decisions'] = pitchers['Decisions'].astype('int') 100 | pitchers['Relief_Appearances'] = pitchers['Relief_Appearances'].astype('int') 101 | 102 | return pitchers 103 | 104 | 105 | # Scale data 106 | def scale_data(pitchers): 107 | num_data = pitchers[['Decisions', 'Wins_Over_Decisions', 108 | 'Wins_Over_Starts', 'Relief_Appearances', 109 | 'Shutout_Percentage', 
'Outs_Recorded_Per_Appearance', 110 | 'Hits_Allowed_Per_Appearance', 'Earned_Runs_Per_Appearance', 111 | 'Runs_Per_Appearance', 'Home_Runs_Per_Appearance', 112 | 'Walks_Per_Appearance', 'Strikeouts_Per_Appearance', 113 | 'ERA']] 114 | 115 | scaler = MinMaxScaler() 116 | scaler.fit(num_data) 117 | num_data = scaler.transform(num_data) 118 | num_data = pd.DataFrame(num_data) 119 | 120 | num_data.columns = ['Decisions', 'Wins_Over_Decisions', 121 | 'Wins_Over_Starts', 'Relief_Appearances', 122 | 'Shutout_Percentage', 'Outs_Recorded_Per_Appearance', 123 | 'Hits_Allowed_Per_Appearance', 'Earned_Runs_Per_Appearance', 124 | 'Runs_Per_Appearance', 'Home_Runs_Per_Appearance', 125 | 'Walks_Per_Appearance', 'Strikeouts_Per_Appearance', 126 | 'ERA'] 127 | 128 | pitchers = pitchers[['Player_and_Year']] 129 | 130 | pitchers = pd.merge(pitchers, num_data, how='inner', left_index=True, 131 | right_index=True) 132 | 133 | return pitchers 134 | 135 | 136 | # Create dictionary of pitchers 137 | def create_dictionary(pitchers): 138 | pitchers_melted = pd.melt(pitchers, id_vars=['Player_and_Year'], 139 | value_vars=['Decisions', 'Wins_Over_Decisions', 140 | 'Wins_Over_Starts', 'Relief_Appearances', 141 | 'Shutout_Percentage', 'Outs_Recorded_Per_Appearance', 142 | 'Hits_Allowed_Per_Appearance', 'Earned_Runs_Per_Appearance', 143 | 'Runs_Per_Appearance', 'Home_Runs_Per_Appearance', 144 | 'Walks_Per_Appearance', 'Strikeouts_Per_Appearance', 145 | 'ERA']) 146 | 147 | 148 | player_dictionary = pitchers_melted.groupby('Player_and_Year').apply(lambda x: x.set_index\ 149 | ('variable')['value'].to_dict()).to_dict() 150 | 151 | return player_dictionary 152 | 153 | 154 | # Euclidean Distance Function 155 | def sim_distance(atts, p1, p2): 156 | si = {} 157 | for item in atts[p1]: 158 | if item in atts[p2]: 159 | si[item] = 1 160 | 161 | if len(si) == 0: 162 | return 0 163 | 164 | sum_of_squares = sum([pow(atts[p1][item] - atts[p2][item], 2) for item in 165 | atts[p1] if item in atts[p2]]) 166 | 
167 | return 1 / (1 + sqrt(sum_of_squares)) 168 | 169 | 170 | # Get top matches 171 | def top_matches(atts, person, n=15, similarity=sim_distance): 172 | 173 | scores = [(similarity(atts, person, other), other) for other in atts 174 | if other != person] 175 | scores.sort() 176 | scores.reverse() 177 | return scores[0:n] 178 | 179 | 180 | # Run the similarity analysis 181 | def get_top_matches(player_and_year): 182 | df = top_matches(player_dictionary, player_and_year) 183 | df = pd.DataFrame(df) 184 | df.columns = ['Similarity', 'Pitcher_and_Year'] 185 | return df 186 | 187 | if __name__ == "__main__: 188 | pitchers = ingest_data() 189 | pitchers = clean_data(pitchers) 190 | pitchers = scale_data(pitchers) 191 | player_dictionary = create_dictionary(pitchers) 192 | 193 | pedro2000 = get_top_matches('Pedro Martinez 2000') 194 | clemens1997 = get_top_matches('Roger Clemens 1997') 195 | johnson2002 = get_top_matches('Randy Johnson 2002') 196 | greinke2009 = get_top_matches('Zack Greinke 2009') 197 | maddux1992 = get_top_matches('Greg Maddux 1992') 198 | schilling2001 = get_top_matches('Curt Schilling 2001') 199 | rivera2004 = get_top_matches('Mariano Rivera 2004') 200 | gagne2003 = get_top_matches('Eric Gagne 2003') 201 | 202 | pedro2000.to_csv('pedro2000.csv', index=False) 203 | clemens1997.to_csv('clemens1997.csv', index=False) 204 | johnson2002.to_csv('johnson2002.csv', index=False) 205 | greinke2009.to_csv('greinke2009.csv', index=False) 206 | maddux1992.to_csv('maddux1992.csv', index=False) 207 | schilling2001.to_csv('schilling2001.csv', index=False) 208 | rivera2004.to_csv('rivera2004.csv', index=False) 209 | gagne2003.to_csv('gagne2003.csv', index=False) 210 | 211 | -------------------------------------------------------------------------------- /r_scripts/mlb_attendance_analysis.r: -------------------------------------------------------------------------------- 1 | #Identify links from which to scrape the data 2 | #We'll scrape data from 1995-2015; I want the 
#data to be post-strike
#attendance: http://www.baseball-reference.com/leagues/MLB/1990-misc.shtml
#standings: http://www.baseball-reference.com/leagues/MLB/1990-standings.shtml
#pitching: http://www.baseball-reference.com/leagues/MLB/1990-standard-pitching.shtml
#fielding: http://www.baseball-reference.com/leagues/MLB/1990-standard-fielding.shtml
#batting: http://www.baseball-reference.com/leagues/MLB/1990-standard-batting.shtml

#Load libraries
library(XML)
library(ggplot2)
library(plyr)
library(dplyr)
library(car)
library(data.table)
library(stringr)
library(alluvial)
library(glmnet)

#All five Baseball-Reference pages follow the same URL pattern, so one
#generic scraper replaces the five near-identical fetch functions the
#script previously duplicated.
#  year:        season to scrape
#  page:        URL suffix identifying the report
#  table_index: which HTML table on the page holds the data
fetch_br_table <- function(year, page, table_index = 1) {
  url <- paste0("http://www.baseball-reference.com/leagues/MLB/", year, "-", page, ".shtml")
  tables <- readHTMLTable(url, stringsAsFactors = FALSE)
  data <- tables[[table_index]]
  #Tag every row with its season; the column is named "year" consistently,
  #so the positional renames the old code needed are no longer required.
  data$year <- year
  data
}

fetch_attendance <- function(year) fetch_br_table(year, "misc")
fetch_standings <- function(year) fetch_br_table(year, "standings", table_index = 2)
fetch_pitching <- function(year) fetch_br_table(year, "standard-pitching")
fetch_fielding <- function(year) fetch_br_table(year, "standard-fielding")
fetch_batting <- function(year) fetch_br_table(year, "standard-batting")

#Scrape each dataset for the post-strike seasons
attendance <- ldply(1995:2015, fetch_attendance, .progress = "text")
standings <- ldply(1995:2015, fetch_standings, .progress = "text")
pitching <- ldply(1995:2015, fetch_pitching, .progress = "text")
fielding <- ldply(1995:2015, fetch_fielding, .progress = "text")
batting <- ldply(1995:2015, fetch_batting, .progress = "text")

#Merge the five data frames on team name and year
#We can only merge two data frames at a time
#So we'll have to repeat the process a few times
teams_data <- merge(standings, attendance, by = c("Tm", "year"))
teams_data2 <- merge(teams_data, batting, by = c("Tm", "year"))
teams_data3 <- merge(teams_data2, fielding, by = c("Tm", "year"))
teams_data4 <- merge(teams_data3, pitching, by = c("Tm", "year"))

#It's possible that team names have changed over time
#Let's inspect the data frame to see if that's the case
#A "correct" team should have 21 records (one per season 1995-2015)
teams_counts <- aggregate(year ~ Tm, data = teams_data4, length)
print(teams_counts)
teams_counts[order(teams_counts$year), ]

#It looks like we might have issues with 9 records
#ARI came into the NL in 1998, so their data is fine as is
#Tampa Bay should also only have 18 seasons of data

#Duplicated columns will prevent us from running the next commands,
#so let's delete those here
#NOTE(review): positional indices are fragile if the scraped tables
#ever change shape — verify after any scraper change.
teams_data4 <- teams_data4[c(-71, -92)]

#Consolidate franchises that moved or renamed during the window so each
#franchise aggregates under a single code:
#  MON -> WSN, FLA -> MIA, TBD -> TBR, CAL/ANA -> LAA
franchise_recodes <- "'MON'='WSN';'FLA'='MIA';'TBD'='TBR';'CAL'='LAA';'ANA'='LAA'"
teams_data5 <- mutate(teams_data4, Tm = recode(Tm, franchise_recodes))

#Re-check the per-team record counts after consolidation
teams_counts1 <- aggregate(year ~ Tm, data = teams_data5, length)
print(teams_counts1)

#Inspect the merged frame
str(teams_data5)

#Attendance values contain thousands separators, which R cannot parse as
#numbers; strip them before coercion.
remove_commas <- function(x) {
  str_replace_all(x, ",", "")
}

teams_data5$Attendance <- remove_commas(teams_data5$Attendance)

#Every scraped column was read as character. Coerce the numeric columns
#(identified by position) and rebuild the frame as teams_final.
numeric_cols <- subset(teams_data5, select = c(5:13, 25:33, 40:81, 84:117))
converted <- data.frame(lapply(numeric_cols, as.numeric))
other_cols <- subset(teams_data5, select = c(-5:-13, -25:-33, -40:-81, -84:-117))
teams_final <- data.frame(converted, other_cols)

#Tag each team-season with the quartile of its win total
setDT(teams_final)
teams_final[, wins_quartile := cut(W.x,
                                   breaks = quantile(W.x, probs = seq(0, 1, by = 1/4)),
                                   labels = 1:4, right = FALSE)]

str(teams_final$wins_quartile)

#I want to create rad viz plots but found it quite cumbersome in R
#So, let's write CSVs of the data we want to visualize and import to
#Python, which makes rad viz pretty easy using Pandas
offensive_stats <- subset(teams_final, select = c(118,25,26,29,31,34))
write.csv(offensive_stats, file = "offensive_stats.csv")

pitching_stats <- subset(teams_final, select = c(118,56,67,79,81))
write.csv(pitching_stats, file = "pitching_stats.csv")

#Develop cleveland dot plots
#Wins by Season
ggplot(teams_final) + geom_point(aes(x=Tm, y=W.x), colour = "blue") +
  coord_flip() + ggtitle("Single Season Win Totals") +
  xlab("Team") + ylab("Wins")

#Total Wins
#BUG FIX: the ordered factor levels were assigned to an undefined object
#(wins_total1) that was then plotted; use wins_total consistently.
wins_total <- aggregate(W.x ~ Tm, data = teams_final, sum)
wins_total$Tm <- factor(wins_total$Tm, levels=wins_total[order(wins_total$W.x), "Tm"])

ggplot(wins_total) + geom_point(aes(x=Tm, y=W.x), colour = "blue") +
  coord_flip() + ggtitle("Win Totals from 1995-2015") +
  xlab("Team") + ylab("Wins")

#Total Home Runs
hr_total <- aggregate(HR.x ~ Tm, data = teams_final, sum)
hr_total$Tm <- factor(hr_total$Tm, levels=hr_total[order(hr_total$HR.x), "Tm"])

ggplot(hr_total) + geom_point(aes(x=Tm, y=HR.x), colour = "blue") +
  coord_flip() + ggtitle("Home Run Totals from 1995-2015") +
  xlab("Team") + ylab("Home Runs")

#Total Stolen Bases
sb_total <- aggregate(SB ~ Tm, data = teams_final, sum)
sb_total$Tm <- factor(sb_total$Tm, levels=sb_total[order(sb_total$SB), "Tm"])

ggplot(sb_total) + geom_point(aes(x=Tm, y=SB), colour = "blue") +
  coord_flip() + ggtitle("Stolen Base Totals from 1995-2015") +
  xlab("Team") + ylab("Stolen Bases")

#Total Errors
e_total <- aggregate(E ~ Tm, data = teams_final, sum)
e_total$Tm <- factor(e_total$Tm, levels=e_total[order(e_total$E), "Tm"])

ggplot(e_total) + geom_point(aes(x=Tm, y=E), colour = "blue") +
  coord_flip() + ggtitle("Error Totals from 1995-2015") +
  xlab("Team") + ylab("Errors")

#Total Earned Runs
er_total <- aggregate(ER ~ Tm, data = teams_final, sum)
er_total$Tm <- factor(er_total$Tm, levels=er_total[order(er_total$ER), "Tm"])

ggplot(er_total) + geom_point(aes(x=Tm, y=ER), colour = "blue") +
  coord_flip() + ggtitle("Earned Run Totals from 1995-2015") +
  xlab("Team") + ylab("Earned Runs")

#Alluvial plot of wins over time for selected franchises
wins_over_time <- subset(teams_final, select = c(95,96,2))
selected_teams <- filter(wins_over_time, Tm == "NYY" | Tm == "ATL" | Tm == "STL"
                         | Tm == "SFG" | Tm == "BOS" | Tm == "TEX")

alluvial_ts(selected_teams, title = "Wins over Time")

#Now that we've inspected the data, let's do some prediction
#What seems to impact attendance?
#Subset to the columns we propose are predictive
teams_subset <- subset(teams_final, select = c(10,2,12,13,17,25,34,56,73,77,95))
summary(teams_subset)
teams_subset$Tm <- as.factor(as.character(teams_subset$Tm))

#Let's also sum the attendance column for context
sum(teams_subset$Attendance)
#1,492,344,734

#Create the model matrix and response needed by glmnet.
#BUG FIX: the original sampled the training indices from x BEFORE x was
#created; x and y must be built first.
x <- model.matrix(Attendance ~ ., teams_subset)[, -1]
y <- teams_subset$Attendance

#Split data into training and test sets
set.seed(10)
train <- sample(1:nrow(x), nrow(x)/2)
test <- (-train)
y.test <- y[test]

#Ordinary least squares on the full dataset to inspect the coefficients
pairs(teams_subset)
ols_model <- lm(Attendance ~ 0 + W.x + BatAge.x + PAge.x + X.A.S +
                  R.y + SO.x + E + SV + ER + Tm, data = teams_subset)
plot(ols_model)
summary(ols_model)
vif(ols_model)

#Looks like we have some issues with multi-collinearity, so let's pivot to
#ridge and lasso

#Ridge regression
ridge_model <- glmnet(x[train,], y[train], alpha = 0)

#Use cross-validation to determine the best value for lambda
cv.out <- cv.glmnet(x[train,], y[train], alpha = 0)
plot(cv.out)
bestlam <- cv.out$lambda.min
bestlam

#Refit ridge on the full dataset at the optimal lambda; view coefficients
out <- glmnet(x, y, alpha = 0)
ridge.coef <- predict(out, type = "coefficients", s = bestlam)
ridge.coef

#Lasso regression: same process with alpha = 1
lasso_model <- glmnet(x[train,], y[train], alpha = 1)

#Use cross-validation to determine the best lambda
cv.out1 <- cv.glmnet(x[train,], y[train], alpha = 1)
plot(cv.out1)
#BUG FIX: bestlam1 was read from cv.out (the RIDGE cross-validation);
#it must come from the lasso CV object, cv.out1.
bestlam1 <- cv.out1$lambda.min
bestlam1

#Refit lasso on the full dataset at the optimal lambda; view coefficients
out1 <- glmnet(x, y, alpha = 1)
lasso.coef <- predict(out1, type = "coefficients", s = bestlam1)
lasso.coef

#Lastly, let's look at the test-set MSE of each model
#OLS is simply the same as setting lambda equal to zero
#BUG FIX: the prediction referenced ridge.mod, which was never defined;
#the fitted object is ridge_model.
ols_pred <- predict(ridge_model, s = 0, newx = x[test,], exact = TRUE)
ols_mse <- mean((ols_pred - y.test)^2)
print(ols_mse)
sqrt(ols_mse)
#MSE: 197,876,169,581
#RMSE:444,832

#MSE of the ridge regression model
ridge.pred <- predict(ridge_model, s = bestlam, newx = x[test,])
ridge_mse <- mean((ridge.pred - y.test)^2)
print(ridge_mse)
sqrt(ridge_mse)
#MSE: 189,415,000,000
#RMSE: 435,218

#MSE of the lasso regression model
lasso.pred <- predict(lasso_model, s = bestlam1, newx = x[test,])
lasso_mse <- mean((lasso.pred - y.test)^2)
print(lasso_mse)
sqrt(lasso_mse)
#MSE: 220,407,311,134
#RMSE: 469,475



--------------------------------------------------------------------------------
/r_scripts/rookie_all_star_predictions.Rmd:
--------------------------------------------------------------------------------
# Rookie All-Star Modeling
The goal of this project is twofold: 1) predict if a player will become an all-star based on their rookie offensive stats and 2) determine clusters of rookies.

At the end of the day, this is a fairly limited analysis, though we can see some interesting
patterns in the data.

```{r message=FALSE}
options(warn=-1)
setwd("C:/Users/Micah/Desktop/applied_data_mining")
set.seed(19)

library(ggplot2)
library(lattice)
library(caret)
library(pROC)
library(plyr)
library(rpart)
library(rattle)
library(cluster)
library(data.table)
library(MASS)
library(colorRamps)
library(nFactors)
library(gplots)
library(RColorBrewer)
library(semPlot)
library(waffle)
library(extrafont)

font_import()


```

# Cleaning Functions
```{r}
subset_to_rookie_year <- function(df){
  # A rookie season is the first yearID in which the player logged more
  # than 100 at-bats; keep exactly one row per player, for that season.
  df$yearID <- as.numeric(df$yearID)
  eligible <- subset(df, AB > 100)
  first_season <- aggregate(yearID ~ playerID, data = eligible, FUN = min)
  df <- merge(df, first_season, by = c('playerID', 'yearID'))
  df <- df[!duplicated(df$playerID), ]
  # For simplicity, remove the small number of players with 100+ ABs for
  # two teams in their rookie season.
  subset(df, AB > 100)
}


# Only use the last few decades of players.
# Do not use players who are too recent - they may still become all-stars.
subset_to_between_1970_and_2010 <- function(df){
  # Keep 1970-2010 rookies: old enough that their all-star outcome is
  # settled, recent enough to be comparable to the modern game.
  df <- df[ which(df$yearID >= 1970 & df$yearID <= 2010), ]
  return(df)
}


count_all_star_appearances <- function(all_stars, batting){
  # Label each rookie season with the target variable all_star ('yes'/'no'):
  # did the player make an all-star team in a NON-rookie season?
  # Rookie-year selections are tracked separately and excluded from the target.
  all_star_temp <- all_stars[,c('playerID', 'yearID')]
  all_star_temp$rookie_all_star_appearance <- 'yes'
  batting <- merge(batting, all_star_temp, by=c('playerID', 'yearID'), all.x=TRUE)
  batting$rookie_all_star_appearance[is.na(batting$rookie_all_star_appearance)] <- 'no'

  # Flag all-star rows that coincide with a rookie season and drop them,
  # so the target only counts appearances AFTER the rookie year.
  batting$rookie_id <- 'yes'
  all_stars_non_rookie <- merge(all_stars, batting, by=c('playerID', 'yearID'), all.x=TRUE)
  all_stars_non_rookie$rookie_id[is.na(all_stars_non_rookie$rookie_id)] <- 'no'
  all_stars_non_rookie <- all_stars_non_rookie[ which(all_stars_non_rookie$rookie_id == 'no'), ]

  # Count remaining appearances per player, then binarize.
  all_stars_non_rookie <- all_stars_non_rookie[c('playerID')]
  all_stars_non_rookie <- as.data.frame(table(all_stars_non_rookie))
  colnames(all_stars_non_rookie) <- c('playerID', 'all_star')

  merged_df <- merge(batting, all_stars_non_rookie, by='playerID', all.x=TRUE)
  merged_df$all_star[merged_df$all_star > 0] <- "yes"
  merged_df$all_star[merged_df$all_star != 'yes'] <- "no"
  merged_df$all_star[is.na(merged_df$all_star)] <- 'no'
  return(merged_df)
}


create_name_to_id_mapping <- function(df){
  # Map playerID to a human-readable "First Last" display name.
  # BUG FIX: paste(a, ' ', b) produced extra spaces because paste already
  # separates its arguments with a single space by default.
  df$playerName <- paste(df$nameFirst, df$nameLast)
  df <- df[c('playerID', 'playerName')]
  return(df)
}


calculate_slg_obp_obp_and_avg <- function(df){
  # Add avg, obp, and slg columns. (Function name kept as-is — the
  # execution chunks below call it — despite the duplicated "obp".)
  df[is.na(df)] <- 0
  df$avg <- df$H / df$AB
  df$obp <- (df$H + df$BB + df$HBP) / (df$AB + df$BB + df$HBP + df$SF)
  # BUG FIX: slugging = total bases / AB, where total bases =
  # singles + 2*2B + 3*3B + 4*HR = H + X2B + 2*X3B + 3*HR.
  # The original expression double-counted every extra-base hit.
  df$slg <- (df$H + df$X2B + 2 * df$X3B + 3 * df$HR) / df$AB
  df[is.na(df)] <- 0
  return(df)
}


select_columns_for_modeling <- function(df){
  # Keep rookie-season counting stats, rate stats, the target, and the ID.
  df <- subset(df, select=c(G, AB, R, H, X2B, X3B, HR, RBI, SB, BB, SO, avg, obp, slg,
                            all_star, playerID))
  return(df)
}


drop_player_id <- function(df){
  # Remove the identifier column before modeling.
  drop <- c('playerID')
  df <- df[ , !(names(df) %in% drop)]
  return(df)
}

```

## Exploration Functions
```{r}
count_factor_occurrences_by_target <- function(df, feature, target, title){
  # Bar chart of a categorical feature, filled by the target class.
  print(ggplot(df, aes_string(feature, fill = target)) +
          geom_bar() + ggtitle(title))
}


make_histogram_by_target <- function(df, feature, target, title, bins){
  # Histogram of a numeric feature, filled by the target class.
  print(ggplot(df, aes_string(feature, fill = target)) +
          geom_histogram(binwidth = bins) + ggtitle(title))
}


make_parallel_coordinates <- function(df, feature, cuts){
  # Parallel-coordinates plot, colored on a blue-to-red gradient of `feature`
  # binned into `cuts` intervals.
  c <- blue2red(cuts)
  r <- cut(feature, cuts)
  parcoord(df, col=c[as.numeric(r)])
}

```

## Factor Analysis Functions
```{r}
make_scree_table_for_factor_analysis <- function(df){
  # Scree-test table suggesting how many factors to retain.
  nScree(df)
}


get_eigenvalues <- function(df){
  # Eigen decomposition of the correlation matrix.
  eigen(cor(df))
}


build_factor_analysis_model <- function(df, n_factors){
  # Maximum-likelihood factor analysis; lower bounds the uniquenesses at
  # 0.01 to avoid Heywood cases.
  fa <- factanal(df, factors = n_factors, lower = 0.01)
  print(fa)
  return(fa)
}


make_factor_analysis_heatmp <- function(fa){
  # Heatmap of the factor loadings.
  heatmap.2(fa$loadings, col = brewer.pal(9, "Greens"), trace = "none",
            key = FALSE, dend = 'none', Colv = FALSE, cexCol = 1.2,
            main = "Factor Loadings")
}


make_factor_analysis_sem_plot <- function(fa){
  # Path diagram of loadings at |estimate| >= 0.4
  # (green = positive, red = negative).
  semPaths(fa, what = "est", residuals = FALSE, cut = 0.4,
           posCol = c("white", "darkgreen"),
           negCol = c("white", "red"),
           edge.label.cex = 0.60, nCharNodes = 7)
}

```

## Supervised Machine Learning Functions
```{r}
train_random_forest <- function(train_df, target){
  # Random forest via caret, tuning mtry over sqrt(p) and log2(p).
  # FIX: metric="ROC" requires summaryFunction=twoClassSummary in
  # trainControl; without it caret warns and silently optimizes Accuracy.
  control <- trainControl(method="repeatedcv", number=3, repeats=3,
                          classProbs=TRUE, summaryFunction=twoClassSummary)
  mtry <- c(sqrt(ncol(train_df)), log2(ncol(train_df)))
  grid <- expand.grid(.mtry=mtry)
  formula <- as.formula(paste(target, "~ ."))

  model <- train(formula,
                 data=train_df,
                 preProcess=c("center", "scale"),
                 method="rf",
                 metric="ROC",
                 trControl=control,
                 tuneGrid=grid,
                 allowParallel=TRUE,
                 num.threads=4)
  return(model)
}


train_log_reg <- function(train_df, target){
  # Logistic regression via caret.
  control <- trainControl(method="repeatedcv", number=10, repeats=3,
                          classProbs=TRUE, summaryFunction=twoClassSummary)
  # FIX: glm has no tunable parameters; caret expects a single-row dummy
  # grid here (the original multi-value grid makes train() fail).
  grid <- data.frame(parameter = "none")
  formula <- as.formula(paste(target, "~ ."))

  model <- train(formula,
                 data=train_df,
                 preProcess=c("center", "scale"),
                 method="glm",
                 family="binomial",
                 metric="ROC",
                 trControl=control,
                 tuneGrid=grid)

  return(model)
}


train_decision_tree <- function(train_df, target){
  # rpart tree via caret, tuning maximum depth.
  # FIX: summaryFunction=twoClassSummary added so metric="ROC" is honored.
  control <- trainControl(method="repeatedcv", number=10, repeats=3,
                          classProbs=TRUE, summaryFunction=twoClassSummary)
  grid <- expand.grid(.maxdepth=c(3, 5, 7, 10))
  formula <- as.formula(paste(target, "~ ."))

  model <- train(formula,
                 data=train_df,
                 preProcess=c("center", "scale"),
                 method="rpart2",
                 metric="ROC",
                 trControl=control,
                 tuneGrid=grid)
  return(model)
}


train_gradient_boosting <- function(train_df, target){
  # Stochastic gradient boosting (gbm) via caret over a small grid.
  # FIX: summaryFunction=twoClassSummary added so metric="ROC" is honored.
  control <- trainControl(method="repeatedcv", number=10, repeats=3,
                          classProbs=TRUE, summaryFunction=twoClassSummary)
  grid <- expand.grid(interaction.depth = c(1, 3, 5),
                      n.trees = c(50, 100, 150),
                      shrinkage = 0.1,
                      n.minobsinnode = 20)

  formula <- as.formula(paste(target, "~ ."))

  model <- train(formula,
                 data=train_df,
                 preProcess=c("center", "scale"),
                 method="gbm",
                 metric="ROC",
                 verbose=F,
                 trControl=control,
                 tuneGrid=grid)
  return(model)
}


plot_decision_tree <- function(df, target, depth){
  # Fit and draw a standalone rpart tree for interpretability.
  formula <- as.formula(paste(target, "~ ."))
  set.seed(19)
  tree <- rpart(formula, method="class", maxdepth=depth, data=df)
  printcp(tree)
  print(tree)
  fancyRpartPlot(tree)
}


plot_model <- function(model){
  # Plot caret's tuning-parameter performance profile.
  plot(model)
}


print_grid_search_results <- function(model){
  # Resampled performance for each candidate in the tuning grid.
  model$bestTune
  results <- model$results
  results
}


print_confusion_matrix <- function(model, df, target){
  # Confusion matrix on `df`, treating 'yes' as the positive class.
  predictions <- predict(model, df)
  con_matrix <- confusionMatrix(predictions, target, positive = 'yes')
  con_matrix
}


get_roc_auc <- function(model, df, target){
  # Print the AUC and draw the ROC curve from predicted class probabilities.
  probabilities <- predict(model, df, type="prob")

  ROC <- roc(predictor=probabilities$yes,
             response=target)
  print(ROC$auc)
  plot(ROC, main="ROC")
  return(ROC)
}


get_variable_importances <- function(model){
  # caret's model-specific variable importance.
  varImp(model)
}

```

## Unsupervised Machine Learning Functions
```{r}
scale_dataframe <- function(df){
  # Standardize all columns except column 3.
  # NOTE(review): excluding column 3 (R, runs) looks arbitrary — confirm
  # this is intentional before reusing.
  df[, -c(3)] <- scale(df[, -c(3)])
  df <- data.frame(df)
  return(df)
}


plot_within_cluster_sum_of_squares <- function(df, title){
  # Elbow plot: within-group sum of squares for k = 1..15.
  wss <- (nrow(df)-1) * sum(apply(df, 2, var))
  for (i in 2:15) wss[i] <- sum(kmeans(df, centers=i)$withinss)
  plot(1:15, wss, type="b", xlab="Number of Clusters",
       ylab="Within groups sum of squares", main = paste(title,' elbow plot'))
}


train_k_means_model <- function(df, k){
  # K-means with 25 random starts for stability; fixed seed for
  # reproducibility.
  set.seed(19)
  model <- kmeans(df, k, nstart=25)
  return(model)
}


plot_k_means_model <- function(model, df, title){
  # 2-D principal-components projection of the cluster assignments.
  clusplot(df, model$cluster, color=TRUE, shade=TRUE,
           labels=2, lines=0, main = paste(title,' PCA Plot of K-Means'))
}


# Hierarchical clustering on Euclidean distance with complete linkage;
# plots the dendrogram and returns the model.
create_hclust_and_plot <- function(df){
  set.seed(19)
  dm = dist(df,method="euclidean")
  hclust_model <- hclust(dm, method="complete")
  plot(hclust_model)
  return(hclust_model)
}


summarize_clusters <- function(df){
  # Print a summary() of each of the three k-means clusters.
  cluster1 <- df[which(df$k_means.cluster=='1'),]
  cluster2 <- df[which(df$k_means.cluster=='2'),]
  cluster3 <- df[which(df$k_means.cluster=='3'),]

  print('cluster 1 summary')
  print(summary(cluster1))
  print('cluster 2 summary')
  print(summary(cluster2))
  print('cluster 3 summary')
  print(summary(cluster3))
}

```

## Execution

### Read in data
```{r}
all_star_df <- read.csv('data/AllstarFull.csv')
batting_df <- read.csv('data/Batting.csv')
people_df <- read.csv('data/People.csv')

```

### Data cleaning
```{r}
batting_df <- subset_to_rookie_year(batting_df)
batting_df <- subset_to_between_1970_and_2010(batting_df)
batting_df <- count_all_star_appearances(all_star_df, batting_df)
batting_df <- calculate_slg_obp_obp_and_avg(batting_df)
batting_df <- select_columns_for_modeling(batting_df)
# Keep a copy with playerID — the clustering section below uses it.
batting_df_copy <- batting_df
batting_df <- drop_player_id(batting_df)

```

### Data Exploration
```{r}
agg_cols_for_hist <- c('G', 'H', 'X2B', 'HR', 'RBI', 'SB')
for (column in agg_cols_for_hist){
  make_histogram_by_target(batting_df, column, 'all_star',
                           paste(column,' histogram by all star'), 10)
}

rate_cols_for_hist <- c('avg', 'obp', 'slg')
for (column in rate_cols_for_hist){
  make_histogram_by_target(batting_df, column, 'all_star',
                           paste(column,' histogram by all star'), .1)
}

# home runs parallel coordinates
make_parallel_coordinates(batting_df[1:14], batting_df$HR, 20)
# obp parallel coordinates
make_parallel_coordinates(batting_df[1:14], batting_df$obp, 20)
# hits parallel coordinates
make_parallel_coordinates(batting_df[1:14], batting_df$H, 20)
# slg parallel coordinates
make_parallel_coordinates(batting_df[1:14], batting_df$slg, 20)

```

## Factor Analysis
```{r}
make_scree_table_for_factor_analysis(batting_df[1:14])
get_eigenvalues(batting_df[1:14])
batting_factor_analysis <- build_factor_analysis_model(batting_df[1:14], 3)
make_factor_analysis_heatmp(batting_factor_analysis)
make_factor_analysis_sem_plot(batting_factor_analysis)

```

### Classification Models
### Class Imbalance
```{r}
# This is not perfectly to scale but close enough to be useful.
waffle(c(all_star = 52, non_all_star = 233), rows = 19,
       title = "Target Distribution")

```


#### Train-Test Splits
```{r}
partition <- createDataPartition(batting_df$all_star, p = 0.7, list=FALSE)
train_df <- batting_df[partition, ]
test_df <- batting_df[-partition, ]

```

#### Decision Tree
```{r}
decision_tree <- train_decision_tree(train_df, 'all_star')
plot_model(decision_tree)
print_grid_search_results(decision_tree)
print_confusion_matrix(decision_tree, test_df, test_df$all_star)
tree_roc <- get_roc_auc(decision_tree, test_df, test_df$all_star)
plot_decision_tree(train_df, 'all_star', 3)

```

#### Random Forest
```{r message=FALSE}
random_forest <- train_random_forest(train_df, 'all_star')
plot_model(random_forest)
print_grid_search_results(random_forest)
print_confusion_matrix(random_forest, test_df, test_df$all_star)
forest_roc <- get_roc_auc(random_forest, test_df, test_df$all_star)
get_variable_importances(random_forest)

```

#### Gradient Boosting
```{r message=FALSE}
gradient_boosting <- train_gradient_boosting(train_df, 'all_star')
plot_model(gradient_boosting)
print_grid_search_results(gradient_boosting)
print_confusion_matrix(gradient_boosting, test_df, test_df$all_star)
gb_roc <- get_roc_auc(gradient_boosting, test_df, test_df$all_star)
get_variable_importances(gradient_boosting)

```

#### Logistic Regression
```{r}
log_reg <- train_log_reg(train_df, 'all_star')
print_grid_search_results(log_reg)
print_confusion_matrix(log_reg, test_df, test_df$all_star)
lr_roc <- get_roc_auc(log_reg, test_df, test_df$all_star)

```

## Clustering
### Data Preparation
```{r}
# Use playerID as row names so dendrogram/cluster-plot labels are readable.
rownames(batting_df_copy) <- batting_df_copy$playerID
batting_df_copy <- subset(batting_df_copy, select=-c(playerID, all_star))
batting_df_scaled <- scale_dataframe(batting_df_copy)

```

### K-Means Clustering
```{r}
plot_within_cluster_sum_of_squares(batting_df_scaled, 'rookie batting data')
k_means <- train_k_means_model(batting_df_scaled, 3)
plot_k_means_model(k_means, batting_df_scaled, 'rookie batting')
batting_df_copy <- data.frame(batting_df_copy, k_means$cluster)
summarize_clusters(batting_df_copy)
batting_df_copy$k_means.cluster <- as.factor(batting_df_copy$k_means.cluster)

agg_summary_cols <- c('G', 'AB', 'R', 'H', 'X2B', 'X3B', 'HR', 'RBI', 'SB')
for (column in agg_summary_cols){
  make_histogram_by_target(batting_df_copy, column, 'k_means.cluster',
                           paste(column,' histogram by cluster'), 10)
}

rate_summary_cols <- c('avg', 'obp', 'slg')
for (column in rate_summary_cols){
  make_histogram_by_target(batting_df_copy, column, 'k_means.cluster',
                           paste(column,' histogram by cluster'), .1)
}

```

### Hierarchical Clustering
```{r}
hclust_model <- create_hclust_and_plot(batting_df_scaled)
# Zoom into two sub-dendrograms cut at heights 8 and 6.
plot(cut(as.dendrogram(hclust_model), h=8)$lower[[4]])
plot(cut(as.dendrogram(hclust_model), h=6)$lower[[15]])

```

-------------------------------------------------------------------------------- /r_scripts/association_rules_2016_games.r: --------------------------------------------------------------------------------
##Load libraries##
library(XML)
library(ggplot2)
library(arules)
library(arulesViz)
library(plyr)
library(dplyr)
library(stringr)
library(matrixStats)
library(data.table)
library(Hmisc)
library(gridExtra)
library(knitr)
library(rgl)

##Read in and inspect data##

#Scrape the 2016 league summary page so we can pull each team's abbreviation;
#those abbreviations drive the per-team game-log scrapes below.
abbreviations <- htmlParse("http://www.baseball-reference.com/leagues/MLB/2016.shtml")
abbreviations.tab <- readHTMLTable(abbreviations, stringsAsFactors=FALSE)
abbreviations.df <- abbreviations.tab[[2]]

#Character vector of the 30 team abbreviations (first 30 rows are teams;
#later rows are league-average/footer rows).
teams <- abbreviations.df$Tm[1:30]

#Scrape per-game offensive statistics for one team.
#team: an abbreviation such as "KCR".  Returns the batting game-log table
#with a `team` column appended.
fetch_offense <- function(team) {
  #BUG FIX: the original URL ended in "&year=2016.com" -- the stray ".com"
  #was a typo in the query string.
  url <- paste0("http://www.baseball-reference.com/teams/tgl.cgi?team=", team, "&t=b&year=2016")
  data <- readHTMLTable(url, stringsAsFactors = FALSE)
  data <- data[[1]]
  data$team <- team
  data
}

#BUG FIX: the original called ldply("teams", fetch_offense) -- the *string
#literal* "teams" instead of the `teams` vector -- so fetch_offense was asked
#for a team named "teams", the page had no tables, and data[[1]] threw the
#"subscript out of bounds" error.  That led to 30 hand-written per-team calls.
#Passing the actual vector lets one call fetch and row-bind everything.
offensive_complete <- ldply(teams, fetch_offense, .progress = "text")

#Scrape per-game pitching statistics for one team (same shape as
#fetch_offense; t=p selects the pitching game log).
fetch_pitching <- function(team) {
  url <- paste0("http://www.baseball-reference.com/teams/tgl.cgi?team=", team, "&t=p&year=2016")
  data <- readHTMLTable(url, stringsAsFactors = FALSE)
  data <- data[[1]]
  data$team <- team
  data
}

pitching_complete <- ldply(teams, fetch_pitching, .progress = "text")

#The pitching data was read in an odd way (non-breaking spaces arrive as
#the mis-encoded "Â" character), so strip that artifact from the dates.
fix_date <- function(x) {
  str_replace_all(x, "Â", "")
}

pitching_complete$Date <- fix_date(pitching_complete$Date)

#Merge data frames on team and Gtm, which is essentially a unique ID
#for each game
game_logs <- merge(offensive_complete, pitching_complete, by=c("Gtm", "team"))

#Drop the first 750 rows, which are essentially the headings
#(baseball-reference repeats the header row inside each table)
game_logs <- game_logs[-c(1:750), ]

#All of the columns were read in as characters
#Let's convert each column to the correct data type
#We'll drop a few columns in the process
cat_columns <- subset(game_logs, select = c(1:6, 32:33, 65))
game_logs1 <- data.frame(sapply(cat_columns, as.factor))
summary(game_logs1)

num_columns <- subset(game_logs, select = c(8:31, 39:64))
game_logs2 <- data.frame(sapply(num_columns, as.numeric))

game_logs3 <- subset(game_logs, select = c(7, 66))

games_final <- data.frame(game_logs1, game_logs2, game_logs3)
summary(games_final)

write.csv(games_final, file = "2016_game_logs.csv")

##Create exploratory visualizations##
#Look at distributions of key variables
#(.x suffixes are offensive columns from the merge, .y are pitching)

#Runs scored
p1 <- ggplot(games_final, aes(R.x)) +
  geom_density() + ggtitle("Distribution of Runs Scored") + xlab("Runs Scored") +
  ylab(" ")

#Stolen Bases
p2 <- ggplot(games_final, aes(SB.x)) +
  geom_density() + ggtitle("Distribution of Stolen Bases") + xlab("Stolen Bases") +
  ylab(" ")

#Earned Runs
p3 <- ggplot(games_final, aes(ER)) +
  geom_density() + ggtitle("Distribution of Earned Runs") + xlab("Earned Runs") +
  ylab(" ")

#Pitches
p4 <- ggplot(games_final, aes(Pit)) +
  geom_density() + ggtitle("Distribution of Pitches") + xlab("Pitches") +
  ylab(" ")

grid.arrange(p1, p2, p3, p4, nrow = 2, ncol = 2)

#Make 3D scatter plots of runs, hits, and home runs
interleave <- function(v1, v2) as.vector(rbind(v1,v2))

plot3d(games_final$R.x, games_final$HR.x, games_final$H.x, xlab = "Runs",
       ylab = "Home Runs", zlab = "Hits", type = "s", size = 0.75, lit = FALSE)

segments3d(interleave(games_final$R.x, games_final$R.x),
           interleave(games_final$HR.x, games_final$HR.x),
           interleave(games_final$H.x, min(games_final$H.x)),
           alpha = 0.4, col = "blue")

#Create color-coded scatter plot of pitches thrown and walks issued
p5 <- ggplot(games_final, aes(x=Pit, y=BB.y, colour=R.y)) +
  geom_point(size=3) + scale_color_gradientn(colours = c("darkred",
  "orange", "yellow")) + xlab("Pitches Thrown") + ylab("Walks Issued") +
  ggtitle("Relationship between Walks and Pitches \n Colored by Runs Allowed")

p5

#Let's dive into some stats by umpire
umps <- table(games_final$Umpire)
umps <- as.data.frame(umps)
mean(umps$Freq)
sd(umps$Freq)

#Plot the mean and standard deviation of one game stat, grouped by home-plate
#umpire (top 15 umpires by mean).  This replaces six copy-pasted stanzas from
#the original; it also fixes a bug where the strikeout stanza computed the
#mean from SO.y (pitching) but the sd from SO.x (batting).
#stat:  column name in games_final, as a string (e.g. "H.x")
#label: human-readable stat name used in the plot title (e.g. "Hits")
plot_ump_stat <- function(stat, label) {
  f <- as.formula(paste(stat, "~ Umpire"))
  stat_mean <- aggregate(f, data = games_final, mean)
  stat_sd <- aggregate(f, data = games_final, sd)
  combined <- merge(stat_mean, stat_sd, by = "Umpire")
  names(combined)[2] <- "Mean"
  names(combined)[3] <- "Standard Deviation"
  #Empty umpire strings become "Unknown"
  combined$Umpire <- sub("^$", "Unknown", combined$Umpire)
  top15 <- head(combined[order(-combined$Mean),], 15)

  molten <- melt(top15, id.vars='Umpire')
  ggplot(molten, aes(Umpire, value)) +
    geom_bar(aes(fill = variable), position = "dodge", stat="identity") +
    theme(axis.text.x = element_text(angle = 60, hjust = 1)) + xlab("") +
    ggtitle(paste0("Mean and Standard Deviations for ", label,
                   " \n in Games Segmented by Home Plate Umpire"))
}

#Inspect hits, runs, walks, strikeouts, pitches, and strikes by umpire
print(plot_ump_stat("H.x", "Hits"))
print(plot_ump_stat("R.x", "Runs"))
print(plot_ump_stat("BB.y", "Walks"))
print(plot_ump_stat("SO.y", "Strikeouts"))
print(plot_ump_stat("Pit", "Pitches"))
print(plot_ump_stat("Str", "Strikes"))

#Let's see what variables are most correlated
correlations <- cor(games_final[10:59])
write.csv(correlations, file = "game_stat_correlations.csv")

##Cut each variable into quartiles (discretization for association rules)
game_num <- games_final[10:59]

quartile3 <- function(x) {
  ntile(x, 4)
}

cut_data <- apply(game_num, 2, quartile3)
cut_data <- data.frame(cut_data)
cut_data <- data.frame(sapply(cut_data, as.factor))
summary(cut_data)

##Conduct association rules mining##

#Tell R to treat the data as transactions
game.trans <- as(cut_data, "transactions")
summary(game.trans)

#Calculate, summarize, and plot rules
seg.rules <- apriori(game.trans, parameter = list(support=0.1, conf=0.3, target="rules"))
summary(seg.rules)
plot(seg.rules)

#Inspect 50 rules with the highest lift
seg.hi <- head(sort(seg.rules, by = "lift"), 50)
inspect(seg.hi)
plot(seg.hi, method = "graph", control = list(type="items"))

#Let's drop a few variables that probably should not be in the analysis
#(composite rate stats that are functions of the other columns, so their
#rules are tautological)
myvars <- names(cut_data) %in% c("BA", "OBP", "SLG", "OPS", "IR", "IS")
cut_data1 <- cut_data[!myvars]

##Re-conduct association rules mining##
#Tell R to treat the data as transactions
game.trans1 <- as(cut_data1, "transactions")
summary(game.trans1)

#Calculate, summarize, and plot rules
seg.rules1 <- apriori(game.trans1, parameter = list(support=0.1, conf=0.3, target="rules"))
summary(seg.rules1)
plot(seg.rules1)

#Inspect 50 rules with the highest lift
#NOTE(review): the comment says 50 but the code takes 500 -- confirm which
#was intended.
seg.hi1 <- head(sort(seg.rules1, by = "lift"), 500)
inspect(seg.hi1)
plot(seg.hi1, method = "graph", control = list(type="items"))

-------------------------------------------------------------------------------- /python_scripts/match_up_simulations.py: --------------------------------------------------------------------------------
import numpy as np
import pandas as pd


# Simulates a single pitcher-vs-batter at-bat, pitch by pitch.  The parameter
# list continues beyond this excerpt: one (pitch_probs, hit_surrender_prob,
# hit_prob, swing_prob) group per ball-strike count, plus per-pitch-type
# probabilities and the probability that a swing produces an out.
def run_match_up_simulation(fastball_surrender_prob, curve_surrender_prob, change_surrender_prob, fastball_hit_prob,
                            curve_hit_prob, change_hit_prob, zero_zero_pitch_probs, zero_zero_hit_surrender_prob,
                            zero_zero_hit_prob, one_zero_pitch_probs, one_zero_hit_surrender_prob, one_zero_hit_prob,
                            zero_one_pitch_probs, zero_one_hit_surrender_prob, zero_one_hit_prob,
                            one_one_pitch_probs,
                            one_one_hit_surrender_prob, one_one_hit_prob, two_zero_pitch_probs,
                            two_zero_hit_surrender_prob, two_zero_hit_prob, zero_two_pitch_probs,
                            zero_two_hit_surrender_prob, zero_two_hit_prob, one_two_pitch_probs,
                            one_two_hit_surrender_prob, one_two_hit_prob, two_one_pitch_probs,
                            two_one_hit_surrender_prob, two_one_hit_prob, three_zero_pitch_probs,
                            three_zero_hit_surrender_prob, three_zero_hit_prob, two_two_pitch_probs,
                            two_two_hit_surrender_prob, two_two_hit_prob, three_one_pitch_probs,
                            three_one_hit_surrender_prob, three_one_hit_prob, three_two_pitch_probs,
                            three_two_hit_surrender_prob, three_two_hit_prob, zero_zero_swing_prob,
                            one_zero_swing_prob, zero_one_swing_prob, one_one_swing_prob,
                            two_zero_swing_prob, zero_two_swing_prob, one_two_swing_prob,
                            two_one_swing_prob, three_zero_swing_prob, two_two_swing_prob,
                            three_one_swing_prob, three_two_swing_prob, swing_produces_out):
    """Simulate one at-bat as a sequence of pitches through the ball-strike counts.

    Each `*_pitch_probs` is a 3-element probability list ordered
    [fastball, curve, change]; each `*_swing_prob`, `*_hit_surrender_prob`
    and `*_hit_prob` is a scalar probability for that count.  Returns a
    DataFrame with one row per simulated pitch (columns: count, pitch,
    swing, result).

    NOTE(review): the repeated `series.any() == '...'` comparisons below rely
    on numpy's object-array `any()` returning the element itself for a
    one-element Series -- this is fragile and behaves differently on newer
    pandas/numpy; verify before upgrading.  `DataFrame.append` was removed in
    pandas 2.0 (use pd.concat).  Also, `new_count` is only assigned when the
    at-bat continues, so the `if new_count == ...` checks below can raise
    NameError after a first-pitch hit/out -- TODO confirm intended control flow.
    """

    def run_pitch_simulation(count, count_swing_prob, pitch_probs, count_surrender_prob, fastball_surrender_prob,
                             count_hit_prob, fastball_hit_prob, curve_surrender_prob, curve_hit_prob,
                             change_surrender_prob, change_hit_prob, df):
        """Simulate one pitch at `count` and append its outcome row to df.

        Hit probability is the mean of four inputs: the count-specific and
        pitch-specific surrender probs (pitcher side) and the count-specific
        and pitch-specific hit probs (batter side).  A take ('no' swing) is
        always recorded as 'no_hit'.
        """

        pitch = np.random.choice(a=['fastball', 'curve', 'change'], p=pitch_probs)
        swing = np.random.choice(a=['yes', 'no'], p=[count_swing_prob, 1 - count_swing_prob])

        if pitch == 'fastball':
            hit_prob = (count_surrender_prob + fastball_surrender_prob + count_hit_prob + fastball_hit_prob) / 4
            if swing == 'yes':
                outcome = np.random.choice(a=['hit', 'no_hit'], p=[hit_prob, 1 - hit_prob])
            elif swing == 'no':
                outcome = 'no_hit'
            df = df.append(pd.DataFrame({'count': count, 'pitch': 'fastball', 'swing': swing, 'result': [outcome]}))

        elif pitch == 'curve':
            hit_prob = (count_surrender_prob + curve_surrender_prob + count_hit_prob + curve_hit_prob) / 4
            if swing == 'yes':
                outcome = np.random.choice(a=['hit', 'no_hit'], p=[hit_prob, 1 - hit_prob])
            elif swing == 'no':
                outcome = 'no_hit'
            df = df.append(pd.DataFrame({'count': count, 'pitch': 'curve', 'swing': swing, 'result': [outcome]}))

        elif pitch == 'change':
            hit_prob = (count_surrender_prob + change_surrender_prob + count_hit_prob + change_hit_prob) / 4
            if swing == 'yes':
                outcome = np.random.choice(a=['hit', 'no_hit'], p=[hit_prob, 1 - hit_prob])
            elif swing == 'no':
                outcome = 'no_hit'
            df = df.append(pd.DataFrame({'count': count, 'pitch': 'change', 'swing': swing, 'result': [outcome]}))

        return df

    at_bat_results = pd.DataFrame()

    # Pitch 1: always starts at an 0-0 count.
    at_bat_results = run_pitch_simulation(count='0-0',
                                          count_swing_prob=zero_zero_swing_prob,
                                          pitch_probs=zero_zero_pitch_probs,
                                          count_surrender_prob=zero_zero_hit_surrender_prob,
                                          fastball_surrender_prob=fastball_surrender_prob,
                                          count_hit_prob=zero_zero_hit_prob,
                                          fastball_hit_prob=fastball_hit_prob,
                                          curve_surrender_prob=curve_surrender_prob,
                                          curve_hit_prob=curve_hit_prob,
                                          change_surrender_prob=change_surrender_prob,
                                          change_hit_prob=change_hit_prob,
                                          df=at_bat_results)

    last_row_df = at_bat_results.tail(1)

    # A swing may end the at-bat with an out (probability swing_produces_out).
    if last_row_df['swing'].any() == 'yes':
        end_of_at_bat = np.random.choice(a=['yes', 'no'], p=[swing_produces_out, 1 - swing_produces_out])
    else:
        end_of_at_bat = 'no'

    # NOTE(review): this checks the whole 'result' column, unlike the later
    # stages which check only the last row -- confirm the asymmetry is intended.
    if at_bat_results['result'].any() == 'no_hit' and end_of_at_bat == 'no':
        new_count = np.random.choice(a=['0-1', '1-0'], p=[0.50, 0.50])

    # Pitch 2: branch on the new count (ball or strike, chosen 50/50).
    if new_count == '0-1':
        at_bat_results = run_pitch_simulation(count='0-1',
                                              count_swing_prob=zero_one_swing_prob,
                                              pitch_probs=zero_one_pitch_probs,
                                              count_surrender_prob=zero_one_hit_surrender_prob,
                                              fastball_surrender_prob=fastball_surrender_prob,
                                              count_hit_prob=zero_one_hit_prob,
                                              fastball_hit_prob=fastball_hit_prob,
                                              curve_surrender_prob=curve_surrender_prob,
                                              curve_hit_prob=curve_hit_prob,
                                              change_surrender_prob=change_surrender_prob,
                                              change_hit_prob=change_hit_prob,
                                              df=at_bat_results)

    elif new_count == '1-0':
        at_bat_results = run_pitch_simulation(count='1-0',
                                              count_swing_prob=one_zero_swing_prob,
                                              pitch_probs=one_zero_pitch_probs,
                                              count_surrender_prob=one_zero_hit_surrender_prob,
                                              fastball_surrender_prob=fastball_surrender_prob,
                                              count_hit_prob=one_zero_hit_prob,
                                              fastball_hit_prob=fastball_hit_prob,
                                              curve_surrender_prob=curve_surrender_prob,
                                              curve_hit_prob=curve_hit_prob,
                                              change_surrender_prob=change_surrender_prob,
                                              change_hit_prob=change_hit_prob,
                                              df=at_bat_results)

    last_row_df = at_bat_results.tail(1)

    if last_row_df['swing'].any() == 'yes':
        end_of_at_bat = np.random.choice(a=['yes', 'no'], p=[swing_produces_out, 1 - swing_produces_out])
    else:
        end_of_at_bat = 'no'

    # NOTE(review): the elif branch below omits the end_of_at_bat check that
    # the if branch has -- confirm whether that is deliberate.
    if last_row_df['result'].any() == 'no_hit' and last_row_df['count'].any() == '1-0' and end_of_at_bat == 'no':
        new_count = np.random.choice(a=['1-1', '2-0'], p=[0.50, 0.50])

    elif last_row_df['result'].any() == 'no_hit' and last_row_df['count'].any() == '0-1':
        new_count = np.random.choice(a=['1-1', '0-2'], p=[0.50, 0.50])

    # Pitch 3.
    if new_count == '1-1':
        at_bat_results = run_pitch_simulation(count='1-1',
                                              count_swing_prob=one_one_swing_prob,
                                              pitch_probs=one_one_pitch_probs,
                                              count_surrender_prob=one_one_hit_surrender_prob,
                                              fastball_surrender_prob=fastball_surrender_prob,
                                              count_hit_prob=one_one_hit_prob,
                                              fastball_hit_prob=fastball_hit_prob,
                                              curve_surrender_prob=curve_surrender_prob,
                                              curve_hit_prob=curve_hit_prob,
                                              change_surrender_prob=change_surrender_prob,
                                              change_hit_prob=change_hit_prob,
                                              df=at_bat_results)

    elif new_count == '2-0':
        at_bat_results = run_pitch_simulation(count='2-0',
                                              count_swing_prob=two_zero_swing_prob,
                                              pitch_probs=two_zero_pitch_probs,
                                              count_surrender_prob=two_zero_hit_surrender_prob,
                                              fastball_surrender_prob=fastball_surrender_prob,
                                              count_hit_prob=two_zero_hit_prob,
                                              fastball_hit_prob=fastball_hit_prob,
                                              curve_surrender_prob=curve_surrender_prob,
                                              curve_hit_prob=curve_hit_prob,
                                              change_surrender_prob=change_surrender_prob,
                                              change_hit_prob=change_hit_prob,
                                              df=at_bat_results)

    elif new_count == '0-2':
        at_bat_results = run_pitch_simulation(count='0-2',
                                              count_swing_prob=zero_two_swing_prob,
                                              pitch_probs=zero_two_pitch_probs,
                                              count_surrender_prob=zero_two_hit_surrender_prob,
                                              fastball_surrender_prob=fastball_surrender_prob,
                                              count_hit_prob=zero_two_hit_prob,
                                              fastball_hit_prob=fastball_hit_prob,
                                              curve_surrender_prob=curve_surrender_prob,
                                              curve_hit_prob=curve_hit_prob,
                                              change_surrender_prob=change_surrender_prob,
                                              change_hit_prob=change_hit_prob,
                                              df=at_bat_results)

    last_row_df = at_bat_results.tail(1)

    if last_row_df['swing'].any() == 'yes':
        end_of_at_bat = np.random.choice(a=['yes', 'no'], p=[swing_produces_out, 1 - swing_produces_out])
    else:
        end_of_at_bat = 'no'

    if last_row_df['result'].any() == 'no_hit' and last_row_df['count'].any() == '1-1' and end_of_at_bat == 'no':
        new_count = np.random.choice(a=['1-2', '2-1'], p=[0.50, 0.50])

    elif last_row_df['result'].any() == 'no_hit' and last_row_df['count'].any() == '2-0':
        new_count = np.random.choice(a=['2-1', '3-0'], p=[0.50, 0.50])

    elif last_row_df['result'].any() == 'no_hit' and last_row_df['count'].any() == '0-2':
        # Foul/ball from 0-2 can only advance to 1-2 in this model.
        new_count = '1-2'

    # Pitch 4 (note these are independent ifs, not elif, unlike pitch 3).
    if new_count == '1-2':
        at_bat_results = run_pitch_simulation(count='1-2',
                                              count_swing_prob=one_two_swing_prob,
                                              pitch_probs=one_two_pitch_probs,
                                              count_surrender_prob=one_two_hit_surrender_prob,
                                              fastball_surrender_prob=fastball_surrender_prob,
                                              count_hit_prob=one_two_hit_prob,
                                              fastball_hit_prob=fastball_hit_prob,
                                              curve_surrender_prob=curve_surrender_prob,
                                              curve_hit_prob=curve_hit_prob,
                                              change_surrender_prob=change_surrender_prob,
                                              change_hit_prob=change_hit_prob,
                                              df=at_bat_results)

    if new_count == '2-1':
        at_bat_results = run_pitch_simulation(count='2-1',
                                              count_swing_prob=two_one_swing_prob,
                                              pitch_probs=two_one_pitch_probs,
                                              count_surrender_prob=two_one_hit_surrender_prob,
                                              fastball_surrender_prob=fastball_surrender_prob,
                                              count_hit_prob=two_one_hit_prob,
                                              fastball_hit_prob=fastball_hit_prob,
                                              curve_surrender_prob=curve_surrender_prob,
                                              curve_hit_prob=curve_hit_prob,
                                              change_surrender_prob=change_surrender_prob,
                                              change_hit_prob=change_hit_prob,
                                              df=at_bat_results)

    if new_count == '3-0':
        at_bat_results = run_pitch_simulation(count='3-0',
                                              count_swing_prob=three_zero_swing_prob,
                                              pitch_probs=three_zero_pitch_probs,
                                              count_surrender_prob=three_zero_hit_surrender_prob,
                                              fastball_surrender_prob=fastball_surrender_prob,
                                              count_hit_prob=three_zero_hit_prob,
                                              fastball_hit_prob=fastball_hit_prob,
                                              curve_surrender_prob=curve_surrender_prob,
                                              curve_hit_prob=curve_hit_prob,
                                              change_surrender_prob=change_surrender_prob,
                                              change_hit_prob=change_hit_prob,
                                              df=at_bat_results)

    last_row_df = at_bat_results.tail(1)

    if last_row_df['swing'].any() == 'yes':
        end_of_at_bat = np.random.choice(a=['yes', 'no'], p=[swing_produces_out, 1 - swing_produces_out])
    else:
        end_of_at_bat = 'no'

    if last_row_df['result'].any() == 'no_hit' and last_row_df['count'].any() == '1-2' and end_of_at_bat == 'no':
        new_count = '2-2'

    elif last_row_df['result'].any() == 'no_hit' and last_row_df['count'].any() == '2-1':
        new_count = np.random.choice(a=['2-2', '3-1'], p=[0.50, 0.50])

    elif last_row_df['result'].any() == 'no_hit' and last_row_df['count'].any() == '3-0':
        new_count = '3-1'

    # Pitch 5.
    if new_count == '2-2':
        at_bat_results = run_pitch_simulation(count='2-2',
                                              count_swing_prob=two_two_swing_prob,
                                              pitch_probs=two_two_pitch_probs,
                                              count_surrender_prob=two_two_hit_surrender_prob,
                                              fastball_surrender_prob=fastball_surrender_prob,
                                              count_hit_prob=two_two_hit_prob,
                                              fastball_hit_prob=fastball_hit_prob,
                                              curve_surrender_prob=curve_surrender_prob,
                                              curve_hit_prob=curve_hit_prob,
                                              change_surrender_prob=change_surrender_prob,
                                              change_hit_prob=change_hit_prob,
                                              df=at_bat_results)

    if new_count == '3-1':
        at_bat_results = run_pitch_simulation(count='3-1',
                                              count_swing_prob=three_one_swing_prob,
                                              pitch_probs=three_one_pitch_probs,
                                              count_surrender_prob=three_one_hit_surrender_prob,
                                              fastball_surrender_prob=fastball_surrender_prob,
                                              count_hit_prob=three_one_hit_prob,
                                              fastball_hit_prob=fastball_hit_prob,
                                              curve_surrender_prob=curve_surrender_prob,
                                              curve_hit_prob=curve_hit_prob,
                                              change_surrender_prob=change_surrender_prob,
                                              change_hit_prob=change_hit_prob,
                                              df=at_bat_results)

    last_row_df = at_bat_results.tail(1)

    if last_row_df['swing'].any() == 'yes':
        end_of_at_bat = np.random.choice(a=['yes', 'no'], p=[swing_produces_out, 1 - swing_produces_out])
    else:
        end_of_at_bat = 'no'

    # Pitch 6: any surviving at-bat resolves at a full 3-2 count.
    if last_row_df['result'].any() == 'no_hit' and end_of_at_bat == 'no':
        at_bat_results = run_pitch_simulation(count='3-2',
                                              count_swing_prob=three_two_swing_prob,
                                              pitch_probs=three_two_pitch_probs,
                                              count_surrender_prob=three_two_hit_surrender_prob,
                                              fastball_surrender_prob=fastball_surrender_prob,
                                              count_hit_prob=three_two_hit_prob,
                                              fastball_hit_prob=fastball_hit_prob,
                                              curve_surrender_prob=curve_surrender_prob,
                                              curve_hit_prob=curve_hit_prob,
                                              change_surrender_prob=change_surrender_prob,
                                              change_hit_prob=change_hit_prob,
                                              df=at_bat_results)

    return at_bat_results


if __name__ == "__main__":
    # Scenario inputs: pitch-type probabilities are [fastball, curve, change];
    # scalars are probabilities for the named count or pitch type.
    fastball_surrender_prob = 0.240
    curve_surrender_prob = 0.260
    change_surrender_prob = 0.250

    fastball_hit_prob = 0.300
    curve_hit_prob = 0.230
    change_hit_prob = 0.270

    zero_zero_pitch_probs = [0.60, 0.25, 0.15]
    zero_zero_hit_surrender_prob = 0.250
    zero_zero_hit_prob = 0.260
    zero_zero_swing_prob = 0.250

    one_zero_pitch_probs = [0.65, 0.20, 0.15]
    one_zero_hit_surrender_prob = 0.255
    one_zero_hit_prob = 0.265
    one_zero_swing_prob = 0.40

    zero_one_pitch_probs = [0.55, 0.25, 0.20]
    zero_one_hit_surrender_prob = 0.245
    zero_one_hit_prob = 0.255
    zero_one_swing_prob = 0.450

    one_one_pitch_probs = [0.60, 0.20, 0.20]
    one_one_hit_surrender_prob = 0.250
    one_one_hit_prob = 0.260
    one_one_swing_prob = 0.50

    two_zero_pitch_probs = [0.75, 0.20, 0.05]
    two_zero_hit_surrender_prob = 0.275
    two_zero_hit_prob = 0.285
    two_zero_swing_prob = 0.40

    zero_two_pitch_probs = [0.45, 0.35, 0.20]
    zero_two_hit_surrender_prob = 0.205
    zero_two_hit_prob = 0.220
    zero_two_swing_prob = 0.50

    one_two_pitch_probs = [0.55, 0.20, 0.25]
    one_two_hit_surrender_prob = 0.215
    one_two_hit_prob = 0.230
    one_two_swing_prob = 0.60

    two_one_pitch_probs = [0.65, 0.20, 0.15]
    two_one_hit_surrender_prob = 0.270
    two_one_hit_prob = 0.280
    two_one_swing_prob = 0.60

    three_zero_pitch_probs = [0.90, 0.05, 0.05]
    three_zero_hit_surrender_prob = 0.275
    three_zero_hit_prob = 0.285
    three_zero_swing_prob = 0.10

    two_two_pitch_probs = [0.65, 0.20, 0.15]
two_two_hit_surrender_prob = 0.230 346 | two_two_hit_prob = 0.245 347 | two_two_swing_prob = 0.65 348 | 349 | three_one_pitch_probs = [0.75, 0.20, 0.05] 350 | three_one_hit_surrender_prob = 0.275 351 | three_one_hit_prob = 0.285 352 | three_one_swing_prob = 0.55 353 | 354 | three_two_pitch_probs = [0.70, 0.15, 0.15] 355 | three_two_hit_surrender_prob = 0.260 356 | three_two_hit_prob = 0.265 357 | three_two_swing_prob = 0.75 358 | 359 | swing_produces_out = 0.80 360 | 361 | at_bat_results = pd.DataFrame() 362 | 363 | simulation_runs = 100 364 | counter = 0 365 | while counter < simulation_runs: 366 | temp_df = run_match_up_simulation(fastball_surrender_prob, curve_surrender_prob, change_surrender_prob, 367 | fastball_hit_prob, curve_hit_prob, change_hit_prob, zero_zero_pitch_probs, 368 | zero_zero_hit_surrender_prob, zero_zero_hit_prob, one_zero_pitch_probs, 369 | one_zero_hit_surrender_prob, one_zero_hit_prob, zero_one_pitch_probs, 370 | zero_one_hit_surrender_prob, zero_one_hit_prob, one_one_pitch_probs, 371 | one_one_hit_surrender_prob, one_one_hit_prob, two_zero_pitch_probs, 372 | two_zero_hit_surrender_prob, two_zero_hit_prob, zero_two_pitch_probs, 373 | zero_two_hit_surrender_prob, zero_two_hit_prob, one_two_pitch_probs, 374 | one_two_hit_surrender_prob, one_two_hit_prob, two_one_pitch_probs, 375 | two_one_hit_surrender_prob, two_one_hit_prob, three_zero_pitch_probs, 376 | three_zero_hit_surrender_prob, three_zero_hit_prob, two_two_pitch_probs, 377 | two_two_hit_surrender_prob, two_two_hit_prob, three_one_pitch_probs, 378 | three_one_hit_surrender_prob, three_one_hit_prob, three_two_pitch_probs, 379 | three_two_hit_surrender_prob, three_two_hit_prob, zero_zero_swing_prob, 380 | one_zero_swing_prob, zero_one_swing_prob, one_one_swing_prob, 381 | two_zero_swing_prob, zero_two_swing_prob, one_two_swing_prob, 382 | two_one_swing_prob, three_zero_swing_prob, two_two_swing_prob, 383 | three_one_swing_prob, three_two_swing_prob, swing_produces_out) 384 | 385 | 
at_bat_results = at_bat_results.append(temp_df) 386 | counter += 1 387 | 388 | at_bat_results.to_csv('simulation_results.csv', index=False) 389 | -------------------------------------------------------------------------------- /csv_outputs/historical_team_clustering_results.csv: -------------------------------------------------------------------------------- 1 | yearID,franchID,cluster 2 | 1946,BOS,1 3 | 1946,CLE,1 4 | 1946,DET,1 5 | 1946,NYY,1 6 | 1947,NYY,1 7 | 1948,LAD,1 8 | 1950,CHC,1 9 | 1950,CLE,1 10 | 1950,SFG,1 11 | 1951,CLE,1 12 | 1952,BOS,1 13 | 1952,LAD,1 14 | 1952,ATL,1 15 | 1952,CHC,1 16 | 1952,CIN,1 17 | 1952,CLE,1 18 | 1952,DET,1 19 | 1952,SFG,1 20 | 1952,NYY,1 21 | 1952,PIT,1 22 | 1953,LAD,1 23 | 1953,CLE,1 24 | 1953,ATL,1 25 | 1954,BAL,1 26 | 1954,LAD,1 27 | 1954,CLE,1 28 | 1954,ATL,1 29 | 1954,SFG,1 30 | 1954,NYY,1 31 | 1954,MIN,1 32 | 1955,BAL,1 33 | 1955,BOS,1 34 | 1955,LAD,1 35 | 1955,CHW,1 36 | 1955,CHC,1 37 | 1955,CLE,1 38 | 1955,ATL,1 39 | 1955,SFG,1 40 | 1955,NYY,1 41 | 1955,PHI,1 42 | 1955,PIT,1 43 | 1955,STL,1 44 | 1956,BAL,1 45 | 1956,LAD,1 46 | 1956,CHW,1 47 | 1956,CHC,1 48 | 1956,CIN,1 49 | 1956,CLE,1 50 | 1956,OAK,1 51 | 1956,ATL,1 52 | 1956,SFG,1 53 | 1956,NYY,1 54 | 1956,PHI,1 55 | 1956,PIT,1 56 | 1956,STL,1 57 | 1956,MIN,1 58 | 1957,BAL,1 59 | 1957,BOS,1 60 | 1957,LAD,1 61 | 1957,CHW,1 62 | 1957,CHC,1 63 | 1957,CLE,1 64 | 1957,DET,1 65 | 1957,OAK,1 66 | 1957,ATL,1 67 | 1957,SFG,1 68 | 1957,NYY,1 69 | 1957,PHI,1 70 | 1957,PIT,1 71 | 1957,STL,1 72 | 1957,MIN,1 73 | 1958,BAL,1 74 | 1958,BOS,1 75 | 1958,CHW,1 76 | 1958,CHC,1 77 | 1958,CIN,1 78 | 1958,CLE,1 79 | 1958,DET,1 80 | 1958,OAK,1 81 | 1958,LAD,1 82 | 1958,ATL,1 83 | 1958,NYY,1 84 | 1958,PHI,1 85 | 1958,PIT,1 86 | 1958,SFG,1 87 | 1958,STL,1 88 | 1958,MIN,1 89 | 1959,BAL,1 90 | 1959,BOS,1 91 | 1959,CHW,1 92 | 1959,CHC,1 93 | 1959,CIN,1 94 | 1959,CLE,1 95 | 1959,DET,1 96 | 1959,OAK,1 97 | 1959,ATL,1 98 | 1959,NYY,1 99 | 1959,PHI,1 100 | 1959,PIT,1 101 | 1959,SFG,1 102 | 1959,STL,1 
103 | 1959,MIN,1 104 | 1960,BAL,1 105 | 1960,BOS,1 106 | 1960,CHW,1 107 | 1960,CHC,1 108 | 1960,CIN,1 109 | 1960,CLE,1 110 | 1960,DET,1 111 | 1960,OAK,1 112 | 1960,LAD,1 113 | 1960,ATL,1 114 | 1960,NYY,1 115 | 1960,PHI,1 116 | 1960,PIT,1 117 | 1960,SFG,1 118 | 1960,STL,1 119 | 1960,MIN,1 120 | 1961,BAL,1 121 | 1961,BOS,1 122 | 1961,CHW,1 123 | 1961,CIN,1 124 | 1961,CLE,1 125 | 1961,DET,1 126 | 1961,OAK,1 127 | 1961,MIN,1 128 | 1961,ATL,1 129 | 1961,NYY,1 130 | 1961,PHI,1 131 | 1961,PIT,1 132 | 1961,SFG,1 133 | 1961,STL,1 134 | 1961,TEX,1 135 | 1962,BAL,1 136 | 1962,BOS,1 137 | 1962,CHW,1 138 | 1962,CHC,1 139 | 1962,CIN,1 140 | 1962,CLE,1 141 | 1962,DET,1 142 | 1962,HOU,1 143 | 1962,OAK,1 144 | 1962,ANA,1 145 | 1962,LAD,1 146 | 1962,MIN,1 147 | 1962,ATL,1 148 | 1962,NYY,1 149 | 1962,NYM,1 150 | 1962,PHI,1 151 | 1962,PIT,1 152 | 1962,SFG,1 153 | 1962,STL,1 154 | 1962,TEX,1 155 | 1963,BAL,1 156 | 1963,CHW,1 157 | 1963,CHC,1 158 | 1963,CIN,1 159 | 1963,DET,1 160 | 1963,HOU,1 161 | 1963,OAK,1 162 | 1963,ANA,1 163 | 1963,LAD,1 164 | 1963,MIN,1 165 | 1963,ATL,1 166 | 1963,NYY,1 167 | 1963,NYM,1 168 | 1963,PHI,1 169 | 1963,PIT,1 170 | 1963,SFG,1 171 | 1963,STL,1 172 | 1963,TEX,1 173 | 1964,BAL,1 174 | 1964,CHW,1 175 | 1964,CHC,1 176 | 1964,CIN,1 177 | 1964,DET,1 178 | 1964,HOU,1 179 | 1964,ANA,1 180 | 1964,LAD,1 181 | 1964,ATL,1 182 | 1964,NYY,1 183 | 1964,NYM,1 184 | 1964,PHI,1 185 | 1964,PIT,1 186 | 1964,SFG,1 187 | 1964,STL,1 188 | 1964,TEX,1 189 | 1965,BAL,1 190 | 1965,ANA,1 191 | 1965,CHW,1 192 | 1965,CHC,1 193 | 1965,CLE,1 194 | 1965,DET,1 195 | 1965,HOU,1 196 | 1965,OAK,1 197 | 1965,LAD,1 198 | 1965,MIN,1 199 | 1965,ATL,1 200 | 1965,NYY,1 201 | 1965,NYM,1 202 | 1965,PIT,1 203 | 1965,SFG,1 204 | 1965,STL,1 205 | 1965,TEX,1 206 | 1966,ATL,1 207 | 1966,BAL,1 208 | 1966,BOS,1 209 | 1966,ANA,1 210 | 1966,CHW,1 211 | 1966,CLE,1 212 | 1966,HOU,1 213 | 1966,OAK,1 214 | 1966,LAD,1 215 | 1966,MIN,1 216 | 1966,NYY,1 217 | 1966,NYM,1 218 | 1966,PHI,1 219 | 1966,PIT,1 220 | 
1966,SFG,1 221 | 1966,STL,1 222 | 1966,TEX,1 223 | 1967,ATL,1 224 | 1967,BAL,1 225 | 1967,BOS,1 226 | 1967,ANA,1 227 | 1967,CHW,1 228 | 1967,CHC,1 229 | 1967,CIN,1 230 | 1967,CLE,1 231 | 1967,DET,1 232 | 1967,HOU,1 233 | 1967,OAK,1 234 | 1967,LAD,1 235 | 1967,MIN,1 236 | 1967,NYY,1 237 | 1967,NYM,1 238 | 1967,PHI,1 239 | 1967,PIT,1 240 | 1967,SFG,1 241 | 1967,STL,1 242 | 1967,TEX,1 243 | 1968,ATL,1 244 | 1968,BAL,1 245 | 1968,BOS,1 246 | 1968,ANA,1 247 | 1968,CHW,1 248 | 1968,CHC,1 249 | 1968,CIN,1 250 | 1968,CLE,1 251 | 1968,DET,1 252 | 1968,HOU,1 253 | 1968,LAD,1 254 | 1968,MIN,1 255 | 1968,NYY,1 256 | 1968,NYM,1 257 | 1968,OAK,1 258 | 1968,PHI,1 259 | 1968,PIT,1 260 | 1968,SFG,1 261 | 1968,STL,1 262 | 1968,TEX,1 263 | 1969,ATL,1 264 | 1969,BAL,1 265 | 1969,BOS,1 266 | 1969,ANA,1 267 | 1969,CHW,1 268 | 1969,CHC,1 269 | 1969,CLE,1 270 | 1969,DET,1 271 | 1969,KCR,1 272 | 1969,LAD,1 273 | 1969,MIN,1 274 | 1969,WSN,1 275 | 1969,NYY,1 276 | 1969,NYM,1 277 | 1969,OAK,1 278 | 1969,SDP,1 279 | 1969,MIL,1 280 | 1969,SFG,1 281 | 1969,STL,1 282 | 1969,TEX,1 283 | 1970,ATL,1 284 | 1970,BAL,1 285 | 1970,BOS,1 286 | 1970,ANA,1 287 | 1970,CHW,1 288 | 1970,CHC,1 289 | 1970,CIN,1 290 | 1970,CLE,1 291 | 1970,DET,1 292 | 1970,HOU,1 293 | 1970,KCR,1 294 | 1970,LAD,1 295 | 1970,MIN,1 296 | 1970,MIL,1 297 | 1970,WSN,1 298 | 1970,NYY,1 299 | 1970,OAK,1 300 | 1970,PIT,1 301 | 1970,TEX,1 302 | 1971,ATL,1 303 | 1971,BAL,1 304 | 1971,BOS,1 305 | 1971,ANA,1 306 | 1971,CHW,1 307 | 1971,CHC,1 308 | 1971,CIN,1 309 | 1971,CLE,1 310 | 1971,DET,1 311 | 1971,HOU,1 312 | 1971,KCR,1 313 | 1971,LAD,1 314 | 1971,MIN,1 315 | 1971,MIL,1 316 | 1971,WSN,1 317 | 1971,NYY,1 318 | 1971,NYM,1 319 | 1971,OAK,1 320 | 1971,PHI,1 321 | 1971,PIT,1 322 | 1971,SDP,1 323 | 1971,SFG,1 324 | 1971,STL,1 325 | 1971,TEX,1 326 | 1972,ATL,1 327 | 1972,BAL,1 328 | 1972,BOS,1 329 | 1972,ANA,1 330 | 1972,CHW,1 331 | 1972,CHC,1 332 | 1972,CIN,1 333 | 1972,CLE,1 334 | 1972,DET,1 335 | 1972,KCR,1 336 | 1972,LAD,1 337 | 1972,MIN,1 
338 | 1972,MIL,1 339 | 1972,WSN,1 340 | 1972,NYM,1 341 | 1972,OAK,1 342 | 1972,PHI,1 343 | 1972,PIT,1 344 | 1972,SDP,1 345 | 1972,SFG,1 346 | 1972,STL,1 347 | 1972,TEX,1 348 | 1973,ATL,1 349 | 1973,BAL,1 350 | 1973,BOS,1 351 | 1973,ANA,1 352 | 1973,CHW,1 353 | 1973,CHC,1 354 | 1973,CIN,1 355 | 1973,CLE,1 356 | 1973,DET,1 357 | 1973,HOU,1 358 | 1973,KCR,1 359 | 1973,LAD,1 360 | 1973,MIN,1 361 | 1973,MIL,1 362 | 1973,WSN,1 363 | 1973,NYY,1 364 | 1973,NYM,1 365 | 1973,OAK,1 366 | 1973,PHI,1 367 | 1973,PIT,1 368 | 1973,SDP,1 369 | 1973,SFG,1 370 | 1973,STL,1 371 | 1973,TEX,1 372 | 1974,ATL,1 373 | 1974,BAL,1 374 | 1974,BOS,1 375 | 1974,ANA,1 376 | 1974,CHW,1 377 | 1974,CHC,1 378 | 1974,CIN,1 379 | 1974,CLE,1 380 | 1974,DET,1 381 | 1974,HOU,1 382 | 1974,KCR,1 383 | 1974,LAD,1 384 | 1974,MIN,1 385 | 1974,MIL,1 386 | 1974,WSN,1 387 | 1974,NYY,1 388 | 1974,NYM,1 389 | 1974,OAK,1 390 | 1974,PHI,1 391 | 1974,PIT,1 392 | 1974,SDP,1 393 | 1974,SFG,1 394 | 1974,STL,1 395 | 1974,TEX,1 396 | 1975,ATL,1 397 | 1975,BAL,1 398 | 1975,BOS,1 399 | 1975,ANA,1 400 | 1975,CHW,1 401 | 1975,CHC,1 402 | 1975,CIN,1 403 | 1975,CLE,1 404 | 1975,DET,1 405 | 1975,HOU,1 406 | 1975,KCR,1 407 | 1975,LAD,1 408 | 1975,MIN,1 409 | 1975,MIL,1 410 | 1975,WSN,1 411 | 1975,NYY,1 412 | 1975,NYM,1 413 | 1975,OAK,1 414 | 1975,PHI,1 415 | 1975,PIT,1 416 | 1975,SDP,1 417 | 1975,SFG,1 418 | 1975,STL,1 419 | 1975,TEX,1 420 | 1976,ATL,1 421 | 1976,BAL,1 422 | 1976,BOS,1 423 | 1976,ANA,1 424 | 1976,CHW,1 425 | 1976,CHC,1 426 | 1976,CIN,1 427 | 1976,CLE,1 428 | 1976,DET,1 429 | 1976,HOU,1 430 | 1976,LAD,1 431 | 1976,MIN,1 432 | 1976,MIL,1 433 | 1976,WSN,1 434 | 1976,NYM,1 435 | 1976,OAK,1 436 | 1976,PHI,1 437 | 1976,PIT,1 438 | 1976,SFG,1 439 | 1976,STL,1 440 | 1976,TEX,1 441 | 1977,BAL,1 442 | 1977,BOS,1 443 | 1977,ANA,1 444 | 1977,CHC,1 445 | 1977,CIN,1 446 | 1977,CLE,1 447 | 1977,DET,1 448 | 1977,HOU,1 449 | 1977,KCR,1 450 | 1977,LAD,1 451 | 1977,MIL,1 452 | 1977,WSN,1 453 | 1977,NYY,1 454 | 1977,NYM,1 455 | 
1977,OAK,1 456 | 1977,PHI,1 457 | 1977,PIT,1 458 | 1977,SEA,1 459 | 1977,SFG,1 460 | 1977,STL,1 461 | 1977,TEX,1 462 | 1977,TOR,1 463 | 1978,ATL,1 464 | 1978,BAL,1 465 | 1978,BOS,1 466 | 1978,ANA,1 467 | 1978,CHW,1 468 | 1978,CHC,1 469 | 1978,CIN,1 470 | 1978,CLE,1 471 | 1978,DET,1 472 | 1978,HOU,1 473 | 1978,LAD,1 474 | 1978,MIN,1 475 | 1978,MIL,1 476 | 1978,WSN,1 477 | 1978,NYY,1 478 | 1978,NYM,1 479 | 1978,OAK,1 480 | 1978,PHI,1 481 | 1978,PIT,1 482 | 1978,SDP,1 483 | 1978,SEA,1 484 | 1978,SFG,1 485 | 1978,STL,1 486 | 1978,TEX,1 487 | 1978,TOR,1 488 | 1979,ATL,1 489 | 1979,BAL,1 490 | 1979,ANA,1 491 | 1979,CHC,1 492 | 1979,CIN,1 493 | 1979,CLE,1 494 | 1979,DET,1 495 | 1979,HOU,1 496 | 1979,LAD,1 497 | 1979,WSN,1 498 | 1979,NYY,1 499 | 1979,NYM,1 500 | 1979,OAK,1 501 | 1979,PHI,1 502 | 1979,PIT,1 503 | 1979,SDP,1 504 | 1979,SFG,1 505 | 1979,STL,1 506 | 1979,TEX,1 507 | 1980,ATL,1 508 | 1980,BAL,1 509 | 1980,ANA,1 510 | 1980,CHW,1 511 | 1980,CHC,1 512 | 1980,CIN,1 513 | 1980,DET,1 514 | 1980,HOU,1 515 | 1980,LAD,1 516 | 1980,MIN,1 517 | 1980,WSN,1 518 | 1980,NYY,1 519 | 1980,NYM,1 520 | 1980,OAK,1 521 | 1980,PHI,1 522 | 1980,PIT,1 523 | 1980,SDP,1 524 | 1980,SEA,1 525 | 1980,SFG,1 526 | 1980,STL,1 527 | 1980,TOR,1 528 | 1981,ATL,1 529 | 1981,BAL,1 530 | 1981,BOS,1 531 | 1981,ANA,1 532 | 1981,CHW,1 533 | 1981,CHC,1 534 | 1981,CIN,1 535 | 1981,CLE,1 536 | 1981,DET,1 537 | 1981,HOU,1 538 | 1981,LAD,1 539 | 1981,MIN,1 540 | 1981,MIL,1 541 | 1981,WSN,1 542 | 1981,NYY,1 543 | 1981,NYM,1 544 | 1981,OAK,1 545 | 1981,PHI,1 546 | 1981,PIT,1 547 | 1981,SDP,1 548 | 1981,SEA,1 549 | 1981,SFG,1 550 | 1981,STL,1 551 | 1981,TEX,1 552 | 1981,TOR,1 553 | 1982,ATL,1 554 | 1982,BAL,1 555 | 1982,BOS,1 556 | 1982,ANA,1 557 | 1982,CHW,1 558 | 1982,CHC,1 559 | 1982,CIN,1 560 | 1982,CLE,1 561 | 1982,DET,1 562 | 1982,HOU,1 563 | 1982,LAD,1 564 | 1982,MIN,1 565 | 1982,WSN,1 566 | 1982,NYY,1 567 | 1982,NYM,1 568 | 1982,OAK,1 569 | 1982,PHI,1 570 | 1982,PIT,1 571 | 1982,SDP,1 572 | 1982,SEA,1 
573 | 1982,SFG,1 574 | 1982,STL,1 575 | 1982,TEX,1 576 | 1982,TOR,1 577 | 1983,ATL,1 578 | 1983,BAL,1 579 | 1983,BOS,1 580 | 1983,CHW,1 581 | 1983,CHC,1 582 | 1983,CIN,1 583 | 1983,CLE,1 584 | 1983,DET,1 585 | 1983,HOU,1 586 | 1983,LAD,1 587 | 1983,MIN,1 588 | 1983,WSN,1 589 | 1983,NYY,1 590 | 1983,NYM,1 591 | 1983,OAK,1 592 | 1983,PHI,1 593 | 1983,PIT,1 594 | 1983,SDP,1 595 | 1983,SEA,1 596 | 1983,SFG,1 597 | 1983,STL,1 598 | 1983,TEX,1 599 | 1983,TOR,1 600 | 1984,ATL,1 601 | 1984,BAL,1 602 | 1984,BOS,1 603 | 1984,ANA,1 604 | 1984,CHW,1 605 | 1984,CHC,1 606 | 1984,CIN,1 607 | 1984,CLE,1 608 | 1984,DET,1 609 | 1984,HOU,1 610 | 1984,KCR,1 611 | 1984,LAD,1 612 | 1984,MIN,1 613 | 1984,MIL,1 614 | 1984,WSN,1 615 | 1984,NYY,1 616 | 1984,NYM,1 617 | 1984,OAK,1 618 | 1984,PIT,1 619 | 1984,SDP,1 620 | 1984,SEA,1 621 | 1984,STL,1 622 | 1984,TEX,1 623 | 1984,TOR,1 624 | 1985,ATL,1 625 | 1985,BAL,1 626 | 1985,BOS,1 627 | 1985,ANA,1 628 | 1985,CHW,1 629 | 1985,CHC,1 630 | 1985,CIN,1 631 | 1985,CLE,1 632 | 1985,DET,1 633 | 1985,HOU,1 634 | 1985,KCR,1 635 | 1985,LAD,1 636 | 1985,MIN,1 637 | 1985,MIL,1 638 | 1985,WSN,1 639 | 1985,NYY,1 640 | 1985,NYM,1 641 | 1985,OAK,1 642 | 1985,PHI,1 643 | 1985,PIT,1 644 | 1985,SDP,1 645 | 1985,SEA,1 646 | 1985,SFG,1 647 | 1985,STL,1 648 | 1985,TEX,1 649 | 1985,TOR,1 650 | 1986,ATL,1 651 | 1986,BAL,1 652 | 1986,BOS,1 653 | 1986,ANA,1 654 | 1986,CHW,1 655 | 1986,CIN,1 656 | 1986,CLE,1 657 | 1986,DET,1 658 | 1986,HOU,1 659 | 1986,KCR,1 660 | 1986,LAD,1 661 | 1986,MIL,1 662 | 1986,NYY,1 663 | 1986,NYM,1 664 | 1986,OAK,1 665 | 1986,PIT,1 666 | 1986,SDP,1 667 | 1986,SFG,1 668 | 1986,STL,1 669 | 1987,ATL,1 670 | 1987,CHW,1 671 | 1987,KCR,1 672 | 1987,LAD,1 673 | 1987,PIT,1 674 | 1987,SDP,1 675 | 1987,SEA,1 676 | 1987,STL,1 677 | 1988,ATL,1 678 | 1988,BAL,1 679 | 1988,BOS,1 680 | 1988,ANA,1 681 | 1988,CHW,1 682 | 1988,CHC,1 683 | 1988,CIN,1 684 | 1988,CLE,1 685 | 1988,DET,1 686 | 1988,HOU,1 687 | 1988,KCR,1 688 | 1988,LAD,1 689 | 1988,MIN,1 690 | 
1988,MIL,1 691 | 1988,WSN,1 692 | 1988,NYM,1 693 | 1988,OAK,1 694 | 1988,PHI,1 695 | 1988,PIT,1 696 | 1988,SDP,1 697 | 1988,SEA,1 698 | 1988,SFG,1 699 | 1988,STL,1 700 | 1988,TEX,1 701 | 1988,TOR,1 702 | 1989,ATL,1 703 | 1989,BAL,1 704 | 1989,BOS,1 705 | 1989,ANA,1 706 | 1989,CHW,1 707 | 1989,CHC,1 708 | 1989,CIN,1 709 | 1989,CLE,1 710 | 1989,DET,1 711 | 1989,HOU,1 712 | 1989,KCR,1 713 | 1989,LAD,1 714 | 1989,MIN,1 715 | 1989,MIL,1 716 | 1989,WSN,1 717 | 1989,NYY,1 718 | 1989,NYM,1 719 | 1989,OAK,1 720 | 1989,PHI,1 721 | 1989,PIT,1 722 | 1989,SDP,1 723 | 1989,SEA,1 724 | 1989,SFG,1 725 | 1989,STL,1 726 | 1989,TOR,1 727 | 1990,BAL,1 728 | 1990,BOS,1 729 | 1990,ANA,1 730 | 1990,CHW,1 731 | 1990,CHC,1 732 | 1990,CIN,1 733 | 1990,CLE,1 734 | 1990,DET,1 735 | 1990,HOU,1 736 | 1990,KCR,1 737 | 1990,LAD,1 738 | 1990,MIN,1 739 | 1990,MIL,1 740 | 1990,WSN,1 741 | 1990,NYY,1 742 | 1990,OAK,1 743 | 1990,PHI,1 744 | 1990,PIT,1 745 | 1990,SDP,1 746 | 1990,SEA,1 747 | 1990,SFG,1 748 | 1990,STL,1 749 | 1990,TOR,1 750 | 1991,ATL,1 751 | 1991,BOS,1 752 | 1991,ANA,1 753 | 1991,CHW,1 754 | 1991,CHC,1 755 | 1991,CLE,1 756 | 1991,LAD,1 757 | 1991,MIN,1 758 | 1991,MIL,1 759 | 1991,WSN,1 760 | 1991,NYY,1 761 | 1991,NYM,1 762 | 1991,OAK,1 763 | 1991,PHI,1 764 | 1991,PIT,1 765 | 1991,SDP,1 766 | 1991,SEA,1 767 | 1991,SFG,1 768 | 1991,STL,1 769 | 1991,TOR,1 770 | 1992,ATL,1 771 | 1992,BAL,1 772 | 1992,BOS,1 773 | 1992,ANA,1 774 | 1992,CHW,1 775 | 1992,CHC,1 776 | 1992,CIN,1 777 | 1992,CLE,1 778 | 1992,DET,1 779 | 1992,HOU,1 780 | 1992,KCR,1 781 | 1992,LAD,1 782 | 1992,MIN,1 783 | 1992,MIL,1 784 | 1992,WSN,1 785 | 1992,NYY,1 786 | 1992,NYM,1 787 | 1992,OAK,1 788 | 1992,PHI,1 789 | 1992,PIT,1 790 | 1992,SDP,1 791 | 1992,SEA,1 792 | 1992,SFG,1 793 | 1992,STL,1 794 | 1992,TOR,1 795 | 1993,ATL,1 796 | 1993,BAL,1 797 | 1993,BOS,1 798 | 1993,ANA,1 799 | 1993,CHW,1 800 | 1993,CHC,1 801 | 1993,FLA,1 802 | 1993,HOU,1 803 | 1993,KCR,1 804 | 1993,LAD,1 805 | 1993,MIN,1 806 | 1993,MIL,1 807 | 1993,WSN,1 
808 | 1993,NYM,1 809 | 1993,SFG,1 810 | 1993,STL,1 811 | 1994,SFG,1 812 | 1995,KCR,1 813 | 1995,STL,1 814 | 2002,ANA,1 815 | 2002,DET,1 816 | 2002,SFG,1 817 | 2002,STL,1 818 | 2003,ANA,1 819 | 2003,NYM,1 820 | 2003,OAK,1 821 | 2005,MIN,1 822 | 2005,OAK,1 823 | 2005,SEA,1 824 | 2005,SFG,1 825 | 2005,STL,1 826 | 1871,BNA,2 827 | 1871,CNA,2 828 | 1871,CFC,2 829 | 1871,KEK,2 830 | 1871,NNA,2 831 | 1871,PNA,2 832 | 1871,ROK,2 833 | 1871,TRO,2 834 | 1871,OLY,2 835 | 1872,BLC,2 836 | 1872,ECK,2 837 | 1872,BRA,2 838 | 1872,BNA,2 839 | 1872,CFC,2 840 | 1872,MAN,2 841 | 1872,NNA,2 842 | 1872,PNA,2 843 | 1872,TRO,2 844 | 1872,OLY,2 845 | 1872,NAT,2 846 | 1873,BLC,2 847 | 1873,MAR,2 848 | 1873,BRA,2 849 | 1873,BNA,2 850 | 1873,RES,2 851 | 1873,NNA,2 852 | 1873,PNA,2 853 | 1873,PWS,2 854 | 1873,WBL,2 855 | 1898,LAD,3 856 | 1898,ATL,3 857 | 1898,STL,3 858 | 1899,ATL,3 859 | 1901,LAD,3 860 | 1901,CIN,3 861 | 1902,BOS,3 862 | 1902,CLE,3 863 | 1911,ATL,3 864 | 1912,ATL,3 865 | 1912,STL,3 866 | 1919,PHI,3 867 | 1920,BOS,3 868 | 1920,CHW,3 869 | 1920,CLE,3 870 | 1920,DET,3 871 | 1920,NYY,3 872 | 1920,OAK,3 873 | 1920,PHI,3 874 | 1920,BAL,3 875 | 1920,STL,3 876 | 1920,MIN,3 877 | 1921,BOS,3 878 | 1921,LAD,3 879 | 1921,ATL,3 880 | 1921,CHW,3 881 | 1921,CHC,3 882 | 1921,CLE,3 883 | 1921,DET,3 884 | 1921,SFG,3 885 | 1921,NYY,3 886 | 1921,OAK,3 887 | 1921,PHI,3 888 | 1921,PIT,3 889 | 1921,BAL,3 890 | 1921,STL,3 891 | 1921,MIN,3 892 | 1922,BOS,3 893 | 1922,LAD,3 894 | 1922,ATL,3 895 | 1922,CHW,3 896 | 1922,CHC,3 897 | 1922,CIN,3 898 | 1922,CLE,3 899 | 1922,DET,3 900 | 1922,SFG,3 901 | 1922,NYY,3 902 | 1922,OAK,3 903 | 1922,PHI,3 904 | 1922,PIT,3 905 | 1922,BAL,3 906 | 1922,STL,3 907 | 1922,MIN,3 908 | 1923,BOS,3 909 | 1923,LAD,3 910 | 1923,ATL,3 911 | 1923,CHW,3 912 | 1923,CHC,3 913 | 1923,CIN,3 914 | 1923,CLE,3 915 | 1923,DET,3 916 | 1923,SFG,3 917 | 1923,NYY,3 918 | 1923,OAK,3 919 | 1923,PHI,3 920 | 1923,PIT,3 921 | 1923,BAL,3 922 | 1923,STL,3 923 | 1923,MIN,3 924 | 1924,BOS,3 925 | 
1924,LAD,3 926 | 1924,ATL,3 927 | 1924,CHW,3 928 | 1924,CHC,3 929 | 1924,CIN,3 930 | 1924,CLE,3 931 | 1924,DET,3 932 | 1924,SFG,3 933 | 1924,NYY,3 934 | 1924,OAK,3 935 | 1924,PHI,3 936 | 1924,PIT,3 937 | 1924,BAL,3 938 | 1924,STL,3 939 | 1924,MIN,3 940 | 1925,BOS,3 941 | 1925,LAD,3 942 | 1925,ATL,3 943 | 1925,CHW,3 944 | 1925,CHC,3 945 | 1925,CIN,3 946 | 1925,CLE,3 947 | 1925,DET,3 948 | 1925,SFG,3 949 | 1925,NYY,3 950 | 1925,OAK,3 951 | 1925,PHI,3 952 | 1925,PIT,3 953 | 1925,BAL,3 954 | 1925,STL,3 955 | 1925,MIN,3 956 | 1926,BOS,3 957 | 1926,ATL,3 958 | 1926,CHW,3 959 | 1926,CIN,3 960 | 1926,CLE,3 961 | 1926,DET,3 962 | 1926,SFG,3 963 | 1926,NYY,3 964 | 1926,PHI,3 965 | 1926,PIT,3 966 | 1926,BAL,3 967 | 1926,STL,3 968 | 1926,MIN,3 969 | 1927,BOS,3 970 | 1927,ATL,3 971 | 1927,CHW,3 972 | 1927,CHC,3 973 | 1927,CIN,3 974 | 1927,CLE,3 975 | 1927,DET,3 976 | 1927,SFG,3 977 | 1927,NYY,3 978 | 1927,OAK,3 979 | 1927,PHI,3 980 | 1927,PIT,3 981 | 1927,BAL,3 982 | 1927,STL,3 983 | 1927,MIN,3 984 | 1928,BOS,3 985 | 1928,ATL,3 986 | 1928,CHW,3 987 | 1928,CHC,3 988 | 1928,CIN,3 989 | 1928,CLE,3 990 | 1928,DET,3 991 | 1928,SFG,3 992 | 1928,NYY,3 993 | 1928,OAK,3 994 | 1928,PHI,3 995 | 1928,PIT,3 996 | 1928,BAL,3 997 | 1928,STL,3 998 | 1928,MIN,3 999 | 1929,BOS,3 1000 | 1929,LAD,3 1001 | 1929,ATL,3 1002 | 1929,CHW,3 1003 | 1929,CHC,3 1004 | 1929,CIN,3 1005 | 1929,CLE,3 1006 | 1929,DET,3 1007 | 1929,SFG,3 1008 | 1929,NYY,3 1009 | 1929,OAK,3 1010 | 1929,PHI,3 1011 | 1929,PIT,3 1012 | 1929,BAL,3 1013 | 1929,STL,3 1014 | 1929,MIN,3 1015 | 1930,BOS,3 1016 | 1930,LAD,3 1017 | 1930,ATL,3 1018 | 1930,CHW,3 1019 | 1930,CHC,3 1020 | 1930,CIN,3 1021 | 1930,CLE,3 1022 | 1930,DET,3 1023 | 1930,SFG,3 1024 | 1930,NYY,3 1025 | 1930,OAK,3 1026 | 1930,PIT,3 1027 | 1930,BAL,3 1028 | 1930,STL,3 1029 | 1930,MIN,3 1030 | 1931,BOS,3 1031 | 1931,LAD,3 1032 | 1931,CHW,3 1033 | 1931,CHC,3 1034 | 1931,CIN,3 1035 | 1931,CLE,3 1036 | 1931,DET,3 1037 | 1931,SFG,3 1038 | 1931,NYY,3 1039 | 1931,OAK,3 1040 | 
1931,PHI,3 1041 | 1931,PIT,3 1042 | 1931,BAL,3 1043 | 1931,STL,3 1044 | 1931,MIN,3 1045 | 1932,BOS,3 1046 | 1932,LAD,3 1047 | 1932,ATL,3 1048 | 1932,CHW,3 1049 | 1932,CHC,3 1050 | 1932,CIN,3 1051 | 1932,CLE,3 1052 | 1932,DET,3 1053 | 1932,SFG,3 1054 | 1932,NYY,3 1055 | 1932,OAK,3 1056 | 1932,PHI,3 1057 | 1932,PIT,3 1058 | 1932,BAL,3 1059 | 1932,STL,3 1060 | 1932,MIN,3 1061 | 1933,BOS,3 1062 | 1933,LAD,3 1063 | 1933,CHW,3 1064 | 1933,CLE,3 1065 | 1933,DET,3 1066 | 1933,NYY,3 1067 | 1933,OAK,3 1068 | 1933,PHI,3 1069 | 1933,PIT,3 1070 | 1933,BAL,3 1071 | 1933,STL,3 1072 | 1933,MIN,3 1073 | 1934,BOS,3 1074 | 1934,LAD,3 1075 | 1934,ATL,3 1076 | 1934,CHW,3 1077 | 1934,CHC,3 1078 | 1934,CIN,3 1079 | 1934,CLE,3 1080 | 1934,DET,3 1081 | 1934,SFG,3 1082 | 1934,NYY,3 1083 | 1934,OAK,3 1084 | 1934,PHI,3 1085 | 1934,PIT,3 1086 | 1934,BAL,3 1087 | 1934,STL,3 1088 | 1934,MIN,3 1089 | 1935,BOS,3 1090 | 1935,LAD,3 1091 | 1935,ATL,3 1092 | 1935,CHW,3 1093 | 1935,CHC,3 1094 | 1935,CIN,3 1095 | 1935,CLE,3 1096 | 1935,DET,3 1097 | 1935,SFG,3 1098 | 1935,NYY,3 1099 | 1935,OAK,3 1100 | 1935,PHI,3 1101 | 1935,PIT,3 1102 | 1935,BAL,3 1103 | 1935,STL,3 1104 | 1935,MIN,3 1105 | 1936,BOS,3 1106 | 1936,LAD,3 1107 | 1936,ATL,3 1108 | 1936,CHW,3 1109 | 1936,CHC,3 1110 | 1936,CIN,3 1111 | 1936,CLE,3 1112 | 1936,DET,3 1113 | 1936,SFG,3 1114 | 1936,NYY,3 1115 | 1936,OAK,3 1116 | 1936,PHI,3 1117 | 1936,PIT,3 1118 | 1936,BAL,3 1119 | 1936,STL,3 1120 | 1936,MIN,3 1121 | 1937,BOS,3 1122 | 1937,LAD,3 1123 | 1937,CHW,3 1124 | 1937,CHC,3 1125 | 1937,CLE,3 1126 | 1937,DET,3 1127 | 1937,SFG,3 1128 | 1937,NYY,3 1129 | 1937,OAK,3 1130 | 1937,PHI,3 1131 | 1937,PIT,3 1132 | 1937,BAL,3 1133 | 1937,STL,3 1134 | 1937,MIN,3 1135 | 1938,BOS,3 1136 | 1938,LAD,3 1137 | 1938,CHW,3 1138 | 1938,CHC,3 1139 | 1938,CIN,3 1140 | 1938,CLE,3 1141 | 1938,DET,3 1142 | 1938,SFG,3 1143 | 1938,NYY,3 1144 | 1938,OAK,3 1145 | 1938,PHI,3 1146 | 1938,PIT,3 1147 | 1938,BAL,3 1148 | 1938,STL,3 1149 | 1938,MIN,3 1150 | 1939,BOS,3 1151 | 
1939,LAD,3 1152 | 1939,ATL,3 1153 | 1939,CHW,3 1154 | 1939,CHC,3 1155 | 1939,CIN,3 1156 | 1939,CLE,3 1157 | 1939,DET,3 1158 | 1939,SFG,3 1159 | 1939,NYY,3 1160 | 1939,OAK,3 1161 | 1939,PHI,3 1162 | 1939,PIT,3 1163 | 1939,BAL,3 1164 | 1939,STL,3 1165 | 1939,MIN,3 1166 | 1940,BOS,3 1167 | 1940,LAD,3 1168 | 1940,ATL,3 1169 | 1940,CHW,3 1170 | 1940,CHC,3 1171 | 1940,CLE,3 1172 | 1940,DET,3 1173 | 1940,SFG,3 1174 | 1940,NYY,3 1175 | 1940,OAK,3 1176 | 1940,PIT,3 1177 | 1940,BAL,3 1178 | 1940,STL,3 1179 | 1940,MIN,3 1180 | 1941,BOS,3 1181 | 1941,LAD,3 1182 | 1941,ATL,3 1183 | 1941,CHW,3 1184 | 1941,CLE,3 1185 | 1941,DET,3 1186 | 1941,SFG,3 1187 | 1941,NYY,3 1188 | 1941,OAK,3 1189 | 1941,PHI,3 1190 | 1941,PIT,3 1191 | 1941,BAL,3 1192 | 1941,STL,3 1193 | 1941,MIN,3 1194 | 1942,BOS,3 1195 | 1942,CHC,3 1196 | 1942,OAK,3 1197 | 1942,BAL,3 1198 | 1942,MIN,3 1199 | 1943,LAD,3 1200 | 1943,SFG,3 1201 | 1944,BOS,3 1202 | 1944,LAD,3 1203 | 1944,CHC,3 1204 | 1944,CLE,3 1205 | 1944,SFG,3 1206 | 1944,NYY,3 1207 | 1944,PIT,3 1208 | 1945,BOS,3 1209 | 1945,LAD,3 1210 | 1945,ATL,3 1211 | 1945,CIN,3 1212 | 1945,SFG,3 1213 | 1945,PHI,3 1214 | 1945,PIT,3 1215 | 1945,STL,3 1216 | 1946,BAL,3 1217 | 1946,MIN,3 1218 | 1947,BOS,3 1219 | 1947,LAD,3 1220 | 1947,ATL,3 1221 | 1947,CHC,3 1222 | 1947,CIN,3 1223 | 1947,SFG,3 1224 | 1947,PIT,3 1225 | 1947,STL,3 1226 | 1948,BOS,3 1227 | 1948,ATL,3 1228 | 1948,CHW,3 1229 | 1948,CHC,3 1230 | 1948,CIN,3 1231 | 1948,CLE,3 1232 | 1948,DET,3 1233 | 1948,SFG,3 1234 | 1948,NYY,3 1235 | 1948,OAK,3 1236 | 1948,PHI,3 1237 | 1948,PIT,3 1238 | 1948,BAL,3 1239 | 1948,STL,3 1240 | 1948,MIN,3 1241 | 1949,BOS,3 1242 | 1949,LAD,3 1243 | 1949,ATL,3 1244 | 1949,CHW,3 1245 | 1949,CHC,3 1246 | 1949,CIN,3 1247 | 1949,DET,3 1248 | 1949,SFG,3 1249 | 1949,NYY,3 1250 | 1949,OAK,3 1251 | 1949,PHI,3 1252 | 1949,PIT,3 1253 | 1949,BAL,3 1254 | 1949,STL,3 1255 | 1949,MIN,3 1256 | 1950,BOS,3 1257 | 1950,LAD,3 1258 | 1950,ATL,3 1259 | 1950,CHW,3 1260 | 1950,CIN,3 1261 | 1950,DET,3 1262 | 
1950,NYY,3 1263 | 1950,OAK,3 1264 | 1950,PHI,3 1265 | 1950,PIT,3 1266 | 1950,BAL,3 1267 | 1950,STL,3 1268 | 1950,MIN,3 1269 | 1951,BOS,3 1270 | 1951,LAD,3 1271 | 1951,ATL,3 1272 | 1951,CHW,3 1273 | 1951,CHC,3 1274 | 1951,DET,3 1275 | 1951,SFG,3 1276 | 1951,OAK,3 1277 | 1951,PHI,3 1278 | 1951,PIT,3 1279 | 1951,BAL,3 1280 | 1951,STL,3 1281 | 1951,MIN,3 1282 | 1952,OAK,3 1283 | 1952,BAL,3 1284 | 1953,BOS,3 1285 | 1953,CHC,3 1286 | 1953,CIN,3 1287 | 1953,DET,3 1288 | 1953,SFG,3 1289 | 1953,NYY,3 1290 | 1953,OAK,3 1291 | 1953,PHI,3 1292 | 1953,PIT,3 1293 | 1953,BAL,3 1294 | 1953,STL,3 1295 | 1954,BOS,3 1296 | 1954,CHC,3 1297 | 1954,CIN,3 1298 | 1954,OAK,3 1299 | 1954,PIT,3 1300 | 1954,STL,3 1301 | 1955,CIN,3 1302 | 1955,DET,3 1303 | 1955,OAK,3 1304 | 1955,MIN,3 1305 | 1956,BOS,3 1306 | 1956,DET,3 1307 | 1957,CIN,3 1308 | 1977,CHW,3 1309 | 1977,MIN,3 1310 | 1979,BOS,3 1311 | 1979,CHW,3 1312 | 1979,KCR,3 1313 | 1979,MIN,3 1314 | 1979,MIL,3 1315 | 1979,SEA,3 1316 | 1979,TOR,3 1317 | 1980,BOS,3 1318 | 1980,CLE,3 1319 | 1980,KCR,3 1320 | 1980,MIL,3 1321 | 1980,TEX,3 1322 | 1982,KCR,3 1323 | 1982,MIL,3 1324 | 1983,ANA,3 1325 | 1983,KCR,3 1326 | 1983,MIL,3 1327 | 1886,LAD,4 1328 | 1886,ATL,4 1329 | 1886,CHC,4 1330 | 1886,CIN,4 1331 | 1886,DTN,4 1332 | 1886,KCN,4 1333 | 1886,LOU,4 1334 | 1886,PHA,4 1335 | 1886,STL,4 1336 | 1887,BLO,4 1337 | 1887,LAD,4 1338 | 1887,ATL,4 1339 | 1887,CHC,4 1340 | 1887,CLV,4 1341 | 1887,CIN,4 1342 | 1887,DTN,4 1343 | 1887,IND,4 1344 | 1887,LOU,4 1345 | 1887,SFG,4 1346 | 1887,NYP,4 1347 | 1887,PHA,4 1348 | 1887,PHI,4 1349 | 1887,PIT,4 1350 | 1887,STL,4 1351 | 1887,WNL,4 1352 | 1888,BLO,4 1353 | 1888,CLV,4 1354 | 1888,CIN,4 1355 | 1888,DTN,4 1356 | 1888,IND,4 1357 | 1888,KCC,4 1358 | 1888,LOU,4 1359 | 1888,PHA,4 1360 | 1889,BLO,4 1361 | 1889,LAD,4 1362 | 1889,ATL,4 1363 | 1889,CHC,4 1364 | 1889,CLV,4 1365 | 1889,CLS,4 1366 | 1889,CIN,4 1367 | 1889,IND,4 1368 | 1889,KCC,4 1369 | 1889,LOU,4 1370 | 1889,SFG,4 1371 | 1889,PHA,4 1372 | 1889,PHI,4 1373 | 
1889,PIT,4 1374 | 1889,STL,4 1375 | 1889,WNL,4 1376 | 1890,BFB,4 1377 | 1890,BRG,4 1378 | 1890,LAD,4 1379 | 1890,BWW,4 1380 | 1890,ATL,4 1381 | 1890,BRS,4 1382 | 1890,CHC,4 1383 | 1890,CHP,4 1384 | 1890,CIN,4 1385 | 1890,CLV,4 1386 | 1890,CLI,4 1387 | 1890,LOU,4 1388 | 1890,SFG,4 1389 | 1890,NYI,4 1390 | 1890,PHA,4 1391 | 1890,PHI,4 1392 | 1890,PHQ,4 1393 | 1890,PIT,4 1394 | 1890,PBB,4 1395 | 1890,ROC,4 1396 | 1890,STL,4 1397 | 1890,SYS,4 1398 | 1890,TLM,4 1399 | 1891,BLO,4 1400 | 1891,LAD,4 1401 | 1891,BRS,4 1402 | 1891,ATL,4 1403 | 1891,CHC,4 1404 | 1891,CIN,4 1405 | 1891,CLV,4 1406 | 1891,CLS,4 1407 | 1891,CKK,4 1408 | 1891,LOU,4 1409 | 1891,MLA,4 1410 | 1891,SFG,4 1411 | 1891,PHQ,4 1412 | 1891,PHI,4 1413 | 1891,PIT,4 1414 | 1891,STL,4 1415 | 1891,WAS,4 1416 | 1892,BLO,4 1417 | 1892,LAD,4 1418 | 1892,ATL,4 1419 | 1892,SFG,4 1420 | 1892,PHI,4 1421 | 1892,PIT,4 1422 | 1892,STL,4 1423 | 1892,WAS,4 1424 | 1893,BLO,4 1425 | 1893,LAD,4 1426 | 1893,ATL,4 1427 | 1893,CHC,4 1428 | 1893,CIN,4 1429 | 1893,CLV,4 1430 | 1893,LOU,4 1431 | 1893,SFG,4 1432 | 1893,PHI,4 1433 | 1893,PIT,4 1434 | 1893,STL,4 1435 | 1893,WAS,4 1436 | 1894,BLO,4 1437 | 1894,LAD,4 1438 | 1894,ATL,4 1439 | 1894,CHC,4 1440 | 1894,CIN,4 1441 | 1894,CLV,4 1442 | 1894,LOU,4 1443 | 1894,SFG,4 1444 | 1894,PHI,4 1445 | 1894,PIT,4 1446 | 1894,STL,4 1447 | 1894,WAS,4 1448 | 1895,BLO,4 1449 | 1895,LAD,4 1450 | 1895,ATL,4 1451 | 1895,CHC,4 1452 | 1895,CIN,4 1453 | 1895,CLV,4 1454 | 1895,LOU,4 1455 | 1895,SFG,4 1456 | 1895,PHI,4 1457 | 1895,PIT,4 1458 | 1895,STL,4 1459 | 1895,WAS,4 1460 | 1896,BLO,4 1461 | 1896,LAD,4 1462 | 1896,ATL,4 1463 | 1896,CHC,4 1464 | 1896,CIN,4 1465 | 1896,CLV,4 1466 | 1896,LOU,4 1467 | 1896,SFG,4 1468 | 1896,PHI,4 1469 | 1896,PIT,4 1470 | 1896,STL,4 1471 | 1896,WAS,4 1472 | 1897,BLO,4 1473 | 1897,LAD,4 1474 | 1897,ATL,4 1475 | 1897,CHC,4 1476 | 1897,CIN,4 1477 | 1897,CLV,4 1478 | 1897,LOU,4 1479 | 1897,SFG,4 1480 | 1897,PHI,4 1481 | 1897,PIT,4 1482 | 1897,STL,4 1483 | 1897,WAS,4 1484 | 
1898,BLO,4 1485 | 1898,CHC,4 1486 | 1898,CIN,4 1487 | 1898,LOU,4 1488 | 1898,SFG,4 1489 | 1898,PHI,4 1490 | 1898,WAS,4 1491 | 1899,BLO,4 1492 | 1899,LAD,4 1493 | 1899,CHC,4 1494 | 1899,CIN,4 1495 | 1899,CLV,4 1496 | 1899,LOU,4 1497 | 1899,SFG,4 1498 | 1899,PHI,4 1499 | 1899,PIT,4 1500 | 1899,STL,4 1501 | 1899,WAS,4 1502 | 1900,LAD,4 1503 | 1900,ATL,4 1504 | 1900,CIN,4 1505 | 1900,SFG,4 1506 | 1900,PHI,4 1507 | 1900,STL,4 1508 | 1901,NYY,4 1509 | 1901,BOS,4 1510 | 1901,CHW,4 1511 | 1901,CLE,4 1512 | 1901,DET,4 1513 | 1901,BAL,4 1514 | 1901,OAK,4 1515 | 1901,STL,4 1516 | 1901,MIN,4 1517 | 1902,NYY,4 1518 | 1902,OAK,4 1519 | 1902,MIN,4 1520 | 1930,PHI,4 1521 | 1959,LAD,5 1522 | 1961,CHC,5 1523 | 1961,ANA,5 1524 | 1961,LAD,5 1525 | 1963,BOS,5 1526 | 1963,CLE,5 1527 | 1964,BOS,5 1528 | 1964,CLE,5 1529 | 1964,OAK,5 1530 | 1964,MIN,5 1531 | 1965,BOS,5 1532 | 1965,CIN,5 1533 | 1965,PHI,5 1534 | 1966,CHC,5 1535 | 1966,CIN,5 1536 | 1966,DET,5 1537 | 1969,CIN,5 1538 | 1969,HOU,5 1539 | 1969,PHI,5 1540 | 1969,PIT,5 1541 | 1970,NYM,5 1542 | 1970,PHI,5 1543 | 1970,SDP,5 1544 | 1970,SFG,5 1545 | 1970,STL,5 1546 | 1972,HOU,5 1547 | 1977,ATL,5 1548 | 1977,SDP,5 1549 | 1984,PHI,5 1550 | 1984,SFG,5 1551 | 1986,CHC,5 1552 | 1986,MIN,5 1553 | 1986,WSN,5 1554 | 1986,PHI,5 1555 | 1986,SEA,5 1556 | 1986,TEX,5 1557 | 1986,TOR,5 1558 | 1987,BAL,5 1559 | 1987,BOS,5 1560 | 1987,ANA,5 1561 | 1987,CHC,5 1562 | 1987,CIN,5 1563 | 1987,CLE,5 1564 | 1987,DET,5 1565 | 1987,HOU,5 1566 | 1987,MIN,5 1567 | 1987,MIL,5 1568 | 1987,WSN,5 1569 | 1987,NYY,5 1570 | 1987,NYM,5 1571 | 1987,OAK,5 1572 | 1987,PHI,5 1573 | 1987,SFG,5 1574 | 1987,TEX,5 1575 | 1987,TOR,5 1576 | 1988,NYY,5 1577 | 1989,TEX,5 1578 | 1990,ATL,5 1579 | 1990,NYM,5 1580 | 1990,TEX,5 1581 | 1991,BAL,5 1582 | 1991,CIN,5 1583 | 1991,DET,5 1584 | 1991,HOU,5 1585 | 1991,KCR,5 1586 | 1991,TEX,5 1587 | 1992,TEX,5 1588 | 1993,CIN,5 1589 | 1993,CLE,5 1590 | 1993,COL,5 1591 | 1993,DET,5 1592 | 1993,NYY,5 1593 | 1993,OAK,5 1594 | 1993,PHI,5 1595 | 
1993,PIT,5 1596 | 1993,SDP,5 1597 | 1993,SEA,5 1598 | 1993,TEX,5 1599 | 1993,TOR,5 1600 | 1994,ATL,5 1601 | 1994,BAL,5 1602 | 1994,BOS,5 1603 | 1994,ANA,5 1604 | 1994,CHW,5 1605 | 1994,CHC,5 1606 | 1994,CIN,5 1607 | 1994,CLE,5 1608 | 1994,COL,5 1609 | 1994,DET,5 1610 | 1994,FLA,5 1611 | 1994,HOU,5 1612 | 1994,KCR,5 1613 | 1994,LAD,5 1614 | 1994,MIN,5 1615 | 1994,MIL,5 1616 | 1994,WSN,5 1617 | 1994,NYY,5 1618 | 1994,NYM,5 1619 | 1994,OAK,5 1620 | 1994,PHI,5 1621 | 1994,PIT,5 1622 | 1994,SDP,5 1623 | 1994,SEA,5 1624 | 1994,STL,5 1625 | 1994,TEX,5 1626 | 1994,TOR,5 1627 | 1995,ATL,5 1628 | 1995,BAL,5 1629 | 1995,BOS,5 1630 | 1995,ANA,5 1631 | 1995,CHW,5 1632 | 1995,CHC,5 1633 | 1995,CIN,5 1634 | 1995,CLE,5 1635 | 1995,COL,5 1636 | 1995,DET,5 1637 | 1995,FLA,5 1638 | 1995,HOU,5 1639 | 1995,LAD,5 1640 | 1995,MIN,5 1641 | 1995,MIL,5 1642 | 1995,WSN,5 1643 | 1995,NYY,5 1644 | 1995,NYM,5 1645 | 1995,OAK,5 1646 | 1995,PHI,5 1647 | 1995,PIT,5 1648 | 1995,SDP,5 1649 | 1995,SEA,5 1650 | 1995,SFG,5 1651 | 1995,TEX,5 1652 | 1995,TOR,5 1653 | 1996,ATL,5 1654 | 1996,BAL,5 1655 | 1996,BOS,5 1656 | 1996,ANA,5 1657 | 1996,CHW,5 1658 | 1996,CHC,5 1659 | 1996,CIN,5 1660 | 1996,CLE,5 1661 | 1996,COL,5 1662 | 1996,DET,5 1663 | 1996,FLA,5 1664 | 1996,HOU,5 1665 | 1996,KCR,5 1666 | 1996,LAD,5 1667 | 1996,MIN,5 1668 | 1996,MIL,5 1669 | 1996,WSN,5 1670 | 1996,NYY,5 1671 | 1996,NYM,5 1672 | 1996,OAK,5 1673 | 1996,PHI,5 1674 | 1996,PIT,5 1675 | 1996,SDP,5 1676 | 1996,SEA,5 1677 | 1996,SFG,5 1678 | 1996,STL,5 1679 | 1996,TEX,5 1680 | 1996,TOR,5 1681 | 1997,ANA,5 1682 | 1997,ATL,5 1683 | 1997,BAL,5 1684 | 1997,BOS,5 1685 | 1997,CHW,5 1686 | 1997,CHC,5 1687 | 1997,CIN,5 1688 | 1997,CLE,5 1689 | 1997,COL,5 1690 | 1997,DET,5 1691 | 1997,FLA,5 1692 | 1997,HOU,5 1693 | 1997,KCR,5 1694 | 1997,LAD,5 1695 | 1997,MIN,5 1696 | 1997,MIL,5 1697 | 1997,WSN,5 1698 | 1997,NYY,5 1699 | 1997,NYM,5 1700 | 1997,OAK,5 1701 | 1997,PHI,5 1702 | 1997,PIT,5 1703 | 1997,SDP,5 1704 | 1997,SEA,5 1705 | 1997,SFG,5 1706 | 
1997,STL,5 1707 | 1997,TEX,5 1708 | 1997,TOR,5 1709 | 1998,ANA,5 1710 | 1998,ARI,5 1711 | 1998,ATL,5 1712 | 1998,BAL,5 1713 | 1998,BOS,5 1714 | 1998,CHW,5 1715 | 1998,CHC,5 1716 | 1998,CIN,5 1717 | 1998,CLE,5 1718 | 1998,COL,5 1719 | 1998,DET,5 1720 | 1998,FLA,5 1721 | 1998,HOU,5 1722 | 1998,KCR,5 1723 | 1998,LAD,5 1724 | 1998,MIL,5 1725 | 1998,MIN,5 1726 | 1998,WSN,5 1727 | 1998,NYY,5 1728 | 1998,NYM,5 1729 | 1998,OAK,5 1730 | 1998,PHI,5 1731 | 1998,PIT,5 1732 | 1998,SDP,5 1733 | 1998,SEA,5 1734 | 1998,SFG,5 1735 | 1998,STL,5 1736 | 1998,TBD,5 1737 | 1998,TEX,5 1738 | 1998,TOR,5 1739 | 1999,ANA,5 1740 | 1999,ARI,5 1741 | 1999,ATL,5 1742 | 1999,BAL,5 1743 | 1999,BOS,5 1744 | 1999,CHW,5 1745 | 1999,CHC,5 1746 | 1999,CIN,5 1747 | 1999,CLE,5 1748 | 1999,COL,5 1749 | 1999,DET,5 1750 | 1999,FLA,5 1751 | 1999,HOU,5 1752 | 1999,KCR,5 1753 | 1999,LAD,5 1754 | 1999,MIL,5 1755 | 1999,MIN,5 1756 | 1999,WSN,5 1757 | 1999,NYY,5 1758 | 1999,NYM,5 1759 | 1999,OAK,5 1760 | 1999,PHI,5 1761 | 1999,PIT,5 1762 | 1999,SDP,5 1763 | 1999,SEA,5 1764 | 1999,SFG,5 1765 | 1999,STL,5 1766 | 1999,TBD,5 1767 | 1999,TEX,5 1768 | 1999,TOR,5 1769 | 2000,ANA,5 1770 | 2000,ARI,5 1771 | 2000,ATL,5 1772 | 2000,BAL,5 1773 | 2000,BOS,5 1774 | 2000,CHW,5 1775 | 2000,CHC,5 1776 | 2000,CIN,5 1777 | 2000,CLE,5 1778 | 2000,COL,5 1779 | 2000,DET,5 1780 | 2000,FLA,5 1781 | 2000,HOU,5 1782 | 2000,KCR,5 1783 | 2000,LAD,5 1784 | 2000,MIL,5 1785 | 2000,MIN,5 1786 | 2000,WSN,5 1787 | 2000,NYY,5 1788 | 2000,NYM,5 1789 | 2000,OAK,5 1790 | 2000,PHI,5 1791 | 2000,PIT,5 1792 | 2000,SDP,5 1793 | 2000,SEA,5 1794 | 2000,SFG,5 1795 | 2000,STL,5 1796 | 2000,TBD,5 1797 | 2000,TEX,5 1798 | 2000,TOR,5 1799 | 2001,ANA,5 1800 | 2001,ARI,5 1801 | 2001,ATL,5 1802 | 2001,BAL,5 1803 | 2001,BOS,5 1804 | 2001,CHW,5 1805 | 2001,CHC,5 1806 | 2001,CIN,5 1807 | 2001,CLE,5 1808 | 2001,COL,5 1809 | 2001,DET,5 1810 | 2001,FLA,5 1811 | 2001,HOU,5 1812 | 2001,KCR,5 1813 | 2001,LAD,5 1814 | 2001,MIL,5 1815 | 2001,MIN,5 1816 | 2001,WSN,5 1817 | 
2001,NYY,5 1818 | 2001,NYM,5 1819 | 2001,OAK,5 1820 | 2001,PHI,5 1821 | 2001,PIT,5 1822 | 2001,SDP,5 1823 | 2001,SEA,5 1824 | 2001,SFG,5 1825 | 2001,STL,5 1826 | 2001,TBD,5 1827 | 2001,TEX,5 1828 | 2001,TOR,5 1829 | 2002,ARI,5 1830 | 2002,ATL,5 1831 | 2002,BAL,5 1832 | 2002,BOS,5 1833 | 2002,CHW,5 1834 | 2002,CHC,5 1835 | 2002,CIN,5 1836 | 2002,CLE,5 1837 | 2002,COL,5 1838 | 2002,FLA,5 1839 | 2002,HOU,5 1840 | 2002,KCR,5 1841 | 2002,LAD,5 1842 | 2002,MIL,5 1843 | 2002,MIN,5 1844 | 2002,WSN,5 1845 | 2002,NYY,5 1846 | 2002,NYM,5 1847 | 2002,OAK,5 1848 | 2002,PHI,5 1849 | 2002,PIT,5 1850 | 2002,SDP,5 1851 | 2002,SEA,5 1852 | 2002,TBD,5 1853 | 2002,TEX,5 1854 | 2002,TOR,5 1855 | 2003,ARI,5 1856 | 2003,ATL,5 1857 | 2003,BAL,5 1858 | 2003,BOS,5 1859 | 2003,CHW,5 1860 | 2003,CHC,5 1861 | 2003,CIN,5 1862 | 2003,CLE,5 1863 | 2003,COL,5 1864 | 2003,DET,5 1865 | 2003,FLA,5 1866 | 2003,HOU,5 1867 | 2003,KCR,5 1868 | 2003,LAD,5 1869 | 2003,MIL,5 1870 | 2003,MIN,5 1871 | 2003,WSN,5 1872 | 2003,NYY,5 1873 | 2003,PHI,5 1874 | 2003,PIT,5 1875 | 2003,SDP,5 1876 | 2003,SEA,5 1877 | 2003,SFG,5 1878 | 2003,STL,5 1879 | 2003,TBD,5 1880 | 2003,TEX,5 1881 | 2003,TOR,5 1882 | 2004,ANA,5 1883 | 2004,ARI,5 1884 | 2004,ATL,5 1885 | 2004,BAL,5 1886 | 2004,BOS,5 1887 | 2004,CHW,5 1888 | 2004,CHC,5 1889 | 2004,CIN,5 1890 | 2004,CLE,5 1891 | 2004,COL,5 1892 | 2004,DET,5 1893 | 2004,FLA,5 1894 | 2004,HOU,5 1895 | 2004,KCR,5 1896 | 2004,LAD,5 1897 | 2004,MIL,5 1898 | 2004,MIN,5 1899 | 2004,WSN,5 1900 | 2004,NYY,5 1901 | 2004,NYM,5 1902 | 2004,OAK,5 1903 | 2004,PHI,5 1904 | 2004,PIT,5 1905 | 2004,SDP,5 1906 | 2004,SEA,5 1907 | 2004,SFG,5 1908 | 2004,STL,5 1909 | 2004,TBD,5 1910 | 2004,TEX,5 1911 | 2004,TOR,5 1912 | 2005,ARI,5 1913 | 2005,ATL,5 1914 | 2005,BAL,5 1915 | 2005,BOS,5 1916 | 2005,CHW,5 1917 | 2005,CHC,5 1918 | 2005,CIN,5 1919 | 2005,CLE,5 1920 | 2005,COL,5 1921 | 2005,DET,5 1922 | 2005,FLA,5 1923 | 2005,HOU,5 1924 | 2005,KCR,5 1925 | 2005,ANA,5 1926 | 2005,LAD,5 1927 | 2005,MIL,5 1928 | 
2005,NYY,5 1929 | 2005,NYM,5 1930 | 2005,PHI,5 1931 | 2005,PIT,5 1932 | 2005,SDP,5 1933 | 2005,TBD,5 1934 | 2005,TEX,5 1935 | 2005,TOR,5 1936 | 2005,WSN,5 1937 | 2006,ARI,5 1938 | 2006,ATL,5 1939 | 2006,BAL,5 1940 | 2006,BOS,5 1941 | 2006,CHW,5 1942 | 2006,CHC,5 1943 | 2006,CIN,5 1944 | 2006,CLE,5 1945 | 2006,COL,5 1946 | 2006,DET,5 1947 | 2006,FLA,5 1948 | 2006,HOU,5 1949 | 2006,KCR,5 1950 | 2006,ANA,5 1951 | 2006,LAD,5 1952 | 2006,MIL,5 1953 | 2006,MIN,5 1954 | 2006,NYY,5 1955 | 2006,NYM,5 1956 | 2006,OAK,5 1957 | 2006,PHI,5 1958 | 2006,PIT,5 1959 | 2006,SDP,5 1960 | 2006,SEA,5 1961 | 2006,SFG,5 1962 | 2006,STL,5 1963 | 2006,TBD,5 1964 | 2006,TEX,5 1965 | 2006,TOR,5 1966 | 2006,WSN,5 1967 | 2007,ARI,5 1968 | 2007,ATL,5 1969 | 2007,BAL,5 1970 | 2007,BOS,5 1971 | 2007,CHW,5 1972 | 2007,CHC,5 1973 | 2007,CIN,5 1974 | 2007,CLE,5 1975 | 2007,COL,5 1976 | 2007,DET,5 1977 | 2007,FLA,5 1978 | 2007,HOU,5 1979 | 2007,KCR,5 1980 | 2007,ANA,5 1981 | 2007,LAD,5 1982 | 2007,MIL,5 1983 | 2007,MIN,5 1984 | 2007,NYY,5 1985 | 2007,NYM,5 1986 | 2007,OAK,5 1987 | 2007,PHI,5 1988 | 2007,PIT,5 1989 | 2007,SDP,5 1990 | 2007,SEA,5 1991 | 2007,SFG,5 1992 | 2007,STL,5 1993 | 2007,TBD,5 1994 | 2007,TEX,5 1995 | 2007,TOR,5 1996 | 2007,WSN,5 1997 | 2008,ARI,5 1998 | 2008,ATL,5 1999 | 2008,BAL,5 2000 | 2008,BOS,5 2001 | 2008,CHW,5 2002 | 2008,CHC,5 2003 | 2008,CIN,5 2004 | 2008,CLE,5 2005 | 2008,COL,5 2006 | 2008,DET,5 2007 | 2008,FLA,5 2008 | 2008,HOU,5 2009 | 2008,KCR,5 2010 | 2008,ANA,5 2011 | 2008,LAD,5 2012 | 2008,MIL,5 2013 | 2008,MIN,5 2014 | 2008,NYY,5 2015 | 2008,NYM,5 2016 | 2008,OAK,5 2017 | 2008,PHI,5 2018 | 2008,PIT,5 2019 | 2008,SDP,5 2020 | 2008,SEA,5 2021 | 2008,SFG,5 2022 | 2008,STL,5 2023 | 2008,TBD,5 2024 | 2008,TEX,5 2025 | 2008,TOR,5 2026 | 2008,WSN,5 2027 | 2009,ARI,5 2028 | 2009,ATL,5 2029 | 2009,BAL,5 2030 | 2009,BOS,5 2031 | 2009,CHW,5 2032 | 2009,CHC,5 2033 | 2009,CIN,5 2034 | 2009,CLE,5 2035 | 2009,COL,5 2036 | 2009,DET,5 2037 | 2009,FLA,5 2038 | 2009,HOU,5 2039 | 
2009,KCR,5 2040 | 2009,ANA,5 2041 | 2009,LAD,5 2042 | 2009,MIL,5 2043 | 2009,MIN,5 2044 | 2009,NYY,5 2045 | 2009,NYM,5 2046 | 2009,OAK,5 2047 | 2009,PHI,5 2048 | 2009,PIT,5 2049 | 2009,SDP,5 2050 | 2009,SEA,5 2051 | 2009,SFG,5 2052 | 2009,STL,5 2053 | 2009,TBD,5 2054 | 2009,TEX,5 2055 | 2009,TOR,5 2056 | 2009,WSN,5 2057 | 2010,ARI,5 2058 | 2010,ATL,5 2059 | 2010,BAL,5 2060 | 2010,BOS,5 2061 | 2010,CHW,5 2062 | 2010,CHC,5 2063 | 2010,CIN,5 2064 | 2010,CLE,5 2065 | 2010,COL,5 2066 | 2010,DET,5 2067 | 2010,FLA,5 2068 | 2010,HOU,5 2069 | 2010,KCR,5 2070 | 2010,ANA,5 2071 | 2010,LAD,5 2072 | 2010,MIL,5 2073 | 2010,MIN,5 2074 | 2010,NYY,5 2075 | 2010,NYM,5 2076 | 2010,OAK,5 2077 | 2010,PHI,5 2078 | 2010,PIT,5 2079 | 2010,SDP,5 2080 | 2010,SEA,5 2081 | 2010,SFG,5 2082 | 2010,STL,5 2083 | 2010,TBD,5 2084 | 2010,TEX,5 2085 | 2010,TOR,5 2086 | 2010,WSN,5 2087 | 2011,ARI,5 2088 | 2011,ATL,5 2089 | 2011,BAL,5 2090 | 2011,BOS,5 2091 | 2011,CHW,5 2092 | 2011,CHC,5 2093 | 2011,CIN,5 2094 | 2011,CLE,5 2095 | 2011,COL,5 2096 | 2011,DET,5 2097 | 2011,FLA,5 2098 | 2011,HOU,5 2099 | 2011,KCR,5 2100 | 2011,ANA,5 2101 | 2011,LAD,5 2102 | 2011,MIL,5 2103 | 2011,MIN,5 2104 | 2011,NYY,5 2105 | 2011,NYM,5 2106 | 2011,OAK,5 2107 | 2011,PHI,5 2108 | 2011,PIT,5 2109 | 2011,SDP,5 2110 | 2011,SEA,5 2111 | 2011,SFG,5 2112 | 2011,STL,5 2113 | 2011,TBD,5 2114 | 2011,TEX,5 2115 | 2011,TOR,5 2116 | 2011,WSN,5 2117 | 2012,ARI,5 2118 | 2012,ATL,5 2119 | 2012,BAL,5 2120 | 2012,BOS,5 2121 | 2012,CHW,5 2122 | 2012,CHC,5 2123 | 2012,CIN,5 2124 | 2012,CLE,5 2125 | 2012,COL,5 2126 | 2012,DET,5 2127 | 2012,HOU,5 2128 | 2012,KCR,5 2129 | 2012,ANA,5 2130 | 2012,LAD,5 2131 | 2012,FLA,5 2132 | 2012,MIL,5 2133 | 2012,MIN,5 2134 | 2012,NYY,5 2135 | 2012,NYM,5 2136 | 2012,OAK,5 2137 | 2012,PHI,5 2138 | 2012,PIT,5 2139 | 2012,SDP,5 2140 | 2012,SEA,5 2141 | 2012,SFG,5 2142 | 2012,STL,5 2143 | 2012,TBD,5 2144 | 2012,TEX,5 2145 | 2012,TOR,5 2146 | 2012,WSN,5 2147 | 2013,ARI,5 2148 | 2013,ATL,5 2149 | 2013,BAL,5 2150 | 
2013,BOS,5 2151 | 2013,CHW,5 2152 | 2013,CHC,5 2153 | 2013,CIN,5 2154 | 2013,CLE,5 2155 | 2013,COL,5 2156 | 2013,DET,5 2157 | 2013,HOU,5 2158 | 2013,KCR,5 2159 | 2013,ANA,5 2160 | 2013,LAD,5 2161 | 2013,FLA,5 2162 | 2013,MIL,5 2163 | 2013,MIN,5 2164 | 2013,NYY,5 2165 | 2013,NYM,5 2166 | 2013,OAK,5 2167 | 2013,PHI,5 2168 | 2013,PIT,5 2169 | 2013,SDP,5 2170 | 2013,SEA,5 2171 | 2013,SFG,5 2172 | 2013,STL,5 2173 | 2013,TBD,5 2174 | 2013,TEX,5 2175 | 2013,TOR,5 2176 | 2013,WSN,5 2177 | 2014,ARI,5 2178 | 2014,ATL,5 2179 | 2014,BAL,5 2180 | 2014,BOS,5 2181 | 2014,CHW,5 2182 | 2014,CHC,5 2183 | 2014,CIN,5 2184 | 2014,CLE,5 2185 | 2014,COL,5 2186 | 2014,DET,5 2187 | 2014,HOU,5 2188 | 2014,KCR,5 2189 | 2014,ANA,5 2190 | 2014,LAD,5 2191 | 2014,FLA,5 2192 | 2014,MIL,5 2193 | 2014,MIN,5 2194 | 2014,NYY,5 2195 | 2014,NYM,5 2196 | 2014,OAK,5 2197 | 2014,PHI,5 2198 | 2014,PIT,5 2199 | 2014,SDP,5 2200 | 2014,SEA,5 2201 | 2014,SFG,5 2202 | 2014,STL,5 2203 | 2014,TBD,5 2204 | 2014,TEX,5 2205 | 2014,TOR,5 2206 | 2014,WSN,5 2207 | 1886,BLO,6 2208 | 1886,SFG,6 2209 | 1886,NYP,6 2210 | 1886,PHI,6 2211 | 1886,PIT,6 2212 | 1886,SLM,6 2213 | 1886,WNL,6 2214 | 1888,LAD,6 2215 | 1888,ATL,6 2216 | 1888,CHC,6 2217 | 1888,SFG,6 2218 | 1888,PHI,6 2219 | 1888,PIT,6 2220 | 1888,STL,6 2221 | 1888,WNL,6 2222 | 1890,BLO,6 2223 | 1890,CLS,6 2224 | 1892,CHC,6 2225 | 1892,CIN,6 2226 | 1892,CLV,6 2227 | 1892,LOU,6 2228 | 1898,CLV,6 2229 | 1898,PIT,6 2230 | 1900,CHC,6 2231 | 1900,PIT,6 2232 | 1901,ATL,6 2233 | 1901,CHC,6 2234 | 1901,SFG,6 2235 | 1901,PHI,6 2236 | 1901,PIT,6 2237 | 1902,LAD,6 2238 | 1902,ATL,6 2239 | 1902,CHW,6 2240 | 1902,CHC,6 2241 | 1902,CIN,6 2242 | 1902,DET,6 2243 | 1902,SFG,6 2244 | 1902,PHI,6 2245 | 1902,PIT,6 2246 | 1902,BAL,6 2247 | 1902,STL,6 2248 | 1903,BOS,6 2249 | 1903,CHW,6 2250 | 1903,CLE,6 2251 | 1903,DET,6 2252 | 1903,NYY,6 2253 | 1903,OAK,6 2254 | 1903,BAL,6 2255 | 1903,MIN,6 2256 | 1904,BOS,6 2257 | 1904,CHW,6 2258 | 1904,CLE,6 2259 | 1904,DET,6 2260 | 1904,NYY,6 2261 | 
1904,OAK,6 2262 | 1904,BAL,6 2263 | 1904,MIN,6 2264 | 1910,LAD,6 2265 | 1910,ATL,6 2266 | 1910,CHC,6 2267 | 1910,CIN,6 2268 | 1910,SFG,6 2269 | 1910,PHI,6 2270 | 1910,PIT,6 2271 | 1910,STL,6 2272 | 1911,LAD,6 2273 | 1911,CHC,6 2274 | 1911,CIN,6 2275 | 1911,SFG,6 2276 | 1911,PHI,6 2277 | 1911,PIT,6 2278 | 1911,STL,6 2279 | 1912,LAD,6 2280 | 1912,CHC,6 2281 | 1912,CIN,6 2282 | 1912,SFG,6 2283 | 1912,PHI,6 2284 | 1912,PIT,6 2285 | 1913,BOS,6 2286 | 1913,LAD,6 2287 | 1913,ATL,6 2288 | 1913,CHW,6 2289 | 1913,CHC,6 2290 | 1913,CIN,6 2291 | 1913,CLE,6 2292 | 1913,DET,6 2293 | 1913,SFG,6 2294 | 1913,NYY,6 2295 | 1913,OAK,6 2296 | 1913,PHI,6 2297 | 1913,PIT,6 2298 | 1913,BAL,6 2299 | 1913,STL,6 2300 | 1913,MIN,6 2301 | 1914,BLT,6 2302 | 1914,BOS,6 2303 | 1914,BTT,6 2304 | 1914,LAD,6 2305 | 1914,ATL,6 2306 | 1914,BFL,6 2307 | 1914,CHW,6 2308 | 1914,CHH,6 2309 | 1914,CHC,6 2310 | 1914,CIN,6 2311 | 1914,CLE,6 2312 | 1914,DET,6 2313 | 1914,NEW,6 2314 | 1914,KCP,6 2315 | 1914,SFG,6 2316 | 1914,NYY,6 2317 | 1914,OAK,6 2318 | 1914,PHI,6 2319 | 1914,PIT,6 2320 | 1914,PBS,6 2321 | 1914,BAL,6 2322 | 1914,SLI,6 2323 | 1914,STL,6 2324 | 1914,MIN,6 2325 | 1915,BLT,6 2326 | 1915,BOS,6 2327 | 1915,BTT,6 2328 | 1915,LAD,6 2329 | 1915,ATL,6 2330 | 1915,BFL,6 2331 | 1915,CHW,6 2332 | 1915,CHH,6 2333 | 1915,CHC,6 2334 | 1915,CIN,6 2335 | 1915,CLE,6 2336 | 1915,DET,6 2337 | 1915,KCP,6 2338 | 1915,NEW,6 2339 | 1915,SFG,6 2340 | 1915,NYY,6 2341 | 1915,OAK,6 2342 | 1915,PHI,6 2343 | 1915,PIT,6 2344 | 1915,PBS,6 2345 | 1915,BAL,6 2346 | 1915,SLI,6 2347 | 1915,STL,6 2348 | 1915,MIN,6 2349 | 1916,BOS,6 2350 | 1916,LAD,6 2351 | 1916,ATL,6 2352 | 1916,CHW,6 2353 | 1916,CHC,6 2354 | 1916,CIN,6 2355 | 1916,CLE,6 2356 | 1916,DET,6 2357 | 1916,SFG,6 2358 | 1916,NYY,6 2359 | 1916,OAK,6 2360 | 1916,PHI,6 2361 | 1916,PIT,6 2362 | 1916,BAL,6 2363 | 1916,STL,6 2364 | 1916,MIN,6 2365 | 1917,BOS,6 2366 | 1917,LAD,6 2367 | 1917,ATL,6 2368 | 1917,CHW,6 2369 | 1917,CHC,6 2370 | 1917,CIN,6 2371 | 1917,CLE,6 2372 | 
1917,DET,6 2373 | 1917,SFG,6 2374 | 1917,NYY,6 2375 | 1917,OAK,6 2376 | 1917,PHI,6 2377 | 1917,PIT,6 2378 | 1917,BAL,6 2379 | 1917,STL,6 2380 | 1917,MIN,6 2381 | 1918,BOS,6 2382 | 1918,LAD,6 2383 | 1918,ATL,6 2384 | 1918,CHW,6 2385 | 1918,CHC,6 2386 | 1918,CIN,6 2387 | 1918,CLE,6 2388 | 1918,DET,6 2389 | 1918,SFG,6 2390 | 1918,NYY,6 2391 | 1918,OAK,6 2392 | 1918,PHI,6 2393 | 1918,PIT,6 2394 | 1918,BAL,6 2395 | 1918,STL,6 2396 | 1918,MIN,6 2397 | 1919,BOS,6 2398 | 1919,LAD,6 2399 | 1919,ATL,6 2400 | 1919,CHW,6 2401 | 1919,CHC,6 2402 | 1919,CIN,6 2403 | 1919,CLE,6 2404 | 1919,DET,6 2405 | 1919,SFG,6 2406 | 1919,NYY,6 2407 | 1919,OAK,6 2408 | 1919,PIT,6 2409 | 1919,BAL,6 2410 | 1919,STL,6 2411 | 1919,MIN,6 2412 | 1920,LAD,6 2413 | 1920,ATL,6 2414 | 1920,CHC,6 2415 | 1920,CIN,6 2416 | 1920,SFG,6 2417 | 1920,PIT,6 2418 | 1921,CIN,6 2419 | 1926,LAD,6 2420 | 1926,CHC,6 2421 | 1926,OAK,6 2422 | 1927,LAD,6 2423 | 1928,LAD,6 2424 | 1931,ATL,6 2425 | 1933,ATL,6 2426 | 1933,CHC,6 2427 | 1933,CIN,6 2428 | 1933,SFG,6 2429 | 1937,ATL,6 2430 | 1937,CIN,6 2431 | 1938,ATL,6 2432 | 1940,CIN,6 2433 | 1940,PHI,6 2434 | 1941,CHC,6 2435 | 1941,CIN,6 2436 | 1942,LAD,6 2437 | 1942,ATL,6 2438 | 1942,CHW,6 2439 | 1942,CIN,6 2440 | 1942,CLE,6 2441 | 1942,DET,6 2442 | 1942,SFG,6 2443 | 1942,NYY,6 2444 | 1942,PHI,6 2445 | 1942,PIT,6 2446 | 1942,STL,6 2447 | 1943,BOS,6 2448 | 1943,ATL,6 2449 | 1943,CHW,6 2450 | 1943,CHC,6 2451 | 1943,CIN,6 2452 | 1943,CLE,6 2453 | 1943,DET,6 2454 | 1943,NYY,6 2455 | 1943,OAK,6 2456 | 1943,PHI,6 2457 | 1943,PIT,6 2458 | 1943,BAL,6 2459 | 1943,STL,6 2460 | 1943,MIN,6 2461 | 1944,ATL,6 2462 | 1944,CHW,6 2463 | 1944,CIN,6 2464 | 1944,DET,6 2465 | 1944,OAK,6 2466 | 1944,PHI,6 2467 | 1944,BAL,6 2468 | 1944,STL,6 2469 | 1944,MIN,6 2470 | 1945,CHW,6 2471 | 1945,CHC,6 2472 | 1945,CLE,6 2473 | 1945,DET,6 2474 | 1945,NYY,6 2475 | 1945,OAK,6 2476 | 1945,BAL,6 2477 | 1945,MIN,6 2478 | 1946,LAD,6 2479 | 1946,ATL,6 2480 | 1946,CHW,6 2481 | 1946,CHC,6 2482 | 1946,CIN,6 2483 | 
1946,SFG,6 2484 | 1946,OAK,6 2485 | 1946,PHI,6 2486 | 1946,PIT,6 2487 | 1946,STL,6 2488 | 1947,CHW,6 2489 | 1947,CLE,6 2490 | 1947,DET,6 2491 | 1947,OAK,6 2492 | 1947,PHI,6 2493 | 1947,BAL,6 2494 | 1947,MIN,6 2495 | 1949,CLE,6 2496 | 1951,CIN,6 2497 | 1951,NYY,6 2498 | 1952,CHW,6 2499 | 1952,PHI,6 2500 | 1952,STL,6 2501 | 1952,MIN,6 2502 | 1953,CHW,6 2503 | 1953,MIN,6 2504 | 1954,CHW,6 2505 | 1954,DET,6 2506 | 1954,PHI,6 2507 | 1972,NYY,6 2508 | 1976,KCR,6 2509 | 1976,NYY,6 2510 | 1976,SDP,6 2511 | 1978,KCR,6 2512 | 1981,KCR,6 2513 | --------------------------------------------------------------------------------