├── notebooks ├── footyscripts │ ├── __init__.py │ └── footyviz.py ├── images │ └── pitch-lines.png ├── README.md ├── how to get full play-by-play data for the WC2014.ipynb └── player value appreciation in Portuguese vs. English clubs.ipynb ├── .gitignore ├── README ├── datasets ├── james-vs-barcelona-player-data.csv ├── GER_[3]-0_SRB_player_data.csv ├── BRA_0-[5]_GER_player_data.csv ├── AJA_[1]-0_PSV_player_data.csv └── open_xt_12x8_v1.json └── scripts └── footyviz.py /notebooks/footyscripts/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .ipynb_checkpoints 2 | notebooks/output/* 3 | notebooks/cache/* 4 | .vscode 5 | *.pyc 6 | datasets/external/* -------------------------------------------------------------------------------- /notebooks/images/pitch-lines.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rjtavares/football-crunching/HEAD/notebooks/images/pitch-lines.png -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | This repository was created due to requests to share the code and data used to make a visualization I shared on reddit (/r/soccer and /r/dataisbeautiful). Hopefully, it will become a useful repository of analysis and data about The Beautiful Game. 
-------------------------------------------------------------------------------- /datasets/james-vs-barcelona-player-data.csv: -------------------------------------------------------------------------------- 1 | player,team,num,name 2 | 11,defense,, 3 | 12,defense,, 4 | 13,defense,, 5 | 14,defense,, 6 | 15,defense,, 7 | 16,defense,, 8 | 17,defense,, 9 | 18,defense,, 10 | 19,defense,, 11 | 20,attack,, 12 | 21,attack,, 13 | 22,attack,, 14 | 23,attack,, 15 | 24,attack,, 16 | 25,attack,, 17 | -------------------------------------------------------------------------------- /datasets/GER_[3]-0_SRB_player_data.csv: -------------------------------------------------------------------------------- 1 | player,coords,player_num,player_obj,team,num,name,edgecolor,bgcolor 2 | 12,"(136.7259154955094, 166.22695610141977)",10.0,12,attack,,,black,white 3 | 1583,"(343.77343151714865, 51.91094646567434)",2.0,13,attack,,,black,white 4 | 2248,"(148.21786597279606, 166.84364103805248)",,14,defense,,,white,red 5 | 3802,"(142.8434015524876, 191.71472194218805)",,15,defense,,,white,red 6 | 4563,"(222.12243059774264, 132.0547213685634)",,16,defense,,,white,red 7 | 5867,"(312.24806696292546, 56.81699122099323)",,17,defense,,,white,red 8 | 6501,"(294.21750336437725, 39.77842997777177)",,18,defense,,,white,red 9 | 7502,"(305.08243042847624, 27.46527258165132)",,19,attack,,,black,white 10 | 9617,"(180.0, 266.0)",,20,attack,,,black,white 11 | 10678,"(185.09546430530065, 179.4068611404931)",,21,attack,,,black,white 12 | 367,"(389.7102803738318, 217.58878504672896)",,367,attack,,,black,white 13 | 2256,"(352.65625, 48.875)",,2256,defense,,,white,red 14 | 3652,"(21.0, 209.0)",,3652,defense,,,white,red 15 | 7300,"(525.9540131642823, 134.37566110704734)",,7300,attack,,,black,white 16 | 7301,"(467.872414972532, 132.18923619925005)",,7301,defense,,,white,red 17 | 7302,"(515.5298219098647, 80.98513960333622)",,7302,defense,,,white,red 18 | 8695,"(356.1584373638937, 
121.37524525520615)",,8695,attack,,,black,white 19 | -------------------------------------------------------------------------------- /datasets/BRA_0-[5]_GER_player_data.csv: -------------------------------------------------------------------------------- 1 | player,coords,player_num,player_obj,team,num,name,edgecolor,bgcolor 2 | 12,"(390.82329814922076, 230.3681266177904)",5.0,12,attack,,,red,black 3 | 1923,"(192.70181613677647, 287.0911467596564)",6.0,1923,attack,,,red,black 4 | 3235,"(187.2173482005435, 323.5288842347714)",8.0,3235,attack,,,red,black 5 | 6439,"(218.60259895696848, 226.14081165461812)",,6439,attack,,,red,black 6 | 7600,"(174.46875, 274.625)",,7600,defense,,,blue,yellow 7 | 14761,"(215.71575859162712, 215.28004377345493)",,14761,defense,,,blue,yellow 8 | 16656,"(202.12091085738695, 337.46147632768816)",,16656,defense,,,blue,yellow 9 | 18912,"(190.5326064508669, 132.68650155862397)",,18912,defense,,,blue,yellow 10 | 21118,"(236.0, 151.0)",,21118,defense,,,blue,yellow 11 | 23899,"(342.6033758740193, 193.39115877679416)",,23899,defense,,,blue,yellow 12 | 25686,"(398.52750470586994, 214.27410783699221)",,25686,defense,,,blue,yellow 13 | 37572,"(287.10170479200815, 271.7485391685611)",,37572,attack,,,red,black 14 | 42428,"(198.4123749772249, 33.94978367111291)",,42428,attack,,,red,black 15 | 44136,"(278.972381708952, 56.237396503233526)",,44136,defense,,,blue,yellow 16 | 44137,"(341.40901702115536, 286.95268565748404)",,44137,defense,,,blue,yellow 17 | 48457,"(42.0, 246.0)",,48457,defense,,,blue,yellow 18 | 53809,"(350.0, 62.0)",,53809,attack,,,red,black 19 | 53810,"(429.0, 119.0)",,53810,attack,,,red,black 20 | 53811,"(356.0, 213.0)",,53811,attack,,,red,black 21 | 53812,"(359.0, 325.0)",,53812,attack,,,red,black 22 | 58367,"(265.0889494896123, 260.6871208118323)",,58367,defense,,,blue,yellow 23 | -------------------------------------------------------------------------------- /datasets/AJA_[1]-0_PSV_player_data.csv: 
-------------------------------------------------------------------------------- 1 | player,coords,player_num,player_obj,team,num,name,edgecolor,bgcolor 2 | 12,"(100.87270667830711, 358.9748447267472)",6.0,12,attack,,,red,white 3 | 2226,"(133.86573275787046, 251.93117519975567)",25.0,2226,attack,,,red,white 4 | 2479,"(146.78908512980206, 436.60298289276943)",10.0,2479,attack,,,red,white 5 | 2724,"(304.7370770398743, 453.89102084058067)",3.0,2724,attack,,,red,white 6 | 3724,"(336.67233172282727, 409.580526187905)",,3724,attack,,,red,white 7 | 4598,"(171.2732462779248, 309.6478339555086)",,4598,defense,,,orange,darkblue 8 | 4599,"(105.08270317163756, 310.1181738180273)",,4599,defense,,,orange,darkblue 9 | 4600,"(141.823473755787, 373.3303688574185)",,4600,defense,,,orange,darkblue 10 | 4601,"(194.04748324585225, 430.7146940198011)",,4601,defense,,,orange,darkblue 11 | 7745,"(184.70756950369795, 372.7604552055377)",,7745,defense,,,orange,darkblue 12 | 10076,"(271.3344218055786, 431.20759895156465)",,10076,defense,,,orange,darkblue 13 | 11521,"(140.0, 192.0)",,11521,defense,,,orange,darkblue 14 | 11522,"(178.5973943587801, 145.000517500412)",,11522,attack,,,red,white 15 | 14190,"(291.6829268292683, 201.5609756097561)",,14190,defense,,,orange,darkblue 16 | 14191,"(265.9268292682927, 349.8292682926829)",,14191,defense,,,orange,darkblue 17 | 14192,"(244.02439024390245, 334.3414634146341)",,14192,attack,,,red,white 18 | 14193,"(368.2439024390244, 207.70731707317074)",,14193,attack,,,red,white 19 | 14194,"(324.5609756097561, 189.8048780487805)",,14194,attack,,,red,white 20 | 16101,"(364.5121951219512, 301.7317073170732)",,16101,attack,,,red,white 21 | 16102,"(315.0, 429.0)",,16102,defense,,,orange,darkblue 22 | 19091,"(20.448323588800143, 260.14957692856564)",,19091,defense,,,orange,darkblue 23 | -------------------------------------------------------------------------------- /notebooks/README.md: 
-------------------------------------------------------------------------------- 1 | List of Notebooks 2 | ----------------- 3 | 4 | # Getting and Working with Data 5 | 6 | * [how to get full world cup 2014 play-by-play data](http://nbviewer.ipython.org/github/rjtavares/football-crunching/blob/master/notebooks/how%20to%20get%20full%20play-by-play%20data%20for%20the%20WC2014.ipynb): Good football data is hard to come by. Basic stat counts are easily available, but full play data (i.e. a play broken down in its individual components: interceptions and tackles, runs, passes and shots, etc.) is very rare. And that's the most important unit in a team sport like football. So imagine my surprise and great joy when I came across a fantastic dataset of full play-by-play data for all World Cup matches. 7 | 8 | * [working with positional data](working%20with%20positional%20data.ipynb): This notebook shows how to plot positional data using matplotlib and some things you can do with full positional data that you can't with event based data. 9 | 10 | * [using Voronoi Diagrams](using%20voronoi%20diagrams.ipynb): Applied to football, a Voronoi diagram will partition the pitch into zones that have a single player as the closest to each point in that zone. This notebook shows how you can, having positional data, calculate and plot Voronoi Diagrams. 11 | 12 | 13 | # Analysis 14 | 15 | * [an exploratory analysis of the world cup final](http://nbviewer.ipython.org/github/rjtavares/football-crunching/blob/master/notebooks/an%20exploratory%20data%20analysis%20of%20the%20world%20cup%20final.ipynb): This notebook shows how you can use play-by-play data to analyse a football match, showing custom measures and visualizations to better understand the sport. 
16 | 17 | 18 | # Metrics and Statistics 19 | 20 | * [how likely is it to score from 45 meters](http://nbviewer.ipython.org/github/rjtavares/football-crunching/blob/master/notebooks/how%20likely%20is%20it%20to%20score%20from%2045%20meters.ipynb): we use a logistic regression to model the probability of scoring based on the distance from the goal 21 | 22 | Other links: [Blog](https://medium.com/football-crunching) | [Twitter](https://twitter.com/lastrowview) | [Youtube](https://www.youtube.com/channel/UCz9ZhIX4DO16O_OMSC1_Udw) -------------------------------------------------------------------------------- /datasets/open_xt_12x8_v1.json: -------------------------------------------------------------------------------- 1 | [ 2 | [ 3 | 0.00638303, 4 | 0.00779616, 5 | 0.00844854, 6 | 0.00977659, 7 | 0.01126267, 8 | 0.01248344, 9 | 0.01473596, 10 | 0.0174506, 11 | 0.02122129, 12 | 0.02756312, 13 | 0.03485072, 14 | 0.0379259 15 | ], 16 | [ 17 | 0.00750072, 18 | 0.00878589, 19 | 0.00942382, 20 | 0.0105949, 21 | 0.01214719, 22 | 0.0138454, 23 | 0.01611813, 24 | 0.01870347, 25 | 0.02401521, 26 | 0.02953272, 27 | 0.04066992, 28 | 0.04647721 29 | ], 30 | [ 31 | 0.0088799, 32 | 0.00977745, 33 | 0.01001304, 34 | 0.01110462, 35 | 0.01269174, 36 | 0.01429128, 37 | 0.01685596, 38 | 0.01935132, 39 | 0.0241224, 40 | 0.02855202, 41 | 0.05491138, 42 | 0.06442595 43 | ], 44 | [ 45 | 0.00941056, 46 | 0.01082722, 47 | 0.01016549, 48 | 0.01132376, 49 | 0.01262646, 50 | 0.01484598, 51 | 0.01689528, 52 | 0.0199707, 53 | 0.02385149, 54 | 0.03511326, 55 | 0.10805102, 56 | 0.25745362 57 | ], 58 | [ 59 | 0.00941056, 60 | 0.01082722, 61 | 0.01016549, 62 | 0.01132376, 63 | 0.01262646, 64 | 0.01484598, 65 | 0.01689528, 66 | 0.0199707, 67 | 0.02385149, 68 | 0.03511326, 69 | 0.10805102, 70 | 0.25745362 71 | ], 72 | [ 73 | 0.0088799, 74 | 0.00977745, 75 | 0.01001304, 76 | 0.01110462, 77 | 0.01269174, 78 | 0.01429128, 79 | 0.01685596, 80 | 0.01935132, 81 | 0.0241224, 82 | 0.02855202, 83 | 0.05491138, 84 
"""Matplotlib helpers for drawing a football pitch and play-by-play events."""
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.patches import Ellipse

# Pitch dimensions; event coordinates are plotted directly in these units
# (presumably metres - a 105 x 68 pitch).
x_size = 105.0
y_size = 68.0

# Event type code -> human readable label. Entries written in lower case or
# as question marks are kept exactly as found in the original table.
type_names = {1: 'PASS', 2:'OFFSIDE PASS', 3: 'DRIBBLE', 4:'FOUL (1-ON, 0-BY)', 5: 'PLAY_ACTORS',
              6:'CORNER (1-WON, 0-GRANTED)', 7: 'TACKLE', 8:'INTERCEPTION',
              10: 'SAVE/BLOCK', 11: 'GK GRAB BALL', 12: 'INTERCEPTION (NO CONTROL)',
              13: 'SHOT OFF GOAL', 14: 'SHOT HIT POST', 15: 'SHOT ON GOAL', 16: 'GOAL',
              17: 'YELLOW CARD', 18: 'SUBSTITUTION (OFF)', 19: 'SUBSTITUTION (ON)', 34:'????????',
              41: 'GK PUNCH', 42: 'something awesome', 43: '???????????', 44:'HEADING DUEL',
              45: 'TACKLE (MISSED)', 49: 'WON CONTROL OF BALL', 50:'LOST CONTROL OF BALL', 51:'INTERCEPTION (MISSED)',
              52: 'gk action', 55:'offside defender', 56:'??????????', 59: 'gk action',
              61: 'LOST CONTROL OF BALL', 74: 'CLEAR BALL (OUT OF PITCH)', 100: 'RECEPTION', 101: 'RUN WITH BALL', 102: 'LINEUP'}


def draw_events(events, alpha=1, base_color='black', goal_color='red', mirror_away=False, arrows=True):
    """Plot every row of ``events`` on the current matplotlib axes.

    Rows with a destination (``to_x`` not null) are drawn as arrows from
    ``(x, y)`` to ``(to_x, to_y)``; rows without one are drawn as an ``x``
    marker at ``(x, y)``.

    Parameters
    ----------
    events : pandas.DataFrame
        Needs the columns ``x``, ``y``, ``to_x``, ``to_y`` and ``type``.
        The ``side`` column is only read when ``mirror_away`` is true.
    alpha : float
        Opacity passed to every artist.
    base_color : color
        Color of every event except goals.
    goal_color : color
        Color of events whose ``type`` is 16 (``'GOAL'`` in ``type_names``).
    mirror_away : bool
        When true, events whose ``side`` is ``'A'`` are point-mirrored through
        the centre of the pitch.
    arrows : bool or number
        Multiplier for the arrow-head size; ``False`` gives headless arrows.
    """
    for _, event in events.iterrows():
        # Only touch the 'side' column when mirroring was actually requested,
        # so frames without that column can still be drawn.
        mirror = bool(mirror_away) and event['side'] == 'A'

        if mirror:
            x = x_size - event['x']
            y = y_size - event['y']
            dx = -(event['to_x'] - event['x'])
            dy = -(event['to_y'] - event['y'])
        else:
            x = event['x']
            y = event['y']
            dx = event['to_x'] - event['x']
            dy = event['to_y'] - event['y']

        color = goal_color if event['type'] == 16 else base_color

        if pd.notnull(event['to_x']):
            if event['type'] == 101:
                # type 101 is 'RUN WITH BALL': dotted line, smaller head
                style = 'dotted'
                head_size = 1 * arrows
            else:
                style = 'solid'
                head_size = 2 * arrows

            plt.arrow(x, y, dx, dy, head_width=head_size, head_length=head_size, linestyle=style,
                      color=color, alpha=alpha, length_includes_head=True)
        else:
            plt.scatter(x, y, marker='x', color=color, alpha=alpha)


def draw_pitch():
    """Create a figure with a green pitch and white markings.

    Returns
    -------
    (fig, axes)
        The new matplotlib figure and its single axes. The axes limits run
        from -5 to ``x_size + 5`` and from -5 to ``y_size + 5``.
    """
    pitch_color = '#78AB46'

    fig = plt.figure(figsize=(x_size/10, y_size/10))
    fig.patch.set_facecolor(pitch_color)

    # The ``axisbg`` keyword of add_subplot was removed in Matplotlib 2.2;
    # set the background through the setter that exists on this version.
    axes = fig.add_subplot(1, 1, 1)
    if hasattr(axes, 'set_facecolor'):
        axes.set_facecolor(pitch_color)
    else:
        axes.set_axis_bgcolor(pitch_color)

    axes.xaxis.set_visible(False)
    axes.yaxis.set_visible(False)

    plt.xlim([-5, x_size+5])
    plt.ylim([-5, y_size+5])

    # All markings below are expressed in axes fractions (transAxes).
    box_height = ((16.5*2 + 7.32)/y_size)/1.15
    box_width = (16.5/x_size)/1.15

    r1 = plt.Rectangle((0.04338, 0.0641), (0.95652-0.04338), (0.9359-0.0641),
                       edgecolor="white", facecolor="none", alpha=1, transform=axes.transAxes) #pitch

    r2 = plt.Line2D([0.5, 0.5], [0.9359, 0.0641],
                    c='w', transform=axes.transAxes) #half-way line

    r3 = plt.Rectangle((0.04338, (1-box_height)/2), box_width, box_height,
                       ec='w', fc='none', transform=axes.transAxes) #penalty area

    r4 = plt.Rectangle((0.95652-box_width, (1-box_height)/2), box_width, box_height,
                       ec='w', fc='none', transform=axes.transAxes) #penalty area

    r5 = Ellipse((0.5, 0.5), 9.15*2/x_size, 9.15*2/y_size,
                 ec='w', fc='none', transform=axes.transAxes) #middle circle

    # The markings are attached to the figure (not the axes) while being
    # positioned with the axes transform.
    fig.lines.extend([r1, r2, r3, r4, r5])

    return fig, axes
"""Matplotlib helpers for drawing positional (tracking) data on a football pitch.

The pitch is drawn in a 0-100 coordinate system on both axes; ``SCALERS``
converts those coordinates to ``X_SIZE`` x ``Y_SIZE`` units.
"""
from matplotlib import pyplot as plt
from matplotlib.patches import Ellipse
from matplotlib.collections import PatchCollection
import matplotlib.patheffects as path_effects
import numpy as np

from scipy.spatial import Voronoi
from shapely.geometry import Polygon

X_SIZE = 105
Y_SIZE = 68

# Pitch markings expressed in the 0-100 drawing coordinates.
BOX_HEIGHT = (16.5*2 + 7.32)/Y_SIZE*100
BOX_WIDTH = 16.5/X_SIZE*100

GOAL = 7.32/Y_SIZE*100

GOAL_AREA_HEIGHT = 5.4864*2/Y_SIZE*100 + GOAL
GOAL_AREA_WIDTH = 5.4864/X_SIZE*100

# Multiply 0-100 coordinates by SCALERS to get X_SIZE x Y_SIZE units.
SCALERS = np.array([X_SIZE/100, Y_SIZE/100])
pitch_polygon = Polygon(((0,0), (0,100), (100,100), (100,0)))


def draw_pitch(dpi=100, pitch_color='#a8bc95', fig=None, ax=None, size=1):
    """Set up the field and draw the pitch markings.

    Parameters
    ----------
    dpi : int
        Resolution of the figure, used only when a figure is created.
    pitch_color : color
        Background color of the figure and the axes.
    fig, ax : matplotlib Figure / Axes, optional
        Existing objects to draw on; created when ``None``.
    size : float
        Multiplier of the default 12.8 x 7.2 figure size, used only when a
        figure is created.

    Returns
    -------
    (fig, ax)
        The matplotlib figure and axes objects.
    """
    if fig is None:
        fig = plt.figure(figsize=(12.8*size, 7.2*size), dpi=dpi)
    fig.patch.set_facecolor(pitch_color)

    if ax is None:
        ax = fig.add_subplot(1, 1, 1)
    # NOTE(review): the styling below is also applied to caller-supplied
    # axes; the source formatting does not show whether that was intended.
    ax.set_axis_off()
    ax.set_facecolor(pitch_color)
    ax.xaxis.set_visible(False)
    ax.yaxis.set_visible(False)

    # Set the limits on ``ax`` itself rather than through pyplot, which acts
    # on whatever axes happens to be current. The margins keep the goals,
    # which are drawn just outside the 0-100 range, inside the view.
    ax.set_xlim(-13.32, 113.32)
    ax.set_ylim(-5, 105)

    fig.tight_layout(pad=3)

    draw_patches(ax)

    return fig, ax


def draw_patches(axes):
    """Draw the basic field shapes on ``axes`` and return ``axes``."""
    #pitch
    axes.add_patch(plt.Rectangle((0, 0), 100, 100,
                                 edgecolor="white", facecolor="none"))

    #half-way line
    axes.add_line(plt.Line2D([50, 50], [100, 0],
                             c='w'))

    #penalty areas
    axes.add_patch(plt.Rectangle((100-BOX_WIDTH, (100-BOX_HEIGHT)/2), BOX_WIDTH, BOX_HEIGHT,
                                 ec='w', fc='none'))
    axes.add_patch(plt.Rectangle((0, (100-BOX_HEIGHT)/2), BOX_WIDTH, BOX_HEIGHT,
                                 ec='w', fc='none'))

    #goal areas
    axes.add_patch(plt.Rectangle((100-GOAL_AREA_WIDTH, (100-GOAL_AREA_HEIGHT)/2), GOAL_AREA_WIDTH, GOAL_AREA_HEIGHT,
                                 ec='w', fc='none'))
    axes.add_patch(plt.Rectangle((0, (100-GOAL_AREA_HEIGHT)/2), GOAL_AREA_WIDTH, GOAL_AREA_HEIGHT,
                                 ec='w', fc='none'))

    #goals (drawn one unit outside each goal line)
    axes.add_patch(plt.Rectangle((100, (100-GOAL)/2), 1, GOAL,
                                 ec='w', fc='none'))
    axes.add_patch(plt.Rectangle((0, (100-GOAL)/2), -1, GOAL,
                                 ec='w', fc='none'))

    #halfway circle (an ellipse in 0-100 coordinates)
    axes.add_patch(Ellipse((50, 50), 2*9.15/X_SIZE*100, 2*9.15/Y_SIZE*100,
                           ec='w', fc='none'))

    return axes


def draw_frame(df, t, fig=None, ax=None, size=1, dpi=100, fps=20, label='player_num', show_players=True,
               highlight_color=None, highlight_player=None, text_size=8, text_color='white', flip=False, voronoi=False, **anim_args):
    """Draw the pitch and the players at time ``t`` (in seconds) from ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Positional data, passed to ``get_frame``.
    t : float
        Time in seconds; the frame drawn is ``int(t*fps)``.
    fig, ax, size, dpi
        Forwarded to ``draw_pitch``.
    fps : int
        Frames per second of ``df``.
    label, highlight_color, highlight_player, text_size, text_color
        Forwarded to ``add_players``.
    show_players : bool
        Draw the player (and ball) markers.
    flip, **anim_args
        Accepted but not used by this function.
    voronoi : bool
        Overlay the Voronoi cells of the players.

    Returns
    -------
    (fig, ax, dfFrame)
        The figure, the axes and the frame's rows indexed by player.
    """
    fig, ax = draw_pitch(dpi=dpi, fig=fig, ax=ax, size=size)

    dfFrame = get_frame(df, t, fps=fps)

    if show_players:
        fig, ax, dfFrame = add_players(fig, ax, dfFrame,
                                       label=label, highlight_color=highlight_color, highlight_player=highlight_player,
                                       text_size=text_size, text_color=text_color)
    if voronoi:
        fig, ax, dfFrame = add_voronoi(fig, ax, dfFrame)
    return fig, ax, dfFrame


def add_players(fig, ax, dfFrame, label='player_num', highlight_color=None, highlight_player=None, text_size=8, text_color='white'):
    """Draw one ellipse (and optional text label) per row of ``dfFrame``.

    The row with index ``0`` is treated as the ball; every other row is a
    player and needs the columns ``edgecolor``, ``bgcolor`` and ``team``.
    All rows need ``x`` and ``y``.

    Parameters
    ----------
    fig, ax : matplotlib Figure / Axes
        Returned unchanged; the artists are added to ``ax``.
    dfFrame : pandas.DataFrame
        One row per player, indexed by player id.
    label : str
        Column whose value is written on each marker. Non-string values are
        shown as integers; missing or non-numeric values give an empty label.
    highlight_color, highlight_player
        The player whose index equals ``highlight_player`` is filled with
        ``highlight_color`` instead of its ``bgcolor``.
    text_size : int
        Font size of the labels.
    text_color : color or None
        Color of the labels; ``None`` disables the labels.

    Returns
    -------
    (fig, ax, dfFrame)
    """
    for pid in dfFrame.index:
        row = dfFrame.loc[pid]

        if pid == 0:
            # ball
            try:
                z = row['z']
            except KeyError:
                # no height column: draw the ball at its base size
                z = 0
            size = 1.2 + z
            lw = 0.9
            color = 'black'
            edge = 'white'
            zorder = 100
        else:
            # player
            size = 3
            lw = 2
            edge = row['edgecolor']

            if pid == highlight_player:
                color = highlight_color
            else:
                color = row['bgcolor']

            # attackers are drawn on top of defenders
            zorder = 21 if row['team'] == 'attack' else 20

        ax.add_artist(Ellipse((row['x'], row['y']),
                              size/X_SIZE*100, size/Y_SIZE*100,
                              edgecolor=edge,
                              linewidth=lw,
                              facecolor=color,
                              alpha=0.8,
                              zorder=zorder))

        if text_color is not None:
            try:
                s = row[label]
                if not isinstance(s, str):
                    # Convert the cell value (e.g. 10.0 -> '10'). Converting
                    # the column name instead always failed and blanked
                    # every numeric label.
                    s = str(int(s))
            except (KeyError, TypeError, ValueError, OverflowError):
                # missing column, NaN/None or other non-numeric value
                s = ''
            text = ax.text(row['x'], row['y'], s,
                           horizontalalignment='center', verticalalignment='center',
                           fontsize=text_size, color=text_color, zorder=22, alpha=0.8)

            text.set_path_effects([path_effects.Stroke(linewidth=1, foreground=text_color, alpha=0.8),
                                   path_effects.Normal()])

    return fig, ax, dfFrame


def add_voronoi(fig, ax, dfFrame):
    """Shade the Voronoi cell of every player on ``ax``.

    Cells are computed by ``calculate_voronoi``, clipped to the pitch and
    filled with the owning player's ``bgcolor``; the player positions are
    then drawn once as a faint scatter.

    Returns
    -------
    (fig, ax, dfFrame)
        The inputs, unchanged apart from the artists added to ``ax``.
    """
    vor, dfVor = calculate_voronoi(dfFrame)

    for index, region in enumerate(vor.regions):
        # skip empty regions and regions with a vertex at infinity (-1)
        if len(region) == 0 or -1 in region:
            continue

        owner = dfVor[dfVor['region'] == index]
        if owner.empty:
            # region belongs to one of the four far-away helper points
            continue

        # vertices are in X_SIZE x Y_SIZE units; convert back to 0-100
        cell = Polygon(vor.vertices[region] / SCALERS).intersection(pitch_polygon)
        if cell.is_empty or cell.geom_type != 'Polygon':
            # nothing of this cell lies on the pitch
            continue

        x, y = cell.exterior.xy
        ax.fill(x, y, color=owner['bgcolor'].values[0], alpha=0.30)

    ax.scatter(dfVor['x'], dfVor['y'], c=dfVor['bgcolor'], alpha=0.2)

    return fig, ax, dfFrame


def calculate_voronoi(dfFrame):
    """Compute the Voronoi diagram of the rows of ``dfFrame``.

    The row with index ``0`` (the ball in ``add_players``) is dropped if
    present. Positions are scaled by ``SCALERS`` before the computation and
    four far-away helper points are appended to the input.

    Returns
    -------
    (vor, dfTemp)
        The ``scipy.spatial.Voronoi`` object and a copy of the frame with an
        extra ``region`` column holding each row's index into ``vor.regions``.
    """
    dfTemp = dfFrame.copy().drop(0, errors='ignore')

    values = np.vstack((dfTemp[['x', 'y']].values*SCALERS,
                        [-1000,-1000],
                        [+1000,+1000],
                        [+1000,-1000],
                        [-1000,+1000]
                       ))

    vor = Voronoi(values)

    # the last four entries belong to the helper points
    dfTemp['region'] = vor.point_region[:-4]

    return vor, dfTemp


def get_frame(df, t, fps=20):
    """Return the rows of ``df`` for time ``t`` (seconds), indexed by player.

    The rows are selected with ``df.loc[int(t*fps)]``, so ``df`` is presumably
    indexed by frame number; it must have a ``player`` column.
    """
    dfFrame = df.loc[int(t*fps)].set_index('player')
    return dfFrame
a play broken down in its individual components: interceptions and tackles, runs, passes and shots, etc.) is very rare. And that's the most important unit in a team sport like football. So imagine my surprise and great joy when I came across a fantastic dataset of full play-by-play data for all World Cup matches.\n", 24 | "\n", 25 | "After spending some time in the wonderful world of web scraping, one becomes aware of hints that something worthwhile is going on. Whenever I see a pretty interactive chart on a web-page like the great [Huff Post Data's World Cup page](http://data.huffingtonpost.com/2014/world-cup), my spider sense starts tingling.\n", 26 | "\n", 27 | "The first thing to do is to make sure that the site is using only html5 and js. Check.\n", 28 | "\n", 29 | "OK, so how is the website sending the data to the browser? Developer tools in Chrome is your friend: network tab, filter \"json\".\n", 30 | "\n", 31 | "*Bingo.*\n", 32 | "\n", 33 | "The website was sending the full dataset to the browser." 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "collapsed": false, 39 | "input": [ 40 | "#imports\n", 41 | "import requests\n", 42 | "import json\n", 43 | "import mechanize\n", 44 | "from bs4 import BeautifulSoup\n", 45 | "import time\n", 46 | "\n", 47 | "#initializes the browser\n", 48 | "br = mechanize.Browser()\n", 49 | "br.set_handle_refresh(mechanize._http.HTTPRefreshProcessor(), max_time=10)\n", 50 | "br.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]\n" 51 | ], 52 | "language": "python", 53 | "metadata": {}, 54 | "outputs": [], 55 | "prompt_number": 2 56 | }, 57 | { 58 | "cell_type": "markdown", 59 | "metadata": {}, 60 | "source": [ 61 | "Looking at a match page, you see that all links are listed in a handy menu. 
Looking at the html code, you can see that they are inside a span tag with class set to \"matchup\".\n", 62 | "\n", 63 | "So the following code gets you all the links:" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "collapsed": false, 69 | "input": [ 70 | "starting_link = 'http://data.huffingtonpost.com/2014/world-cup/matches/belgium-vs-usa-731822'\n", 71 | "response = mechanize.urlopen(starting_link)\n", 72 | "soup = BeautifulSoup(response)\n", 73 | "\n", 74 | "links_html = soup.find_all(\"span\", class_=\"matchup\")\n", 75 | "\n", 76 | "links = []\n", 77 | "\n", 78 | "for link_html in links_html:\n", 79 | " a = link_html.find_all('a')\n", 80 | " for l in a:\n", 81 | " link = l.get('href')\n", 82 | " link = link.split('/')[-1]\n", 83 | "\n", 84 | " links.append(link)\n", 85 | " \n", 86 | "links[:5]" 87 | ], 88 | "language": "python", 89 | "metadata": {}, 90 | "outputs": [ 91 | { 92 | "metadata": {}, 93 | "output_type": "pyout", 94 | "prompt_number": 9, 95 | "text": [ 96 | "['brazil-vs-chile-731815',\n", 97 | " 'colombia-vs-uruguay-731816',\n", 98 | " 'netherlands-vs-mexico-731817',\n", 99 | " 'costa-rica-vs-greece-731818',\n", 100 | " 'france-vs-nigeria-731819']" 101 | ] 102 | } 103 | ], 104 | "prompt_number": 9 105 | }, 106 | { 107 | "cell_type": "markdown", 108 | "metadata": {}, 109 | "source": [ 110 | "With this information we can get all the match data through a simple request, which gives back easily readable json." 
111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "collapsed": false, 116 | "input": [ 117 | "def get_match_data(match):\n", 118 | " match_id = match.split('-')[-1]\n", 119 | " response = mechanize.urlopen('http://data.huffingtonpost.com/2014/world-cup/matches/%s.json' % match_id)\n", 120 | "\n", 121 | " match_data = json.loads(response.read())\n", 122 | " \n", 123 | " return match_data\n", 124 | "\n", 125 | "match_data = get_match_data(links[0]) # test\n", 126 | "\n", 127 | "match_data.keys()" 128 | ], 129 | "language": "python", 130 | "metadata": {}, 131 | "outputs": [ 132 | { 133 | "metadata": {}, 134 | "output_type": "pyout", 135 | "prompt_number": 10, 136 | "text": [ 137 | "[u'team_stats', u'events', u'summary']" 138 | ] 139 | } 140 | ], 141 | "prompt_number": 10 142 | }, 143 | { 144 | "cell_type": "markdown", 145 | "metadata": {}, 146 | "source": [ 147 | "Unfortunately, the data includes IDs only. The page has names, though, so there must be some conversion taking place. At this point, I was scared that I would have to look through all script files and javascript code to see where the conversion took place.\n", 148 | "\n", 149 | "However, the first (and obvious) step was enough: simply searching for a player's name in the main page source showed that variables HPIN.teams and HPIN.players contained the names and IDs, plus a bunch of other information (like position, birth date and even preferred foot). The script tag that defined the variables has no class or id, so we could only identify it by its position." 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "collapsed": false, 155 | "input": [ 156 | "def get_match_names(match):\n", 157 | " response = mechanize.urlopen('http://data.huffingtonpost.com/2014/world-cup/matches/%s' % links[0]) #example page\n", 158 | " soup = BeautifulSoup(response)\n", 159 | "\n", 160 | " data = {}\n", 161 | "\n", 162 | " data_script = soup.findAll(\"script\")[1] #gets the second script block. 
Hopefully all pages follow the same format\n", 163 | " data_lines = data_script.text.split('\\n')\n", 164 | "\n", 165 | " for line in data_lines[1:]:\n", 166 | " try:\n", 167 | " #format of a variable is HPIN.variable = [list of dictionaries]\n", 168 | " #this tries to convert it to \n", 169 | " line_data = line.split(' = ')\n", 170 | " name = line_data[0].split('.')[1]\n", 171 | " value = json.loads(line_data[1][:-1])\n", 172 | " data[name] = value\n", 173 | " except:\n", 174 | " print \"error parsing string: \", line #should only occur on blank lines - yeah, I know, lazy exception handling...\n", 175 | " \n", 176 | " return data\n", 177 | "\n", 178 | "names = get_match_names(links[0])\n", 179 | "names.keys()" 180 | ], 181 | "language": "python", 182 | "metadata": {}, 183 | "outputs": [ 184 | { 185 | "output_type": "stream", 186 | "stream": "stdout", 187 | "text": [ 188 | "error parsing string: \n" 189 | ] 190 | }, 191 | { 192 | "metadata": {}, 193 | "output_type": "pyout", 194 | "prompt_number": 11, 195 | "text": [ 196 | "[u'statCategories',\n", 197 | " u'awayTeam',\n", 198 | " u'callbackPath',\n", 199 | " u'homeTeam',\n", 200 | " u'teams',\n", 201 | " u'players',\n", 202 | " u'imageCallbackPath',\n", 203 | " u'imageCallbackInterval',\n", 204 | " u'twitterUrl']" 205 | ] 206 | } 207 | ], 208 | "prompt_number": 11 209 | }, 210 | { 211 | "cell_type": "markdown", 212 | "metadata": {}, 213 | "source": [ 214 | "Alright, so now we have all the match links, a function that returns the events and stats from each match, and a function that returns the players and team names. Let's put it all together. 
First, create a dictionary:" 215 | ] 216 | }, 217 | { 218 | "cell_type": "code", 219 | "collapsed": false, 220 | "input": [ 221 | "data = {}" 222 | ], 223 | "language": "python", 224 | "metadata": {}, 225 | "outputs": [] 226 | }, 227 | { 228 | "cell_type": "markdown", 229 | "metadata": {}, 230 | "source": [ 231 | "Then, execute a loop that will get the data from all the matches and add it to the dictionary. The `if` statement ensures you don't have to reprocess a match in the case you have to run the cell again (e.g. due to a network error)." 232 | ] 233 | }, 234 | { 235 | "cell_type": "code", 236 | "collapsed": true, 237 | "input": [ 238 | "for match in links:\n", 239 | " if match not in data:\n", 240 | " print match\n", 241 | " time.sleep(60)\n", 242 | "\n", 243 | " match_data = get_match_data(match)\n", 244 | " match_names = get_match_names(match)\n", 245 | " data[match] = {'data': match_data, 'names': match_names}\n", 246 | "\n", 247 | " print match, \" done\" \n", 248 | " else:\n", 249 | " print match, \" already processed\"\n" 250 | ], 251 | "language": "python", 252 | "metadata": {}, 253 | "outputs": [ 254 | { 255 | "output_type": "stream", 256 | "stream": "stdout", 257 | "text": [ 258 | "brazil-vs-chile-731815\n", 259 | "error parsing string: " 260 | ] 261 | }, 262 | { 263 | "output_type": "stream", 264 | "stream": "stdout", 265 | "text": [ 266 | " \n", 267 | "brazil-vs-chile-731815 done\n", 268 | "colombia-vs-uruguay-731816\n", 269 | "error parsing string: " 270 | ] 271 | }, 272 | { 273 | "output_type": "stream", 274 | "stream": "stdout", 275 | "text": [ 276 | " \n", 277 | "colombia-vs-uruguay-731816 done\n", 278 | "netherlands-vs-mexico-731817\n", 279 | "error parsing string: " 280 | ] 281 | }, 282 | { 283 | "output_type": "stream", 284 | "stream": "stdout", 285 | "text": [ 286 | " \n", 287 | "netherlands-vs-mexico-731817 done\n", 288 | "costa-rica-vs-greece-731818\n", 289 | "error parsing string: " 290 | ] 291 | }, 292 | { 293 | "output_type": "stream", 294 
| "stream": "stdout", 295 | "text": [ 296 | " \n", 297 | "costa-rica-vs-greece-731818 done\n", 298 | "france-vs-nigeria-731819\n", 299 | "error parsing string: " 300 | ] 301 | }, 302 | { 303 | "output_type": "stream", 304 | "stream": "stdout", 305 | "text": [ 306 | " \n", 307 | "france-vs-nigeria-731819 done\n", 308 | "germany-vs-algeria-731820\n", 309 | "error parsing string: " 310 | ] 311 | }, 312 | { 313 | "output_type": "stream", 314 | "stream": "stdout", 315 | "text": [ 316 | " \n", 317 | "germany-vs-algeria-731820 done\n", 318 | "argentina-vs-switzerland-731821\n", 319 | "error parsing string: " 320 | ] 321 | }, 322 | { 323 | "output_type": "stream", 324 | "stream": "stdout", 325 | "text": [ 326 | " \n", 327 | "argentina-vs-switzerland-731821 done\n", 328 | "belgium-vs-usa-731822\n", 329 | "error parsing string: " 330 | ] 331 | }, 332 | { 333 | "output_type": "stream", 334 | "stream": "stdout", 335 | "text": [ 336 | " \n", 337 | "belgium-vs-usa-731822 done\n", 338 | "france-vs-germany-731824\n", 339 | "error parsing string: " 340 | ] 341 | }, 342 | { 343 | "output_type": "stream", 344 | "stream": "stdout", 345 | "text": [ 346 | " \n", 347 | "france-vs-germany-731824 done\n", 348 | "brazil-vs-colombia-731823\n", 349 | "error parsing string: " 350 | ] 351 | }, 352 | { 353 | "output_type": "stream", 354 | "stream": "stdout", 355 | "text": [ 356 | " \n", 357 | "brazil-vs-colombia-731823 done\n", 358 | "argentina-vs-belgium-731826\n", 359 | "error parsing string: " 360 | ] 361 | }, 362 | { 363 | "output_type": "stream", 364 | "stream": "stdout", 365 | "text": [ 366 | " \n", 367 | "argentina-vs-belgium-731826 done\n", 368 | "netherlands-vs-costa-rica-731825\n", 369 | "error parsing string: " 370 | ] 371 | }, 372 | { 373 | "output_type": "stream", 374 | "stream": "stdout", 375 | "text": [ 376 | " \n", 377 | "netherlands-vs-costa-rica-731825 done\n", 378 | "brazil-vs-germany-731827\n", 379 | "error parsing string: " 380 | ] 381 | }, 382 | { 383 | "output_type": 
"stream", 384 | "stream": "stdout", 385 | "text": [ 386 | " \n", 387 | "brazil-vs-germany-731827 done\n", 388 | "netherlands-vs-argentina-731828\n", 389 | "error parsing string: " 390 | ] 391 | }, 392 | { 393 | "output_type": "stream", 394 | "stream": "stdout", 395 | "text": [ 396 | " \n", 397 | "netherlands-vs-argentina-731828 done\n", 398 | "brazil-vs-netherlands-731829\n", 399 | "error parsing string: " 400 | ] 401 | }, 402 | { 403 | "output_type": "stream", 404 | "stream": "stdout", 405 | "text": [ 406 | " \n", 407 | "brazil-vs-netherlands-731829 done\n", 408 | "germany-vs-argentina-731830\n", 409 | "error parsing string: " 410 | ] 411 | }, 412 | { 413 | "output_type": "stream", 414 | "stream": "stdout", 415 | "text": [ 416 | " \n", 417 | "germany-vs-argentina-731830 done\n", 418 | "brazil-vs-croatia-731767\n", 419 | "error parsing string: " 420 | ] 421 | }, 422 | { 423 | "output_type": "stream", 424 | "stream": "stdout", 425 | "text": [ 426 | " \n", 427 | "brazil-vs-croatia-731767 done\n", 428 | "mexico-vs-cameroon-731768\n", 429 | "error parsing string: " 430 | ] 431 | }, 432 | { 433 | "output_type": "stream", 434 | "stream": "stdout", 435 | "text": [ 436 | " \n", 437 | "mexico-vs-cameroon-731768 done\n", 438 | "brazil-vs-mexico-731783\n", 439 | "error parsing string: " 440 | ] 441 | }, 442 | { 443 | "output_type": "stream", 444 | "stream": "stdout", 445 | "text": [ 446 | " \n", 447 | "brazil-vs-mexico-731783 done\n", 448 | "cameroon-vs-croatia-731784\n", 449 | "error parsing string: " 450 | ] 451 | }, 452 | { 453 | "output_type": "stream", 454 | "stream": "stdout", 455 | "text": [ 456 | " \n", 457 | "cameroon-vs-croatia-731784 done\n", 458 | "croatia-vs-mexico-731800\n", 459 | "error parsing string: " 460 | ] 461 | }, 462 | { 463 | "output_type": "stream", 464 | "stream": "stdout", 465 | "text": [ 466 | " \n", 467 | "croatia-vs-mexico-731800 done\n", 468 | "cameroon-vs-brazil-731799\n", 469 | "error parsing string: " 470 | ] 471 | }, 472 | { 473 | 
"output_type": "stream", 474 | "stream": "stdout", 475 | "text": [ 476 | " \n", 477 | "cameroon-vs-brazil-731799 done\n", 478 | "spain-vs-netherlands-731769\n", 479 | "error parsing string: " 480 | ] 481 | }, 482 | { 483 | "output_type": "stream", 484 | "stream": "stdout", 485 | "text": [ 486 | " \n", 487 | "spain-vs-netherlands-731769 done\n", 488 | "chile-vs-australia-731770\n", 489 | "error parsing string: " 490 | ] 491 | }, 492 | { 493 | "output_type": "stream", 494 | "stream": "stdout", 495 | "text": [ 496 | " \n", 497 | "chile-vs-australia-731770 done\n", 498 | "australia-vs-netherlands-731786\n", 499 | "error parsing string: " 500 | ] 501 | }, 502 | { 503 | "output_type": "stream", 504 | "stream": "stdout", 505 | "text": [ 506 | " \n", 507 | "australia-vs-netherlands-731786 done\n", 508 | "spain-vs-chile-731785\n", 509 | "error parsing string: " 510 | ] 511 | }, 512 | { 513 | "output_type": "stream", 514 | "stream": "stdout", 515 | "text": [ 516 | " \n", 517 | "spain-vs-chile-731785 done\n", 518 | "netherlands-vs-chile-731802\n", 519 | "error parsing string: " 520 | ] 521 | }, 522 | { 523 | "output_type": "stream", 524 | "stream": "stdout", 525 | "text": [ 526 | " \n", 527 | "netherlands-vs-chile-731802 done\n", 528 | "australia-vs-spain-731801\n", 529 | "error parsing string: " 530 | ] 531 | }, 532 | { 533 | "output_type": "stream", 534 | "stream": "stdout", 535 | "text": [ 536 | " \n", 537 | "australia-vs-spain-731801 done\n", 538 | "colombia-vs-greece-731771\n", 539 | "error parsing string: " 540 | ] 541 | }, 542 | { 543 | "output_type": "stream", 544 | "stream": "stdout", 545 | "text": [ 546 | " \n", 547 | "colombia-vs-greece-731771 done\n", 548 | "ivory-coast-vs-japan-731772\n", 549 | "error parsing string: " 550 | ] 551 | }, 552 | { 553 | "output_type": "stream", 554 | "stream": "stdout", 555 | "text": [ 556 | " \n", 557 | "ivory-coast-vs-japan-731772 done\n", 558 | "colombia-vs-ivory-coast-731787\n", 559 | "error parsing string: " 560 | ] 561 | }, 562 
| { 563 | "output_type": "stream", 564 | "stream": "stdout", 565 | "text": [ 566 | " \n", 567 | "colombia-vs-ivory-coast-731787 done\n", 568 | "japan-vs-greece-731788\n", 569 | "error parsing string: " 570 | ] 571 | }, 572 | { 573 | "output_type": "stream", 574 | "stream": "stdout", 575 | "text": [ 576 | " \n", 577 | "japan-vs-greece-731788 done\n", 578 | "japan-vs-colombia-731803\n", 579 | "error parsing string: " 580 | ] 581 | }, 582 | { 583 | "output_type": "stream", 584 | "stream": "stdout", 585 | "text": [ 586 | " \n", 587 | "japan-vs-colombia-731803 done\n", 588 | "greece-vs-ivory-coast-731804\n", 589 | "error parsing string: " 590 | ] 591 | }, 592 | { 593 | "output_type": "stream", 594 | "stream": "stdout", 595 | "text": [ 596 | " \n", 597 | "greece-vs-ivory-coast-731804 done\n", 598 | "uruguay-vs-costa-rica-731773\n", 599 | "error parsing string: " 600 | ] 601 | }, 602 | { 603 | "output_type": "stream", 604 | "stream": "stdout", 605 | "text": [ 606 | " \n", 607 | "uruguay-vs-costa-rica-731773 done\n", 608 | "england-vs-italy-731774\n", 609 | "error parsing string: " 610 | ] 611 | }, 612 | { 613 | "output_type": "stream", 614 | "stream": "stdout", 615 | "text": [ 616 | " \n", 617 | "england-vs-italy-731774 done\n", 618 | "uruguay-vs-england-731789\n", 619 | "error parsing string: " 620 | ] 621 | }, 622 | { 623 | "output_type": "stream", 624 | "stream": "stdout", 625 | "text": [ 626 | " \n", 627 | "uruguay-vs-england-731789 done\n", 628 | "italy-vs-costa-rica-731790\n", 629 | "error parsing string: " 630 | ] 631 | }, 632 | { 633 | "output_type": "stream", 634 | "stream": "stdout", 635 | "text": [ 636 | " \n", 637 | "italy-vs-costa-rica-731790 done\n", 638 | "italy-vs-uruguay-731805\n", 639 | "error parsing string: " 640 | ] 641 | }, 642 | { 643 | "output_type": "stream", 644 | "stream": "stdout", 645 | "text": [ 646 | " \n", 647 | "italy-vs-uruguay-731805 done\n", 648 | "costa-rica-vs-england-731806\n", 649 | "error parsing string: " 650 | ] 651 | }, 652 | { 
653 | "output_type": "stream", 654 | "stream": "stdout", 655 | "text": [ 656 | " \n", 657 | "costa-rica-vs-england-731806 done\n", 658 | "switzerland-vs-ecuador-731775\n", 659 | "error parsing string: " 660 | ] 661 | }, 662 | { 663 | "output_type": "stream", 664 | "stream": "stdout", 665 | "text": [ 666 | " \n", 667 | "switzerland-vs-ecuador-731775 done\n", 668 | "france-vs-honduras-731776\n", 669 | "error parsing string: " 670 | ] 671 | }, 672 | { 673 | "output_type": "stream", 674 | "stream": "stdout", 675 | "text": [ 676 | " \n", 677 | "france-vs-honduras-731776 done\n", 678 | "switzerland-vs-france-731791\n", 679 | "error parsing string: " 680 | ] 681 | }, 682 | { 683 | "output_type": "stream", 684 | "stream": "stdout", 685 | "text": [ 686 | " \n", 687 | "switzerland-vs-france-731791 done\n", 688 | "honduras-vs-ecuador-731792\n", 689 | "error parsing string: " 690 | ] 691 | }, 692 | { 693 | "output_type": "stream", 694 | "stream": "stdout", 695 | "text": [ 696 | " \n", 697 | "honduras-vs-ecuador-731792 done\n", 698 | "ecuador-vs-france-731808\n", 699 | "error parsing string: " 700 | ] 701 | }, 702 | { 703 | "output_type": "stream", 704 | "stream": "stdout", 705 | "text": [ 706 | " \n", 707 | "ecuador-vs-france-731808 done\n", 708 | "honduras-vs-switzerland-731807\n", 709 | "error parsing string: " 710 | ] 711 | }, 712 | { 713 | "output_type": "stream", 714 | "stream": "stdout", 715 | "text": [ 716 | " \n", 717 | "honduras-vs-switzerland-731807 done\n", 718 | "argentina-vs-bosnia-herz-731777\n", 719 | "error parsing string: " 720 | ] 721 | }, 722 | { 723 | "output_type": "stream", 724 | "stream": "stdout", 725 | "text": [ 726 | " \n", 727 | "argentina-vs-bosnia-herz-731777 done\n", 728 | "iran-vs-nigeria-731778\n", 729 | "error parsing string: " 730 | ] 731 | }, 732 | { 733 | "output_type": "stream", 734 | "stream": "stdout", 735 | "text": [ 736 | " \n", 737 | "iran-vs-nigeria-731778 done\n", 738 | "argentina-vs-iran-731793\n", 739 | "error parsing string: " 740 
| ] 741 | }, 742 | { 743 | "output_type": "stream", 744 | "stream": "stdout", 745 | "text": [ 746 | " \n", 747 | "argentina-vs-iran-731793 done\n", 748 | "nigeria-vs-bosnia-herz-731794\n", 749 | "error parsing string: " 750 | ] 751 | }, 752 | { 753 | "output_type": "stream", 754 | "stream": "stdout", 755 | "text": [ 756 | " \n", 757 | "nigeria-vs-bosnia-herz-731794 done\n", 758 | "nigeria-vs-argentina-731809\n", 759 | "error parsing string: " 760 | ] 761 | }, 762 | { 763 | "output_type": "stream", 764 | "stream": "stdout", 765 | "text": [ 766 | " \n", 767 | "nigeria-vs-argentina-731809 done\n", 768 | "bosnia-herz-vs-iran-731810\n", 769 | "error parsing string: " 770 | ] 771 | }, 772 | { 773 | "output_type": "stream", 774 | "stream": "stdout", 775 | "text": [ 776 | " \n", 777 | "bosnia-herz-vs-iran-731810 done\n", 778 | "germany-vs-portugal-731779\n", 779 | "error parsing string: " 780 | ] 781 | }, 782 | { 783 | "output_type": "stream", 784 | "stream": "stdout", 785 | "text": [ 786 | " \n", 787 | "germany-vs-portugal-731779 done\n", 788 | "ghana-vs-usa-731780\n", 789 | "error parsing string: " 790 | ] 791 | }, 792 | { 793 | "output_type": "stream", 794 | "stream": "stdout", 795 | "text": [ 796 | " \n", 797 | "ghana-vs-usa-731780 done\n", 798 | "germany-vs-ghana-731795\n", 799 | "error parsing string: " 800 | ] 801 | }, 802 | { 803 | "output_type": "stream", 804 | "stream": "stdout", 805 | "text": [ 806 | " \n", 807 | "germany-vs-ghana-731795 done\n", 808 | "usa-vs-portugal-731796\n", 809 | "error parsing string: " 810 | ] 811 | }, 812 | { 813 | "output_type": "stream", 814 | "stream": "stdout", 815 | "text": [ 816 | " \n", 817 | "usa-vs-portugal-731796 done\n", 818 | "portugal-vs-ghana-731812\n", 819 | "error parsing string: " 820 | ] 821 | }, 822 | { 823 | "output_type": "stream", 824 | "stream": "stdout", 825 | "text": [ 826 | " \n", 827 | "portugal-vs-ghana-731812 done\n", 828 | "usa-vs-germany-731811\n", 829 | "error parsing string: " 830 | ] 831 | }, 832 | { 
833 | "output_type": "stream", 834 | "stream": "stdout", 835 | "text": [ 836 | " \n", 837 | "usa-vs-germany-731811 done\n", 838 | "belgium-vs-algeria-731781\n", 839 | "error parsing string: " 840 | ] 841 | }, 842 | { 843 | "output_type": "stream", 844 | "stream": "stdout", 845 | "text": [ 846 | " \n", 847 | "belgium-vs-algeria-731781 done\n", 848 | "russia-vs-south-korea-731782\n", 849 | "error parsing string: " 850 | ] 851 | }, 852 | { 853 | "output_type": "stream", 854 | "stream": "stdout", 855 | "text": [ 856 | " \n", 857 | "russia-vs-south-korea-731782 done\n", 858 | "belgium-vs-russia-731797\n", 859 | "error parsing string: " 860 | ] 861 | }, 862 | { 863 | "output_type": "stream", 864 | "stream": "stdout", 865 | "text": [ 866 | " \n", 867 | "belgium-vs-russia-731797 done\n", 868 | "south-korea-vs-algeria-731798\n", 869 | "error parsing string: " 870 | ] 871 | }, 872 | { 873 | "output_type": "stream", 874 | "stream": "stdout", 875 | "text": [ 876 | " \n", 877 | "south-korea-vs-algeria-731798 done\n", 878 | "algeria-vs-russia-731814\n", 879 | "error parsing string: " 880 | ] 881 | }, 882 | { 883 | "output_type": "stream", 884 | "stream": "stdout", 885 | "text": [ 886 | " \n", 887 | "algeria-vs-russia-731814 done\n", 888 | "south-korea-vs-belgium-731813\n", 889 | "error parsing string: " 890 | ] 891 | }, 892 | { 893 | "output_type": "stream", 894 | "stream": "stdout", 895 | "text": [ 896 | " \n", 897 | "south-korea-vs-belgium-731813 done\n", 898 | "brazil-vs-chile-731815 already processed\n", 899 | "colombia-vs-uruguay-731816 already processed\n", 900 | "netherlands-vs-mexico-731817 already processed\n", 901 | "costa-rica-vs-greece-731818 already processed\n", 902 | "france-vs-nigeria-731819 already processed\n", 903 | "germany-vs-algeria-731820 already processed\n", 904 | "argentina-vs-switzerland-731821 already processed\n", 905 | "belgium-vs-usa-731822 already processed\n", 906 | "france-vs-germany-731824 already processed\n", 907 | "brazil-vs-colombia-731823 
already processed\n", 908 | "argentina-vs-belgium-731826 already processed\n", 909 | "netherlands-vs-costa-rica-731825 already processed\n", 910 | "brazil-vs-germany-731827 already processed\n", 911 | "netherlands-vs-argentina-731828 already processed\n", 912 | "brazil-vs-netherlands-731829 already processed\n", 913 | "germany-vs-argentina-731830 already processed\n", 914 | "brazil-vs-croatia-731767 already processed\n", 915 | "mexico-vs-cameroon-731768 already processed\n", 916 | "brazil-vs-mexico-731783 already processed\n", 917 | "cameroon-vs-croatia-731784 already processed\n", 918 | "croatia-vs-mexico-731800 already processed\n", 919 | "cameroon-vs-brazil-731799 already processed\n", 920 | "spain-vs-netherlands-731769 already processed\n", 921 | "chile-vs-australia-731770 already processed\n", 922 | "australia-vs-netherlands-731786 already processed\n", 923 | "spain-vs-chile-731785 already processed\n", 924 | "netherlands-vs-chile-731802 already processed\n", 925 | "australia-vs-spain-731801 already processed\n", 926 | "colombia-vs-greece-731771 already processed\n", 927 | "ivory-coast-vs-japan-731772 already processed\n", 928 | "colombia-vs-ivory-coast-731787 already processed\n", 929 | "japan-vs-greece-731788 already processed\n", 930 | "japan-vs-colombia-731803 already processed\n", 931 | "greece-vs-ivory-coast-731804 already processed\n", 932 | "uruguay-vs-costa-rica-731773 already processed\n", 933 | "england-vs-italy-731774 already processed\n", 934 | "uruguay-vs-england-731789 already processed\n", 935 | "italy-vs-costa-rica-731790 already processed\n", 936 | "italy-vs-uruguay-731805 already processed\n", 937 | "costa-rica-vs-england-731806 already processed\n", 938 | "switzerland-vs-ecuador-731775 already processed\n", 939 | "france-vs-honduras-731776 already processed\n", 940 | "switzerland-vs-france-731791 already processed\n", 941 | "honduras-vs-ecuador-731792 already processed\n", 942 | "ecuador-vs-france-731808 already processed\n", 943 | 
"honduras-vs-switzerland-731807 already processed\n", 944 | "argentina-vs-bosnia-herz-731777 already processed\n", 945 | "iran-vs-nigeria-731778 already processed\n", 946 | "argentina-vs-iran-731793 already processed\n", 947 | "nigeria-vs-bosnia-herz-731794 already processed\n", 948 | "nigeria-vs-argentina-731809 already processed\n", 949 | "bosnia-herz-vs-iran-731810 already processed\n", 950 | "germany-vs-portugal-731779 already processed\n", 951 | "ghana-vs-usa-731780 already processed\n", 952 | "germany-vs-ghana-731795 already processed\n", 953 | "usa-vs-portugal-731796 already processed\n", 954 | "portugal-vs-ghana-731812 already processed\n", 955 | "usa-vs-germany-731811 already processed\n", 956 | "belgium-vs-algeria-731781 already processed\n", 957 | "russia-vs-south-korea-731782 already processed\n", 958 | "belgium-vs-russia-731797 already processed\n", 959 | "south-korea-vs-algeria-731798 already processed\n", 960 | "algeria-vs-russia-731814 already processed\n", 961 | "south-korea-vs-belgium-731813 already processed\n" 962 | ] 963 | } 964 | ], 965 | "prompt_number": 24 966 | }, 967 | { 968 | "cell_type": "code", 969 | "collapsed": false, 970 | "input": [ 971 | "print len(data.keys()) #make sure you have all 64 games" 972 | ], 973 | "language": "python", 974 | "metadata": {}, 975 | "outputs": [ 976 | { 977 | "output_type": "stream", 978 | "stream": "stdout", 979 | "text": [ 980 | "64\n" 981 | ] 982 | } 983 | ], 984 | "prompt_number": 26 985 | }, 986 | { 987 | "cell_type": "code", 988 | "collapsed": false, 989 | "input": [ 990 | "import pickle\n", 991 | "\n", 992 | "pickle.dump(data, open( \"wc2014.p\", \"wb\"))" 993 | ], 994 | "language": "python", 995 | "metadata": {}, 996 | "outputs": [], 997 | "prompt_number": 29 998 | }, 999 | { 1000 | "cell_type": "code", 1001 | "collapsed": false, 1002 | "input": [ 1003 | "data == pickle.load(open(\"wc2014.p\", \"rb\")) #because I'm a bit OCD and want to make sure the data was properly stored" 1004 | ], 1005 | 
"language": "python", 1006 | "metadata": {}, 1007 | "outputs": [ 1008 | { 1009 | "metadata": {}, 1010 | "output_type": "pyout", 1011 | "prompt_number": 32, 1012 | "text": [ 1013 | "True" 1014 | ] 1015 | } 1016 | ], 1017 | "prompt_number": 32 1018 | }, 1019 | { 1020 | "cell_type": "markdown", 1021 | "metadata": {}, 1022 | "source": [ 1023 | "## That's all folks!\n", 1024 | "\n", 1025 | "The boring part is over. Now, it's time to play :)\n", 1026 | "\n", 1027 | "Check out my [WC final analysis notebook](http://nbviewer.ipython.org/github/rjtavares/football-crunching/blob/master/notebooks/an%20exploratory%20data%20analysis%20of%20the%20world%20cup%20final.ipynb) for an example of what you can do with the data, and follow my GitHub repository [Football Crunching](https://github.com/rjtavares/football-crunching) for more analysis in the future." 1028 | ] 1029 | } 1030 | ], 1031 | "metadata": {} 1032 | } 1033 | ] 1034 | } -------------------------------------------------------------------------------- /notebooks/player value appreciation in Portuguese vs. 
English clubs.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "name": "", 4 | "signature": "sha256:7214aec897a9642abbdd6aecb8de29f6831a58e2f030fe64355efd15259db272" 5 | }, 6 | "nbformat": 3, 7 | "nbformat_minor": 0, 8 | "worksheets": [ 9 | { 10 | "cells": [ 11 | { 12 | "cell_type": "code", 13 | "collapsed": false, 14 | "input": [ 15 | "import requests\n", 16 | "from bs4 import BeautifulSoup\n", 17 | "import pandas as pd\n", 18 | "import numpy as np\n", 19 | "import matplotlib.pyplot as plt\n", 20 | "from matplotlib.lines import Line2D\n", 21 | "\n", 22 | "%matplotlib inline\n", 23 | "\n", 24 | "links = {'porto': 'http://www.transfermarkt.co.uk/fc-porto/alletransfers/verein/720',\n", 25 | " 'benfica': 'http://www.transfermarkt.co.uk/benfica-lissabon/alletransfers/verein/294',\n", 26 | " 'sporting': 'http://www.transfermarkt.co.uk/sporting-lissabon/alletransfers/verein/336',\n", 27 | " 'man city': 'http://www.transfermarkt.co.uk/manchester-city/alletransfers/verein/281',\n", 28 | " 'man united': 'http://www.transfermarkt.co.uk/manchester-united/alletransfers/verein/985',\n", 29 | " 'chelsea': 'http://www.transfermarkt.co.uk/fc-chelsea/alletransfers/verein/631',\n", 30 | " }" 31 | ], 32 | "language": "python", 33 | "metadata": {}, 34 | "outputs": [], 35 | "prompt_number": 3 36 | }, 37 | { 38 | "cell_type": "code", 39 | "collapsed": false, 40 | "input": [ 41 | "transfers = []\n", 42 | "\n", 43 | "for club, link in links.items():\n", 44 | " response = requests.get(link)\n", 45 | "\n", 46 | " soup = BeautifulSoup(response.text).find_all('div', attrs={'class': 'box'})\n", 47 | "\n", 48 | " for box in soup:\n", 49 | " title_soup = box\n", 50 | " title = title_soup.find('div', attrs={'class': 'table-header'})\n", 51 | "\n", 52 | " if title:\n", 53 | " title = title.text.split('\\t')[4]\n", 54 | "\n", 55 | " for row in box.find_all('tr'):\n", 56 | " fields = title.split(' ')\n", 57 | " 
fields.append(club)\n", 58 | " for field in row.find_all('td'):\n", 59 | " if field[\"class\"][0] == 'rechts':\n", 60 | " fields.append(field.text)\n", 61 | " elif field[\"class\"][0] in ['redtext', 'greentext']:\n", 62 | " pass\n", 63 | " else:\n", 64 | " if field.a:\n", 65 | " try:\n", 66 | " fields.append(field.a[\"title\"])\n", 67 | " except:\n", 68 | " print \"error:\", field[\"class\"][0]\n", 69 | " else:\n", 70 | " try:\n", 71 | " fields.append(field[\"title\"])\n", 72 | " except:\n", 73 | " print \"error:\", field[\"class\"][0]\n", 74 | " if len(fields)==8:\n", 75 | " transfers.append(fields)\n", 76 | "\n", 77 | "print len(transfers)" 78 | ], 79 | "language": "python", 80 | "metadata": {}, 81 | "outputs": [ 82 | { 83 | "output_type": "stream", 84 | "stream": "stdout", 85 | "text": [ 86 | "6802\n" 87 | ] 88 | } 89 | ], 90 | "prompt_number": 4 91 | }, 92 | { 93 | "cell_type": "code", 94 | "collapsed": false, 95 | "input": [ 96 | "df = pd.DataFrame(transfers, columns=['type', 'season', 'club', 'position', 'player', 'transfer_club', 'transfer_club_alt', 'fee'])\n", 97 | "\n", 98 | "df.head()" 99 | ], 100 | "language": "python", 101 | "metadata": {}, 102 | "outputs": [ 103 | { 104 | "html": [ 105 | "
| \n", 110 | " | type | \n", 111 | "season | \n", 112 | "club | \n", 113 | "position | \n", 114 | "player | \n", 115 | "transfer_club | \n", 116 | "transfer_club_alt | \n", 117 | "fee | \n", 118 | "
|---|---|---|---|---|---|---|---|---|
| 0 | \n", 123 | "Arrivals | \n", 124 | "14/15 | \n", 125 | "porto | \n", 126 | "Right Wing | \n", 127 | "Adri\u00e1n | \n", 128 | "Atl\u00e9tico Madrid | \n", 129 | "Atl\u00e9tico Madrid | \n", 130 | "9,68 Mill. \u00a3 | \n", 131 | "
| 1 | \n", 134 | "Arrivals | \n", 135 | "14/15 | \n", 136 | "porto | \n", 137 | "Centre Back | \n", 138 | "Bruno Martins Indi | \n", 139 | "Feyenoord Rotterdam | \n", 140 | "Feyenoord | \n", 141 | "6,78 Mill. \u00a3 | \n", 142 | "
| 2 | \n", 145 | "Arrivals | \n", 146 | "14/15 | \n", 147 | "porto | \n", 148 | "Attacking Midfield | \n", 149 | "Ot\u00e1vio | \n", 150 | "Sport Club Internacional | \n", 151 | "Internacional | \n", 152 | "6,16 Mill. \u00a3 | \n", 153 | "
| 3 | \n", 156 | "Arrivals | \n", 157 | "14/15 | \n", 158 | "porto | \n", 159 | "Attacking Midfield | \n", 160 | "Yacine Brahimi | \n", 161 | "Granada CF | \n", 162 | "Granada CF | \n", 163 | "5,72 Mill. \u00a3 | \n", 164 | "
| 4 | \n", 167 | "Arrivals | \n", 168 | "14/15 | \n", 169 | "porto | \n", 170 | "Centre Forward | \n", 171 | "Vincent Aboubakar | \n", 172 | "FC Lorient | \n", 173 | "FC Lorient | \n", 174 | "2,64 Mill. \u00a3 | \n", 175 | "
5 rows \u00d7 8 columns
\n", 179 | "| \n", 221 | " | type | \n", 222 | "season | \n", 223 | "position | \n", 224 | "player | \n", 225 | "transfer_club | \n", 226 | "transfer_club_alt | \n", 227 | "fee | \n", 228 | "
|---|---|---|---|---|---|---|---|
| club | \n", 231 | "\n", 232 | " | \n", 233 | " | \n", 234 | " | \n", 235 | " | \n", 236 | " | \n", 237 | " | \n", 238 | " |
| benfica | \n", 243 | "1149 | \n", 244 | "1149 | \n", 245 | "1149 | \n", 246 | "1149 | \n", 247 | "1149 | \n", 248 | "1149 | \n", 249 | "1149 | \n", 250 | "
| chelsea | \n", 253 | "1187 | \n", 254 | "1187 | \n", 255 | "1187 | \n", 256 | "1187 | \n", 257 | "1187 | \n", 258 | "1187 | \n", 259 | "1187 | \n", 260 | "
| man city | \n", 263 | "1279 | \n", 264 | "1279 | \n", 265 | "1279 | \n", 266 | "1279 | \n", 267 | "1279 | \n", 268 | "1279 | \n", 269 | "1279 | \n", 270 | "
| man united | \n", 273 | "1253 | \n", 274 | "1253 | \n", 275 | "1253 | \n", 276 | "1253 | \n", 277 | "1253 | \n", 278 | "1253 | \n", 279 | "1253 | \n", 280 | "
| porto | \n", 283 | "1053 | \n", 284 | "1053 | \n", 285 | "1053 | \n", 286 | "1053 | \n", 287 | "1053 | \n", 288 | "1053 | \n", 289 | "1053 | \n", 290 | "
| sporting | \n", 293 | "881 | \n", 294 | "881 | \n", 295 | "881 | \n", 296 | "881 | \n", 297 | "881 | \n", 298 | "881 | \n", 299 | "881 | \n", 300 | "
6 rows \u00d7 7 columns
\n", 304 | "| \n", 386 | " | type | \n", 387 | "season | \n", 388 | "position | \n", 389 | "player | \n", 390 | "transfer_club | \n", 391 | "transfer_club_alt | \n", 392 | "fee | \n", 393 | "fee_value | \n", 394 | "country | \n", 395 | "
|---|---|---|---|---|---|---|---|---|---|
| club | \n", 398 | "\n", 399 | " | \n", 400 | " | \n", 401 | " | \n", 402 | " | \n", 403 | " | \n", 404 | " | \n", 405 | " | \n", 406 | " | \n", 407 | " |
| benfica | \n", 412 | "1149 | \n", 413 | "1149 | \n", 414 | "1149 | \n", 415 | "1149 | \n", 416 | "1149 | \n", 417 | "1149 | \n", 418 | "1149 | \n", 419 | "439 | \n", 420 | "1149 | \n", 421 | "
| chelsea | \n", 424 | "1187 | \n", 425 | "1187 | \n", 426 | "1187 | \n", 427 | "1187 | \n", 428 | "1187 | \n", 429 | "1187 | \n", 430 | "1187 | \n", 431 | "510 | \n", 432 | "1187 | \n", 433 | "
| man city | \n", 436 | "1279 | \n", 437 | "1279 | \n", 438 | "1279 | \n", 439 | "1279 | \n", 440 | "1279 | \n", 441 | "1279 | \n", 442 | "1279 | \n", 443 | "547 | \n", 444 | "1279 | \n", 445 | "
| man united | \n", 448 | "1253 | \n", 449 | "1253 | \n", 450 | "1253 | \n", 451 | "1253 | \n", 452 | "1253 | \n", 453 | "1253 | \n", 454 | "1253 | \n", 455 | "564 | \n", 456 | "1253 | \n", 457 | "
| porto | \n", 460 | "1053 | \n", 461 | "1053 | \n", 462 | "1053 | \n", 463 | "1053 | \n", 464 | "1053 | \n", 465 | "1053 | \n", 466 | "1053 | \n", 467 | "396 | \n", 468 | "1053 | \n", 469 | "
| sporting | \n", 472 | "881 | \n", 473 | "881 | \n", 474 | "881 | \n", 475 | "881 | \n", 476 | "881 | \n", 477 | "881 | \n", 478 | "881 | \n", 479 | "358 | \n", 480 | "881 | \n", 481 | "
6 rows \u00d7 9 columns
\n", 485 | "| type | \n", 534 | "Arrivals | \n", 535 | "Departures | \n", 536 | "
|---|---|---|
| count | \n", 541 | "1048.000000 | \n", 542 | "1048.000000 | \n", 543 | "
| mean | \n", 546 | "2.128931 | \n", 547 | "2.245259 | \n", 548 | "
| std | \n", 551 | "4.964249 | \n", 552 | "6.073548 | \n", 553 | "
| min | \n", 556 | "0.000000 | \n", 557 | "0.000000 | \n", 558 | "
| 25% | \n", 561 | "0.000000 | \n", 562 | "0.000000 | \n", 563 | "
| 50% | \n", 566 | "0.000000 | \n", 567 | "0.000000 | \n", 568 | "
| 75% | \n", 571 | "1.760000 | \n", 572 | "1.580000 | \n", 573 | "
| max | \n", 576 | "40.480000 | \n", 577 | "82.720000 | \n", 578 | "
8 rows \u00d7 2 columns
\n", 582 | "| type | \n", 622 | "Arrivals | \n", 623 | "Departures | \n", 624 | "Profit % | \n", 625 | "
|---|---|---|---|
| club | \n", 628 | "\n", 629 | " | \n", 630 | " | \n", 631 | " |
| benfica | \n", 636 | "173.825667 | \n", 637 | "408.708 | \n", 638 | "135.125231 | \n", 639 | "
| chelsea | \n", 642 | "767.142000 | \n", 643 | "453.531 | \n", 644 | "-40.880437 | \n", 645 | "
| man city | \n", 648 | "539.957000 | \n", 649 | "251.616 | \n", 650 | "-53.400734 | \n", 651 | "
| man united | \n", 654 | "436.694000 | \n", 655 | "400.773 | \n", 656 | "-8.225668 | \n", 657 | "
| porto | \n", 660 | "206.609000 | \n", 661 | "641.992 | \n", 662 | "210.727993 | \n", 663 | "
| sporting | \n", 666 | "106.892000 | \n", 667 | "196.411 | \n", 668 | "83.747147 | \n", 669 | "
6 rows \u00d7 3 columns
\n", 673 | "| \n", 777 | " | player | \n", 778 | "Departures | \n", 779 | "
|---|---|---|
| club | \n", 782 | "\n", 783 | " | \n", 784 | " |
| benfica | \n", 789 | "82 | \n", 790 | "0.874476 | \n", 791 | "
| chelsea | \n", 794 | "91 | \n", 795 | "0.677357 | \n", 796 | "
| man city | \n", 799 | "110 | \n", 800 | "0.411364 | \n", 801 | "
| man united | \n", 804 | "132 | \n", 805 | "0.892356 | \n", 806 | "
| porto | \n", 809 | "50 | \n", 810 | "2.031920 | \n", 811 | "
| sporting | \n", 814 | "75 | \n", 815 | "1.635387 | \n", 816 | "
6 rows \u00d7 2 columns
\n", 820 | "| type | \n", 858 | "player | \n", 859 | "club | \n", 860 | "country | \n", 861 | "Arrivals | \n", 862 | "Departures | \n", 863 | "Profit | \n", 864 | "
|---|---|---|---|---|---|---|
| 52 | \n", 869 | "Andriy Shevchenko | \n", 870 | "chelsea | \n", 871 | "en | \n", 872 | "40.48 | \n", 873 | "0.00 | \n", 874 | "-40.48 | \n", 875 | "
| 854 | \n", 878 | "Rio Ferdinand | \n", 879 | "man united | \n", 880 | "en | \n", 881 | "40.48 | \n", 882 | "0.00 | \n", 883 | "-40.48 | \n", 884 | "
| 661 | \n", 887 | "Michael Essien | \n", 888 | "chelsea | \n", 889 | "en | \n", 890 | "33.44 | \n", 891 | "0.00 | \n", 892 | "-33.44 | \n", 893 | "
| 254 | \n", 896 | "Dimitar Berbatov | \n", 897 | "man united | \n", 898 | "en | \n", 899 | "33.44 | \n", 900 | "4.40 | \n", 901 | "-29.04 | \n", 902 | "
| 471 | \n", 905 | "Joleon Lescott | \n", 906 | "man city | \n", 907 | "en | \n", 908 | "24.20 | \n", 909 | "0.00 | \n", 910 | "-24.20 | \n", 911 | "
| 394 | \n", 914 | "Hern\u00e1n Crespo | \n", 915 | "chelsea | \n", 916 | "en | \n", 917 | "22.88 | \n", 918 | "0.00 | \n", 919 | "-22.88 | \n", 920 | "
| 866 | \n", 923 | "Robinho | \n", 924 | "man city | \n", 925 | "en | \n", 926 | "37.84 | \n", 927 | "15.84 | \n", 928 | "-22.00 | \n", 929 | "
| 745 | \n", 932 | "Owen Hargreaves | \n", 933 | "man united | \n", 934 | "en | \n", 935 | "22.00 | \n", 936 | "0.00 | \n", 937 | "-22.00 | \n", 938 | "
| 517 | \n", 941 | "J\u00f4 | \n", 942 | "man city | \n", 943 | "en | \n", 944 | "21.12 | \n", 945 | "0.00 | \n", 946 | "-21.12 | \n", 947 | "
| 290 | \n", 950 | "Emmanuel Adebayor | \n", 951 | "man city | \n", 952 | "en | \n", 953 | "25.52 | \n", 954 | "5.63 | \n", 955 | "-19.89 | \n", 956 | "
10 rows \u00d7 6 columns
\n", 960 | "| type | \n", 1000 | "player | \n", 1001 | "club | \n", 1002 | "country | \n", 1003 | "Arrivals | \n", 1004 | "Departures | \n", 1005 | "Profit | \n", 1006 | "
|---|---|---|---|---|---|---|
| 172 | \n", 1011 | "Cristiano Ronaldo | \n", 1012 | "man united | \n", 1013 | "en | \n", 1014 | "15.400 | \n", 1015 | "82.72 | \n", 1016 | "67.320 | \n", 1017 | "
| 284 | \n", 1020 | "Eliaquim Mangala | \n", 1021 | "porto | \n", 1022 | "pt | \n", 1023 | "5.720 | \n", 1024 | "47.36 | \n", 1025 | "41.640 | \n", 1026 | "
| 309 | \n", 1029 | "Falcao | \n", 1030 | "porto | \n", 1031 | "pt | \n", 1032 | "4.780 | \n", 1033 | "41.36 | \n", 1034 | "36.580 | \n", 1035 | "
| 427 | \n", 1038 | "James Rodr\u00edguez | \n", 1039 | "porto | \n", 1040 | "pt | \n", 1041 | "6.470 | \n", 1042 | "39.60 | \n", 1043 | "33.130 | \n", 1044 | "
| 213 | \n", 1047 | "David Beckham | \n", 1048 | "man united | \n", 1049 | "en | \n", 1050 | "0.000 | \n", 1051 | "33.00 | \n", 1052 | "33.000 | \n", 1053 | "
| 401 | \n", 1056 | "Hulk | \n", 1057 | "porto | \n", 1058 | "pt | \n", 1059 | "16.720 | \n", 1060 | "48.40 | \n", 1061 | "31.680 | \n", 1062 | "
| 81 | \n", 1065 | "Axel Witsel | \n", 1066 | "benfica | \n", 1067 | "pt | \n", 1068 | "7.920 | \n", 1069 | "35.20 | \n", 1070 | "27.280 | \n", 1071 | "
| 844 | \n", 1074 | "Ricardo Carvalho | \n", 1075 | "porto | \n", 1076 | "pt | \n", 1077 | "0.000 | \n", 1078 | "26.40 | \n", 1079 | "26.400 | \n", 1080 | "
| 336 | \n", 1083 | "F\u00e1bio Coentr\u00e3o | \n", 1084 | "benfica | \n", 1085 | "pt | \n", 1086 | "0.792 | \n", 1087 | "26.40 | \n", 1088 | "25.608 | \n", 1089 | "
| 799 | \n", 1092 | "Pepe | \n", 1093 | "porto | \n", 1094 | "pt | \n", 1095 | "1.760 | \n", 1096 | "26.40 | \n", 1097 | "24.640 | \n", 1098 | "
10 rows \u00d7 6 columns
\n", 1102 | "