├── .gitignore ├── EPV_grid.csv ├── LICENSE ├── README.md ├── data └── events.csv ├── linecollection.py ├── logo.jpg ├── main.py ├── requirements.txt ├── test.py ├── tutorial.ipynb ├── utils.py └── visuals.py /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | .ipynb_checkpoints 3 | */.ipynb_checkpoints/* 4 | -------------------------------------------------------------------------------- /EPV_grid.csv: -------------------------------------------------------------------------------- 1 | 0.0046,0.0046,0.0045,0.0046,0.0048,0.0051,0.0054,0.0057,0.0059,0.0062,0.0066,0.0068,0.0071,0.0072,0.0073,0.0077,0.0082,0.0086,0.0088,0.0092,0.0096,0.0100,0.0104,0.0110,0.0117,0.0123,0.0127,0.0131,0.0135,0.0142,0.0150,0.0160,0.0170,0.0181,0.0192,0.0208,0.0227,0.0244,0.0259,0.0276,0.0297,0.0313,0.0326,0.0341,0.0356,0.0365,0.0368,0.0388,0.0424,0.0443 2 | 0.0044,0.0044,0.0045,0.0046,0.0049,0.0052,0.0055,0.0058,0.0060,0.0062,0.0066,0.0069,0.0072,0.0074,0.0075,0.0079,0.0084,0.0087,0.0090,0.0093,0.0097,0.0102,0.0106,0.0112,0.0118,0.0123,0.0128,0.0132,0.0136,0.0143,0.0152,0.0162,0.0171,0.0183,0.0196,0.0211,0.0229,0.0246,0.0261,0.0278,0.0298,0.0314,0.0327,0.0342,0.0359,0.0368,0.0370,0.0388,0.0421,0.0437 3 | 0.0041,0.0042,0.0044,0.0047,0.0052,0.0056,0.0058,0.0060,0.0061,0.0063,0.0067,0.0070,0.0074,0.0076,0.0079,0.0083,0.0087,0.0090,0.0092,0.0095,0.0101,0.0106,0.0111,0.0115,0.0119,0.0124,0.0130,0.0135,0.0139,0.0146,0.0156,0.0165,0.0174,0.0186,0.0202,0.0218,0.0232,0.0248,0.0265,0.0283,0.0300,0.0315,0.0328,0.0345,0.0366,0.0376,0.0376,0.0388,0.0413,0.0425 4 | 0.0041,0.0042,0.0045,0.0049,0.0054,0.0058,0.0061,0.0062,0.0062,0.0064,0.0068,0.0072,0.0075,0.0079,0.0082,0.0085,0.0089,0.0092,0.0094,0.0098,0.0104,0.0109,0.0114,0.0118,0.0122,0.0126,0.0133,0.0138,0.0142,0.0150,0.0160,0.0170,0.0179,0.0192,0.0209,0.0224,0.0237,0.0252,0.0268,0.0284,0.0300,0.0316,0.0332,0.0351,0.0373,0.0386,0.0392,0.0400,0.0411,0.0417 5 | 0.0044,0.0045,0.0048,0.0052,0.0056,0.0060,0.0062,0.0064,0.0065,0.0067,0.0070,0.0073,0.0077,0.0080,0.0083,0.0087,0.0092,0.0095,0.0097,0.0101,0.0107,0.0113,0.0117,0.0121,0.0125,0.0129,0.0136,0.0141,0.0146,0.0153,0.0164,0.0175,0.0186,0.0200,0.0215,0.0230,0.0243,0.0256,0.0269,0.0282,0.0296,0.0315,0.0339,0.0361,0.0380,0.0400,0.0418,0.0424,0.0416,0.0412 6 | 0.0045,0.0047,0.0051,0.0054,0.0057,0.0060,0.0063,0.0066,0.0067,0.0069,0.0072,0.0075,0.0079,0.0082,0.0085,0.0089,0.0094,0.0098,0.0100,0.0104,0.0111,0.0116,0.0120,0.0124,0.0127,0.0131,0.0138,0.0144,0.0149,0.0157,0.0169,0.0180,0.0191,0.0204,0.0219,0.0234,0.0248,0.0261,0.0274,0.0287,0.0300,0.0323,0.0357,0.0382,0.0399,0.0421,0.0447,0.0452,0.0438,0.0430 7 | 0.0047,0.0049,0.0053,0.0056,0.0057,0.0060,0.0064,0.0067,0.0068,0.0070,0.0075,0.0078,0.0080,0.0083,0.0087,0.0091,0.0096,0.0100,0.0103,0.0108,0.0115,0.0119,0.0123,0.0125,0.0127,0.0132,0.0140,0.0147,0.0153,0.0162,0.0174,0.0184,0.0192,0.0204,0.0221,0.0237,0.0251,0.0267,0.0285,0.0299,0.0311,0.0340,0.0387,0.0416,0.0429,0.0449,0.0476,0.0486,0.0477,0.0473 8 | 0.0047,0.0050,0.0054,0.0057,0.0059,0.0062,0.0067,0.0070,0.0070,0.0072,0.0077,0.0080,0.0082,0.0085,0.0090,0.0094,0.0098,0.0102,0.0106,0.0111,0.0117,0.0122,0.0125,0.0128,0.0131,0.0136,0.0143,0.0151,0.0157,0.0166,0.0177,0.0187,0.0195,0.0207,0.0223,0.0238,0.0253,0.0273,0.0299,0.0321,0.0341,0.0373,0.0417,0.0446,0.0460,0.0489,0.0534,0.0546,0.0525,0.0514 9 | 
0.0048,0.0050,0.0055,0.0059,0.0062,0.0066,0.0071,0.0074,0.0074,0.0076,0.0079,0.0082,0.0085,0.0088,0.0092,0.0095,0.0100,0.0104,0.0107,0.0112,0.0119,0.0124,0.0128,0.0132,0.0136,0.0142,0.0148,0.0155,0.0163,0.0171,0.0179,0.0188,0.0199,0.0211,0.0225,0.0238,0.0252,0.0278,0.0316,0.0353,0.0390,0.0421,0.0447,0.0471,0.0492,0.0541,0.0619,0.0633,0.0582,0.0556 10 | 0.0050,0.0052,0.0058,0.0062,0.0065,0.0069,0.0074,0.0077,0.0077,0.0078,0.0081,0.0084,0.0088,0.0091,0.0094,0.0097,0.0102,0.0106,0.0109,0.0114,0.0120,0.0125,0.0131,0.0136,0.0141,0.0146,0.0152,0.0158,0.0166,0.0173,0.0180,0.0189,0.0201,0.0214,0.0227,0.0243,0.0263,0.0293,0.0333,0.0373,0.0413,0.0452,0.0490,0.0540,0.0600,0.0655,0.0703,0.0702,0.0651,0.0626 11 | 0.0053,0.0057,0.0063,0.0067,0.0068,0.0071,0.0076,0.0079,0.0079,0.0081,0.0083,0.0086,0.0089,0.0093,0.0096,0.0100,0.0103,0.0107,0.0113,0.0117,0.0121,0.0126,0.0132,0.0138,0.0145,0.0150,0.0155,0.0161,0.0168,0.0174,0.0180,0.0188,0.0200,0.0213,0.0229,0.0253,0.0286,0.0318,0.0349,0.0380,0.0410,0.0465,0.0546,0.0653,0.0786,0.0829,0.0784,0.0752,0.0734,0.0725 12 | 0.0060,0.0062,0.0067,0.0070,0.0071,0.0074,0.0078,0.0080,0.0081,0.0082,0.0084,0.0087,0.0091,0.0095,0.0098,0.0101,0.0105,0.0109,0.0115,0.0119,0.0123,0.0128,0.0134,0.0140,0.0146,0.0152,0.0156,0.0163,0.0171,0.0178,0.0183,0.0191,0.0201,0.0214,0.0230,0.0259,0.0300,0.0335,0.0362,0.0395,0.0432,0.0514,0.0639,0.0779,0.0934,0.0998,0.0974,0.0967,0.0980,0.0986 13 | 0.0069,0.0069,0.0069,0.0071,0.0074,0.0077,0.0080,0.0082,0.0081,0.0082,0.0085,0.0089,0.0093,0.0096,0.0099,0.0102,0.0106,0.0111,0.0116,0.0121,0.0125,0.0130,0.0136,0.0141,0.0145,0.0150,0.0156,0.0165,0.0177,0.0185,0.0189,0.0196,0.0205,0.0216,0.0230,0.0260,0.0306,0.0344,0.0373,0.0419,0.0481,0.0597,0.0769,0.0918,0.1045,0.1162,0.1271,0.1347,0.1390,0.1412 14 | 0.0077,0.0075,0.0072,0.0072,0.0076,0.0079,0.0082,0.0083,0.0081,0.0082,0.0086,0.0089,0.0093,0.0096,0.0099,0.0103,0.0107,0.0112,0.0116,0.0121,0.0126,0.0131,0.0137,0.0140,0.0142,0.0147,0.0156,0.0165,0.0175,0.0183,0.0190,0.0198,0.0208,0.0219,0.0232,0.0262,0.0310,0.0352,0.0386,0.0454,0.0555,0.0698,0.0882,0.1038,0.1165,0.1362,0.1628,0.1982,0.2425,0.2647 15 | 0.0082,0.0080,0.0075,0.0075,0.0078,0.0081,0.0084,0.0084,0.0082,0.0083,0.0085,0.0088,0.0092,0.0096,0.0099,0.0103,0.0108,0.0112,0.0115,0.0119,0.0125,0.0130,0.0136,0.0138,0.0136,0.0142,0.0156,0.0164,0.0166,0.0173,0.0184,0.0196,0.0210,0.0223,0.0235,0.0265,0.0313,0.0358,0.0400,0.0500,0.0656,0.0816,0.0979,0.1139,0.1296,0.1598,0.2044,0.2873,0.4085,0.4691 16 | 0.0085,0.0083,0.0077,0.0076,0.0079,0.0082,0.0085,0.0085,0.0083,0.0083,0.0085,0.0088,0.0092,0.0095,0.0099,0.0103,0.0108,0.0112,0.0114,0.0118,0.0124,0.0130,0.0136,0.0137,0.0133,0.0139,0.0156,0.0164,0.0162,0.0168,0.0181,0.0195,0.0211,0.0225,0.0237,0.0267,0.0315,0.0362,0.0408,0.0523,0.0707,0.0875,0.1027,0.1190,0.1362,0.1716,0.2252,0.3319,0.4915,0.5714 17 | 0.0085,0.0083,0.0077,0.0076,0.0079,0.0082,0.0085,0.0085,0.0083,0.0083,0.0085,0.0088,0.0092,0.0095,0.0099,0.0103,0.0108,0.0112,0.0114,0.0118,0.0124,0.0130,0.0136,0.0137,0.0133,0.0139,0.0156,0.0164,0.0162,0.0168,0.0181,0.0195,0.0211,0.0225,0.0237,0.0267,0.0315,0.0362,0.0408,0.0523,0.0707,0.0875,0.1027,0.1190,0.1362,0.1716,0.2252,0.3319,0.4915,0.5714 18 | 0.0082,0.0080,0.0075,0.0075,0.0078,0.0081,0.0084,0.0084,0.0082,0.0083,0.0085,0.0088,0.0092,0.0096,0.0099,0.0103,0.0108,0.0112,0.0115,0.0119,0.0125,0.0130,0.0136,0.0138,0.0136,0.0142,0.0156,0.0164,0.0166,0.0173,0.0184,0.0196,0.0210,0.0223,0.0235,0.0265,0.0313,0.0358,0.0400,0.0500,0.0656,0.0816,0.0979,0.1139,0.1296,0.1598,0.2044,0.2873,0.4085,0.4691 19 | 
0.0077,0.0075,0.0072,0.0072,0.0076,0.0079,0.0082,0.0083,0.0081,0.0082,0.0086,0.0089,0.0093,0.0096,0.0099,0.0103,0.0107,0.0112,0.0116,0.0121,0.0126,0.0131,0.0137,0.0140,0.0142,0.0147,0.0156,0.0165,0.0175,0.0183,0.0190,0.0198,0.0208,0.0219,0.0232,0.0262,0.0310,0.0352,0.0386,0.0454,0.0555,0.0698,0.0882,0.1038,0.1165,0.1362,0.1628,0.1982,0.2425,0.2647 20 | 0.0069,0.0069,0.0069,0.0071,0.0074,0.0077,0.0080,0.0082,0.0081,0.0082,0.0085,0.0089,0.0093,0.0096,0.0099,0.0102,0.0106,0.0111,0.0116,0.0121,0.0125,0.0130,0.0136,0.0141,0.0145,0.0150,0.0156,0.0165,0.0177,0.0185,0.0189,0.0196,0.0205,0.0216,0.0230,0.0260,0.0306,0.0344,0.0373,0.0419,0.0481,0.0597,0.0769,0.0918,0.1045,0.1162,0.1271,0.1347,0.1390,0.1412 21 | 0.0060,0.0062,0.0067,0.0070,0.0071,0.0074,0.0078,0.0080,0.0081,0.0082,0.0084,0.0087,0.0091,0.0095,0.0098,0.0101,0.0105,0.0109,0.0115,0.0119,0.0123,0.0128,0.0134,0.0140,0.0146,0.0152,0.0156,0.0163,0.0171,0.0178,0.0183,0.0191,0.0201,0.0214,0.0230,0.0259,0.0300,0.0335,0.0362,0.0395,0.0432,0.0514,0.0639,0.0779,0.0934,0.0998,0.0974,0.0967,0.0980,0.0986 22 | 0.0053,0.0057,0.0063,0.0067,0.0068,0.0071,0.0076,0.0079,0.0079,0.0081,0.0083,0.0086,0.0089,0.0093,0.0096,0.0100,0.0103,0.0107,0.0113,0.0117,0.0121,0.0126,0.0132,0.0138,0.0145,0.0150,0.0155,0.0161,0.0168,0.0174,0.0180,0.0188,0.0200,0.0213,0.0229,0.0253,0.0286,0.0318,0.0349,0.0380,0.0410,0.0465,0.0546,0.0653,0.0786,0.0829,0.0784,0.0752,0.0734,0.0725 23 | 0.0050,0.0052,0.0058,0.0062,0.0065,0.0069,0.0074,0.0077,0.0077,0.0078,0.0081,0.0084,0.0088,0.0091,0.0094,0.0097,0.0102,0.0106,0.0109,0.0114,0.0120,0.0125,0.0131,0.0136,0.0141,0.0146,0.0152,0.0158,0.0166,0.0173,0.0180,0.0189,0.0201,0.0214,0.0227,0.0243,0.0263,0.0293,0.0333,0.0373,0.0413,0.0452,0.0490,0.0540,0.0600,0.0655,0.0703,0.0702,0.0651,0.0626 24 | 0.0048,0.0050,0.0055,0.0059,0.0062,0.0066,0.0071,0.0074,0.0074,0.0076,0.0079,0.0082,0.0085,0.0088,0.0092,0.0095,0.0100,0.0104,0.0107,0.0112,0.0119,0.0124,0.0128,0.0132,0.0136,0.0142,0.0148,0.0155,0.0163,0.0171,0.0179,0.0188,0.0199,0.0211,0.0225,0.0238,0.0252,0.0278,0.0316,0.0353,0.0390,0.0421,0.0447,0.0471,0.0492,0.0541,0.0619,0.0633,0.0582,0.0556 25 | 0.0047,0.0050,0.0054,0.0057,0.0059,0.0062,0.0067,0.0070,0.0070,0.0072,0.0077,0.0080,0.0082,0.0085,0.0090,0.0094,0.0098,0.0102,0.0106,0.0111,0.0117,0.0122,0.0125,0.0128,0.0131,0.0136,0.0143,0.0151,0.0157,0.0166,0.0177,0.0187,0.0195,0.0207,0.0223,0.0238,0.0253,0.0273,0.0299,0.0321,0.0341,0.0373,0.0417,0.0446,0.0460,0.0489,0.0534,0.0546,0.0525,0.0514 26 | 0.0047,0.0049,0.0053,0.0056,0.0057,0.0060,0.0064,0.0067,0.0068,0.0070,0.0075,0.0078,0.0080,0.0083,0.0087,0.0091,0.0096,0.0100,0.0103,0.0108,0.0115,0.0119,0.0123,0.0125,0.0127,0.0132,0.0140,0.0147,0.0153,0.0162,0.0174,0.0184,0.0192,0.0204,0.0221,0.0237,0.0251,0.0267,0.0285,0.0299,0.0311,0.0340,0.0387,0.0416,0.0429,0.0449,0.0476,0.0486,0.0477,0.0473 27 | 0.0045,0.0047,0.0051,0.0054,0.0057,0.0060,0.0063,0.0066,0.0067,0.0069,0.0072,0.0075,0.0079,0.0082,0.0085,0.0089,0.0094,0.0098,0.0100,0.0104,0.0111,0.0116,0.0120,0.0124,0.0127,0.0131,0.0138,0.0144,0.0149,0.0157,0.0169,0.0180,0.0191,0.0204,0.0219,0.0234,0.0248,0.0261,0.0274,0.0287,0.0300,0.0323,0.0357,0.0382,0.0399,0.0421,0.0447,0.0452,0.0438,0.0430 28 | 0.0044,0.0045,0.0048,0.0052,0.0056,0.0060,0.0062,0.0064,0.0065,0.0067,0.0070,0.0073,0.0077,0.0080,0.0083,0.0087,0.0092,0.0095,0.0097,0.0101,0.0107,0.0113,0.0117,0.0121,0.0125,0.0129,0.0136,0.0141,0.0146,0.0153,0.0164,0.0175,0.0186,0.0200,0.0215,0.0230,0.0243,0.0256,0.0269,0.0282,0.0296,0.0315,0.0339,0.0361,0.0380,0.0400,0.0418,0.0424,0.0416,0.0412 29 | 
0.0041,0.0042,0.0045,0.0049,0.0054,0.0058,0.0061,0.0062,0.0062,0.0064,0.0068,0.0072,0.0075,0.0079,0.0082,0.0085,0.0089,0.0092,0.0094,0.0098,0.0104,0.0109,0.0114,0.0118,0.0122,0.0126,0.0133,0.0138,0.0142,0.0150,0.0160,0.0170,0.0179,0.0192,0.0209,0.0224,0.0237,0.0252,0.0268,0.0284,0.0300,0.0316,0.0332,0.0351,0.0373,0.0386,0.0392,0.0400,0.0411,0.0417 30 | 0.0041,0.0042,0.0044,0.0047,0.0052,0.0056,0.0058,0.0060,0.0061,0.0063,0.0067,0.0070,0.0074,0.0076,0.0079,0.0083,0.0087,0.0090,0.0092,0.0095,0.0101,0.0106,0.0111,0.0115,0.0119,0.0124,0.0130,0.0135,0.0139,0.0146,0.0156,0.0165,0.0174,0.0186,0.0202,0.0218,0.0232,0.0248,0.0265,0.0283,0.0300,0.0315,0.0328,0.0345,0.0366,0.0376,0.0376,0.0388,0.0413,0.0425 31 | 0.0044,0.0044,0.0045,0.0046,0.0049,0.0052,0.0055,0.0058,0.0060,0.0062,0.0066,0.0069,0.0072,0.0074,0.0075,0.0079,0.0084,0.0087,0.0090,0.0093,0.0097,0.0102,0.0106,0.0112,0.0118,0.0123,0.0128,0.0132,0.0136,0.0143,0.0152,0.0162,0.0171,0.0183,0.0196,0.0211,0.0229,0.0246,0.0261,0.0278,0.0298,0.0314,0.0327,0.0342,0.0359,0.0368,0.0370,0.0388,0.0421,0.0437 32 | 0.0046,0.0046,0.0045,0.0046,0.0048,0.0051,0.0054,0.0057,0.0059,0.0062,0.0066,0.0068,0.0071,0.0072,0.0073,0.0077,0.0082,0.0086,0.0088,0.0092,0.0096,0.0100,0.0104,0.0110,0.0117,0.0123,0.0127,0.0131,0.0135,0.0142,0.0150,0.0160,0.0170,0.0181,0.0192,0.0208,0.0227,0.0244,0.0259,0.0276,0.0297,0.0313,0.0326,0.0341,0.0356,0.0365,0.0368,0.0388,0.0424,0.0443 33 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Ali Hasan Khan 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Scraping Whoscored Event Data 2 | ![alt text](https://github.com/Ali-Hasan-Khan/Scrape-Whoscored-Event-Data/blob/main/logo.jpg "Whoscored") 3 | 4 | Tool to scrape match event data from [Whoscored](http://whoscored.com/ "Whoscored")'s chalkboard using **Selenium**. 
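At a glance, the scraping workflow looks like this — a minimal sketch based on `test.py` and `main.py`; the competition/season strings are illustrative, and a working Firefox + geckodriver setup is assumed (see the installation steps and **tutorial.ipynb** below for the full guide):

```python
from main import getLeagueUrls, getMatchUrls, getMatchData, createEventsDF, addEpvToDataFrame, main_url
from selenium import webdriver

# collect competition and fixture URLs (each helper opens its own Firefox window)
leagues = getLeagueUrls()
match_urls = getMatchUrls(comp_urls=leagues, competition='Premier League', season='2019/2020')

# scrape a single match and build an events DataFrame with EPV values added
driver = webdriver.Firefox()
match_data = getMatchData(driver, main_url + match_urls[0]['url'], display=True, close_window=True)
events_df = addEpvToDataFrame(createEventsDF(match_data))
```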
5 | 
6 | Installation: 
7 | 1) `git clone https://github.com/Ali-Hasan-Khan/Scrape-Whoscored-Event-Data.git` 
8 | 
9 | 2) `pip install -r requirements.txt` 
10 | 
11 | 3) For some additional visual customisations, replace the **linecollection.py** in your local mplsoccer installation (somewhere here: ~\anaconda3\Lib\site-packages\mplsoccer) with the **linecollection.py** from this repository. [Recommended for the inverted gradient effect in pass maps] 
12 | 
13 | 4) Follow **tutorial.ipynb** for a step-by-step guide. 
14 | 
15 | 
16 | 
17 | Reach me [here](https://twitter.com/rockingAli5) for any kind of help :) 
18 | 
19 | Special thanks to [Laurie Shaw](https://twitter.com/EightyFivePoint) for the Expected Possession Value model ([check out his work here](http://eightyfivepoints.blogspot.com/)). 
20 | 
21 | For any help/suggestions regarding mplsoccer, reach out to the creators: [Andy](https://twitter.com/numberstorm), [Anmol](https://twitter.com/slothfulwave612). 
22 | 
--------------------------------------------------------------------------------
/linecollection.py:
--------------------------------------------------------------------------------
1 | """ A module with functions for using LineCollection to create lines."""
2 | 
3 | import warnings
4 | 
5 | import numpy as np
6 | from matplotlib import rcParams
7 | from matplotlib.cm import get_cmap
8 | from matplotlib.collections import LineCollection
9 | from matplotlib.colors import to_rgba_array
10 | from matplotlib.legend import Legend
11 | from matplotlib.legend_handler import HandlerLineCollection
12 | 
13 | from mplsoccer.cm import create_transparent_cmap
14 | from mplsoccer.utils import validate_ax
15 | 
16 | __all__ = ['lines']
17 | 
18 | 
19 | def lines(xstart, ystart, xend, yend, color=None, n_segments=100,
20 |           comet=False, opp_comet=False, transparent=False, opp_transparent=False, alpha_start=0.01,
21 |           alpha_end=1, cmap=None, ax=None, vertical=False, reverse_cmap=False, **kwargs):
22 |     """ Plots lines using matplotlib.collections.LineCollection.
23 |     This is a fast way to plot multiple lines without loops.
24 |     Also enables lines that increase in width or opacity by splitting
25 |     the line into n_segments of increasing
26 |     width or opacity as the line progresses.
27 | 
28 |     Parameters
29 |     ----------
30 |     xstart, ystart, xend, yend: array-like or scalar.
31 |         Commonly, these parameters are 1D arrays.
32 |         These should be the start and end coordinates of the lines.
33 |     color : A matplotlib color or sequence of colors, defaults to None.
34 |         In that case the line color is determined
35 |         by the value rcParams['lines.color']
36 |     n_segments : int, default 100
37 |         If comet=True or transparent=True this is used to split the line
38 |         into n_segments of increasing width/opacity.
39 |     comet : bool default False
40 |         Whether to plot the lines increasing in width.
41 |     opp_comet : bool default False
42 |         Whether to plot the lines decreasing in width.
43 |     transparent : bool, default False
44 |         Whether to plot the lines increasing in opacity.
45 |     opp_transparent : bool, default False
46 |         Whether to plot the lines decreasing in opacity.
47 |     linewidth or lw : array-like or scalar, default 5.
48 |         Multiple linewidths not supported for the comet or transparent lines.
49 |     alpha_start: float, default 0.01
50 |         The starting alpha value for transparent lines, between 0 (transparent) and 1 (opaque).
51 |         If transparent = True the line will be drawn to
52 |         linearly increase in opacity between alpha_start and alpha_end.
53 | alpha_end : float, default 1 54 | The ending alpha value for transparent lines, between 0 (transparent) and 1 (opaque). 55 | If transparent = True the line will be drawn to 56 | linearly increase in opacity between alpha_start and alpha_end. 57 | cmap : str, default None 58 | A matplotlib cmap (colormap) name 59 | vertical : bool, default False 60 | If the orientation is vertical (True), then the code switches the x and y coordinates. 61 | reverse_cmap : bool, default False 62 | Whether to reverse the cmap colors. 63 | If the pitch is horizontal and the y-axis is inverted then set this to True. 64 | ax : matplotlib.axes.Axes, default None 65 | The axis to plot on. 66 | **kwargs : All other keyword arguments are passed on to matplotlib.collections.LineCollection. 67 | 68 | Returns 69 | ------- 70 | LineCollection : matplotlib.collections.LineCollection 71 | 72 | Examples 73 | -------- 74 | >>> from mplsoccer import Pitch 75 | >>> pitch = Pitch() 76 | >>> fig, ax = pitch.draw() 77 | >>> pitch.lines(20, 20, 45, 80, comet=True, transparent=True, ax=ax) 78 | 79 | >>> from mplsoccer.linecollection import lines 80 | >>> import matplotlib.pyplot as plt 81 | >>> fig, ax = plt.subplots() 82 | >>> lines([0.1, 0.4], [0.1, 0.5], [0.9, 0.4], [0.8, 0.8], ax=ax) 83 | """ 84 | validate_ax(ax) 85 | if not isinstance(comet, bool): 86 | raise TypeError("Invalid argument: comet should be bool (True or False).") 87 | if not isinstance(transparent, bool): 88 | raise TypeError("Invalid argument: transparent should be bool (True or False).") 89 | 90 | if alpha_start < 0 or alpha_start > 1: 91 | raise TypeError("alpha_start values should be within 0-1 range") 92 | if alpha_end < 0 or alpha_end > 1: 93 | raise TypeError("alpha_end values should be within 0-1 range") 94 | if alpha_start > alpha_end: 95 | msg = "Alpha start > alpha end. The line will increase in transparency nearer to the end" 96 | warnings.warn(msg) 97 | 98 | if 'colors' in kwargs.keys(): 99 | warnings.warn("lines method takes 'color' as an argument, 'colors' in ignored") 100 | 101 | if color is not None and cmap is not None: 102 | raise ValueError("Only use one of color or cmap arguments not both.") 103 | 104 | if 'lw' in kwargs.keys() and 'linewidth' in kwargs.keys(): 105 | raise TypeError("lines got multiple values for 'linewidth' argument (linewidth and lw).") 106 | 107 | # set linewidth 108 | if 'lw' in kwargs.keys(): 109 | lw = kwargs.pop('lw', 5) 110 | elif 'linewidth' in kwargs.keys(): 111 | lw = kwargs.pop('linewidth', 5) 112 | else: 113 | lw = 5 114 | 115 | # to arrays 116 | xstart = np.ravel(xstart) 117 | ystart = np.ravel(ystart) 118 | xend = np.ravel(xend) 119 | yend = np.ravel(yend) 120 | lw = np.ravel(lw) 121 | 122 | if (comet or transparent) and (lw.size > 1): 123 | msg = "Multiple linewidths with a comet or transparent line is not implemented." 124 | raise NotImplementedError(msg) 125 | 126 | # set color 127 | if color is None and cmap is None: 128 | color = rcParams['lines.color'] 129 | 130 | if (comet or transparent) and (cmap is None) and (to_rgba_array(color).shape[0] > 1): 131 | msg = "Multiple colors with a comet or transparent line is not implemented." 
132 | raise NotImplementedError(msg) 133 | 134 | if xstart.size != ystart.size: 135 | raise ValueError("xstart and ystart must be the same size") 136 | if xstart.size != xend.size: 137 | raise ValueError("xstart and xend must be the same size") 138 | if ystart.size != yend.size: 139 | raise ValueError("ystart and yend must be the same size") 140 | 141 | if (lw.size > 1) and (lw.size != xstart.size): 142 | raise ValueError("lw and xstart must be the same size") 143 | 144 | if lw.size == 1: 145 | lw = lw[0] 146 | 147 | if vertical: 148 | ystart, xstart = xstart, ystart 149 | yend, xend = xend, yend 150 | 151 | # create linewidth 152 | if comet: 153 | lw = np.linspace(1, lw, n_segments) 154 | handler_first_lw = False 155 | else: 156 | handler_first_lw = True 157 | 158 | if opp_comet: 159 | lw = np.linspace(lw, 1, n_segments) 160 | 161 | if (transparent is False) and (comet is False) and (cmap is None): 162 | multi_segment = False 163 | else: 164 | multi_segment = True 165 | 166 | if transparent: 167 | cmap = create_transparent_cmap(color, cmap, n_segments, alpha_start, alpha_end) 168 | 169 | if opp_transparent: 170 | cmap = create_transparent_cmap(color, cmap, n_segments, alpha_start, alpha_end) 171 | cmap = cmap.reversed() 172 | 173 | if isinstance(cmap, str): 174 | cmap = get_cmap(cmap) 175 | 176 | if cmap is not None: 177 | handler_cmap = True 178 | line_collection = _lines_cmap(xstart, ystart, xend, yend, lw=lw, cmap=cmap, 179 | ax=ax, n_segments=n_segments, multi_segment=multi_segment, 180 | reverse_cmap=reverse_cmap, **kwargs) 181 | else: 182 | handler_cmap = False 183 | line_collection = _lines_no_cmap(xstart, ystart, xend, yend, 184 | lw=lw, color=color, ax=ax, n_segments=n_segments, 185 | multi_segment=multi_segment, **kwargs) 186 | 187 | line_collection_handler = HandlerLines(numpoints=n_segments, invert_y=reverse_cmap, 188 | first_lw=handler_first_lw, use_cmap=handler_cmap) 189 | Legend.update_default_handler_map({line_collection: line_collection_handler}) 190 | 191 | return line_collection 192 | 193 | 194 | def _create_segments(xstart, ystart, xend, yend, n_segments=100, multi_segment=False): 195 | if multi_segment: 196 | x = np.linspace(xstart, xend, n_segments + 1) 197 | y = np.linspace(ystart, yend, n_segments + 1) 198 | points = np.array([x, y]).T 199 | points = np.concatenate([points, np.expand_dims(points[:, -1, :], 1)], axis=1) 200 | points = np.expand_dims(points, 1) 201 | segments = np.concatenate([points[:, :, :-2, :], 202 | points[:, :, 1:-1, :], 203 | points[:, :, 2:, :]], axis=1) 204 | segments = np.transpose(segments, (0, 2, 1, 3)).reshape((-1, 3, 2)) 205 | else: 206 | segments = np.transpose(np.array([[xstart, ystart], [xend, yend]]), (2, 0, 1)) 207 | return segments 208 | 209 | 210 | def _lines_no_cmap(xstart, ystart, xend, yend, lw=None, color=None, ax=None, 211 | n_segments=100, multi_segment=False, **kwargs): 212 | segments = _create_segments(xstart, ystart, xend, yend, 213 | n_segments=n_segments, multi_segment=multi_segment) 214 | color = to_rgba_array(color) 215 | if (color.shape[0] > 1) and (color.shape[0] != xstart.size): 216 | raise ValueError("xstart and color must be the same size") 217 | line_collection = LineCollection(segments, color=color, linewidth=lw, snap=False, **kwargs) 218 | line_collection = ax.add_collection(line_collection) 219 | return line_collection 220 | 221 | 222 | def _lines_cmap(xstart, ystart, xend, yend, lw=None, cmap=None, ax=None, 223 | n_segments=100, multi_segment=False, reverse_cmap=False, **kwargs): 224 | segments = 
_create_segments(xstart, ystart, xend, yend, 225 | n_segments=n_segments, multi_segment=multi_segment) 226 | if reverse_cmap: 227 | cmap = cmap.reversed() 228 | line_collection = LineCollection(segments, cmap=cmap, linewidth=lw, snap=False, **kwargs) 229 | line_collection = ax.add_collection(line_collection) 230 | extent = ax.get_ylim() 231 | pitch_array = np.linspace(extent[0], extent[1], n_segments) 232 | line_collection.set_array(pitch_array) 233 | return line_collection 234 | 235 | 236 | # Amended from 237 | # https://stackoverflow.com/questions/49223702/adding-a-legend-to-a-matplotlib-plot-with-a-multicolored-line?rq=1 238 | class HandlerLines(HandlerLineCollection): 239 | """Automatically generated by Pitch.lines() to allow use of linecollection in legend. 240 | """ 241 | 242 | def __init__(self, invert_y=False, first_lw=False, use_cmap=False, 243 | marker_pad=0.3, numpoints=None, **kw): 244 | HandlerLineCollection.__init__(self, marker_pad=marker_pad, numpoints=numpoints, **kw) 245 | self.invert_y = invert_y 246 | self.first_lw = first_lw 247 | self.use_cmap = use_cmap 248 | 249 | def create_artists(self, legend, artist, xdescent, ydescent, 250 | width, height, fontsize, trans): 251 | x = np.linspace(0, width, self.get_numpoints(legend) + 1) 252 | y = np.zeros(self.get_numpoints(legend) + 1) + height / 2. - ydescent 253 | points = np.array([x, y]).T.reshape(-1, 1, 2) 254 | segments = np.concatenate([points[:-1], points[1:]], axis=1) 255 | lw = artist.get_linewidth() 256 | if self.first_lw: 257 | lw = lw[0] 258 | if self.use_cmap: 259 | cmap = artist.cmap 260 | if self.invert_y: 261 | cmap = cmap.reversed() 262 | line_collection = LineCollection(segments, lw=lw, cmap=cmap, 263 | snap=False, transform=trans) 264 | line_collection.set_array(x) 265 | else: 266 | line_collection = LineCollection(segments, lw=lw, colors=artist.get_colors()[0], 267 | snap=False, transform=trans) 268 | return [line_collection] 269 | -------------------------------------------------------------------------------- /logo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ali-Hasan-Khan/Scrape-Whoscored-Event-Data/1bdabe58386f57edc417a9ae6590a507635060bb/logo.jpg -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Wed Oct 14 14:20:02 2020 4 | 5 | @author: aliha 6 | @twitter: rockingAli5 7 | """ 8 | 9 | import warnings 10 | import time 11 | import pandas as pd 12 | pd.options.mode.chained_assignment = None 13 | import json 14 | from bs4 import BeautifulSoup as soup 15 | import re 16 | from collections import OrderedDict 17 | from datetime import datetime as dt 18 | import itertools 19 | import numpy as np 20 | try: 21 | from tqdm import trange 22 | except ModuleNotFoundError: 23 | pass 24 | 25 | 26 | from selenium import webdriver 27 | from selenium.common.exceptions import NoSuchElementException, WebDriverException 28 | from selenium.webdriver.common.by import By 29 | 30 | # options = webdriver.FirefoxOptions() 31 | 32 | # options.add_experimental_option('excludeSwitches', ['enable-logging']) 33 | 34 | 35 | TRANSLATE_DICT = {'Jan': 'Jan', 36 | 'Feb': 'Feb', 37 | 'Mac': 'Mar', 38 | 'Apr': 'Apr', 39 | 'Mei': 'May', 40 | 'Jun': 'Jun', 41 | 'Jul': 'Jul', 42 | 'Ago': 'Aug', 43 | 'Sep': 'Sep', 44 | 'Okt': 'Oct', 45 | 'Nov': 'Nov', 46 | 'Des': 'Dec', 47 | 'Jan': 'Jan', 48 | 'Feb': 
'Feb',
49 |                   'Mar': 'Mar',
50 |                   'Apr': 'Apr',
51 |                   'May': 'May',
52 |                   'Jun': 'Jun',
53 |                   'Jul': 'Jul',
54 |                   'Aug': 'Aug',
55 |                   'Sep': 'Sep',
56 |                   'Oct': 'Oct',
57 |                   'Nov': 'Nov',
58 |                   'Dec': 'Dec'}
59 | 
60 | main_url = 'https://1xbet.whoscored.com/'
61 | 
62 | 
63 | 
64 | def getLeagueUrls(minimize_window=True):
65 | 
66 |     driver = webdriver.Firefox()
67 | 
68 |     if minimize_window:
69 |         driver.minimize_window()
70 | 
71 |     driver.get(main_url)
72 |     league_names = []
73 |     league_urls = []
74 |     try:
75 |         cookie_button = driver.find_element(By.XPATH, '//*[@class=" css-gweyaj"]').click()
76 |     except NoSuchElementException:
77 |         pass
78 |     tournaments_btn = driver.find_element(By.XPATH, '//*[@id="All-Tournaments-btn"]').click()
79 |     n_button = soup(driver.find_element(By.XPATH, '//*[@id="header-wrapper"]/div/div/div/div[4]/div[2]/div/div/div/div[1]/div/div').get_attribute('innerHTML')).find_all('button')
80 |     n_tournaments = []
81 |     for button in n_button:
82 |         id_button = button.get('id')
83 |         driver.find_element(By.ID, id_button).click()
84 |         n_country = soup(driver.find_element(By.XPATH, '//*[@id="header-wrapper"]/div/div/div/div[4]/div[2]/div/div/div/div[2]').get_attribute('innerHTML')).find_all('div', {'class':'TournamentsDropdownMenu-module_countryDropdownContainer__I9P6n'})
85 | 
86 |         for country in n_country:
87 |             country_id = country.find('div', {'class': 'TournamentsDropdownMenu-module_countryDropdown__8rtD-'}).get('id')
88 | 
89 |             # find the country element with Selenium and click on it
90 |             country_element = driver.find_element(By.ID, country_id)
91 |             country_element.click()
92 | 
93 |             html_tournaments_list = driver.find_element(By.XPATH, '//*[@id="header-wrapper"]/div/div/div/div[4]/div[2]/div/div/div/div[2]').get_attribute('innerHTML')
94 | 
95 |             # parse the HTML with BeautifulSoup to find the tournament links
96 |             soup_tournaments = soup(html_tournaments_list, 'html.parser')
97 |             tournaments = soup_tournaments.find_all('a')
98 | 
99 |             # add the tournaments to the n_tournaments list
100 |             n_tournaments.extend(tournaments)
101 | 
102 |             driver.execute_script("arguments[0].click();", country_element)
103 | 
104 | 
105 |     for tournament in n_tournaments:
106 |         league_name = tournament.get('href').split('/')[-1]
107 |         league_link = main_url[:-1]+tournament.get('href')
108 |         league_names.append(league_name)
109 |         league_urls.append(league_link)
110 | 
111 |     leagues = {}
112 |     for name,link in zip(league_names,league_urls):
113 |         leagues[name] = link
114 | 
115 |     driver.close()
116 |     return leagues
117 | 
118 | 
119 | def getMatchUrls(comp_urls, competition, season, maximize_window=True):
120 | 
121 |     driver = webdriver.Firefox()
122 | 
123 |     if maximize_window:
124 |         driver.maximize_window()
125 | 
126 |     comp_url = comp_urls[competition]
127 |     driver.get(comp_url)
128 |     time.sleep(5)
129 | 
130 |     seasons = driver.find_element(By.XPATH, '//*[@id="seasons"]').get_attribute('innerHTML').split(sep='\n')
131 |     seasons = [i for i in seasons if i]
132 | 
133 | 
134 |     for i in range(1, len(seasons)+1):
135 |         if driver.find_element(By.XPATH, '//*[@id="seasons"]/option['+str(i)+']').text == season:
136 |             driver.find_element(By.XPATH, '//*[@id="seasons"]/option['+str(i)+']').click()
137 | 
138 |             time.sleep(5)
139 |             try:
140 |                 stages = driver.find_element(By.XPATH, '//*[@id="stages"]').get_attribute('innerHTML').split(sep='\n')
141 |                 stages = [i for i in stages if i]
142 | 
143 |                 all_urls = []
144 | 
145 |                 for i in range(1, len(stages)+1):
146 |                     print(driver.find_element(By.XPATH, '//*[@id="stages"]/option['+str(i)+']').text)
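                    # Stage handling below: for the Champions League and Europa League only the group-stage
                    # ('Grp') and 'Final Stage' entries are scraped; for Major League Soccer the 'Grp. ' stages
                    # are skipped; every listed stage is scraped for any other competition. Fixtures whose date
                    # contains '?' or a newline (postponed/suspended games) are filtered out of the results.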
147 | if competition == 'Champions League' or competition == 'Europa League': 148 | if 'Grp' in driver.find_element(By.XPATH, '//*[@id="stages"]/option['+str(i)+']').text or 'Final Stage' in driver.find_element(By.XPATH, '//*[@id="stages"]/option['+str(i)+']').text: 149 | driver.find_element(By.XPATH, '//*[@id="stages"]/option['+str(i)+']').click() 150 | time.sleep(5) 151 | 152 | driver.execute_script("window.scrollTo(0, 400)") 153 | 154 | match_urls = getFixtureData(driver) 155 | 156 | match_urls = getSortedData(match_urls) 157 | 158 | match_urls2 = [url for url in match_urls if '?' not in url['date'] and '\n' not in url['date']] 159 | 160 | all_urls += match_urls2 161 | else: 162 | continue 163 | 164 | elif competition == 'Major League Soccer': 165 | if 'Grp. ' not in driver.find_element(By.XPATH, '//*[@id="stages"]/option['+str(i)+']').text: 166 | driver.find_element(By.XPATH, '//*[@id="stages"]/option['+str(i)+']').click() 167 | time.sleep(5) 168 | 169 | driver.execute_script("window.scrollTo(0, 400)") 170 | 171 | match_urls = getFixtureData(driver) 172 | 173 | match_urls = getSortedData(match_urls) 174 | 175 | match_urls2 = [url for url in match_urls if '?' not in url['date'] and '\n' not in url['date']] 176 | 177 | all_urls += match_urls2 178 | else: 179 | continue 180 | 181 | else: 182 | driver.find_element(By.XPATH, '//*[@id="stages"]/option['+str(i)+']').click() 183 | time.sleep(5) 184 | 185 | driver.execute_script("window.scrollTo(0, 400)") 186 | 187 | match_urls = getFixtureData(driver) 188 | 189 | match_urls = getSortedData(match_urls) 190 | 191 | match_urls2 = [url for url in match_urls if '?' not in url['date'] and '\n' not in url['date']] 192 | 193 | all_urls += match_urls2 194 | 195 | except NoSuchElementException: 196 | all_urls = [] 197 | 198 | driver.execute_script("window.scrollTo(0, 400)") 199 | 200 | match_urls = getFixtureData(driver) 201 | 202 | match_urls = getSortedData(match_urls) 203 | 204 | match_urls2 = [url for url in match_urls if '?' 
not in url['date'] and '\n' not in url['date']]
205 | 
206 |                 all_urls += match_urls2
207 | 
208 | 
209 |             remove_dup = [dict(t) for t in {tuple(sorted(d.items())) for d in all_urls}]
210 |             all_urls = getSortedData(remove_dup)
211 | 
212 |             driver.close()
213 | 
214 |             return all_urls
215 | 
216 |     season_names = [re.search(r'\>(.*?)\<',season).group(1) for season in seasons]
217 |     driver.close()
218 |     print('Seasons available: {}'.format(season_names))
219 |     raise ValueError('Season Not Found.')
220 | 
221 | 
222 | 
223 | 
224 | 
225 | def getTeamUrls(team, match_urls):
226 | 
227 |     team_data = []
228 |     for fixture in match_urls:
229 |         if fixture['home'] == team or fixture['away'] == team:
230 |             team_data.append(fixture)
231 |     team_data = [a[0] for a in itertools.groupby(team_data)]
232 | 
233 |     return team_data
234 | 
235 | 
236 | def getMatchesData(match_urls, minimize_window=True):
237 | 
238 |     matches = []
239 | 
240 |     driver = webdriver.Firefox()
241 |     if minimize_window:
242 |         driver.minimize_window()
243 | 
244 |     try:
245 |         for i in trange(len(match_urls), desc='Getting Match Data'):
246 |             # recommended to avoid getting blocked by incapsula/imperva bots
247 |             time.sleep(7)
248 |             match_data = getMatchData(driver, main_url+match_urls[i]['url'], display=False, close_window=False)
249 |             matches.append(match_data)
250 |     except NameError:
251 |         print('Recommended: \'pip install tqdm\' for a progress bar while the data gets scraped....')
252 |         time.sleep(7)
253 |         for i in range(len(match_urls)):
254 |             match_data = getMatchData(driver, main_url+match_urls[i]['url'], display=False, close_window=False)
255 |             matches.append(match_data)
256 | 
257 |     driver.close()
258 | 
259 |     return matches
260 | 
261 | 
262 | 
263 | 
264 | def getFixtureData(driver):
265 |     matches_ls = []
266 |     while True:
267 |         initial = driver.page_source
268 |         all_fixtures = driver.find_elements(By.CLASS_NAME, 'Accordion-module_accordion__UuHD0')
269 |         for dates in all_fixtures:
270 |             fixtures = dates.find_elements(By.CLASS_NAME, 'Match-module_row__zwBOn')
271 |             date_row = dates.find_element(By.CLASS_NAME, 'Accordion-module_header__HqzWD')
272 |             for row in fixtures:
273 |                 url = row.find_element(By.TAG_NAME, 'a')
274 |                 if 'live' in url.get_attribute('href'):
275 |                     # print(url.get_attribute('href'))
276 |                     match_dict = {}
277 |                     element = soup(row.get_attribute('innerHTML'), features='lxml')
278 |                     teams_tag = element.find("div", {"class":"Match-module_teams__sGVeq"})
279 |                     link_tag = element.find("a")
280 |                     match_dict['date'] = date_row.text
281 |                     match_dict['home'] = teams_tag.find_all('a')[0].text
282 |                     match_dict['away'] = teams_tag.find_all('a')[1].text
283 |                     match_dict['score'] = ':'.join([t.text for t in link_tag.find_all('span')])
284 |                     match_dict['url'] = link_tag['href']
285 |                     # print(match_dict)
286 |                     matches_ls.append(match_dict)
287 |         prev_btn = driver.find_element(By.ID, 'dayChangeBtn-prev')
288 |         prev_btn.click()
289 |         time.sleep(1)
290 |         final = driver.page_source
291 |         if initial == final:
292 |             break
293 | 
294 |     return matches_ls
295 | 
296 | 
297 | 
298 | 
299 | 
300 | 
301 | def translateDate(data):
302 | 
303 |     unwanted = []
304 |     for match in data:
305 |         date = match['date'].split()
306 |         if '?'
not in date[0]: 307 | try: 308 | match['date'] = ' '.join([TRANSLATE_DICT[date[0]], date[1], date[2]]) 309 | except KeyError: 310 | print(date) 311 | else: 312 | unwanted.append(data.index(match)) 313 | 314 | # remove matches that got suspended/postponed 315 | for i in sorted(unwanted, reverse = True): 316 | del data[i] 317 | 318 | return data 319 | 320 | 321 | def getSortedData(data): 322 | data = sorted(data, key = lambda i: dt.strptime(i['date'], '%A, %b %d %Y')) 323 | return data 324 | 325 | 326 | 327 | 328 | def getMatchData(driver, url, display=True, close_window=True): 329 | try: 330 | driver.get(url) 331 | except WebDriverException: 332 | driver.get(url) 333 | 334 | time.sleep(5) 335 | # get script data from page source 336 | script_content = driver.find_element(By.XPATH, '//*[@id="layout-wrapper"]/script[1]').get_attribute('innerHTML') 337 | 338 | 339 | # clean script content 340 | script_content = re.sub(r"[\n\t]*", "", script_content) 341 | script_content = script_content[script_content.index("matchId"):script_content.rindex("}")] 342 | 343 | 344 | # this will give script content in list form 345 | script_content_list = list(filter(None, script_content.strip().split(', '))) 346 | metadata = script_content_list.pop(1) 347 | 348 | 349 | # string format to json format 350 | match_data = json.loads(metadata[metadata.index('{'):]) 351 | keys = [item[:item.index(':')].strip() for item in script_content_list] 352 | values = [item[item.index(':')+1:].strip() for item in script_content_list] 353 | for key,val in zip(keys, values): 354 | match_data[key] = json.loads(val) 355 | 356 | 357 | # get other details about the match 358 | region = driver.find_element(By.XPATH, '//*[@id="breadcrumb-nav"]/span[1]').text 359 | league = driver.find_element(By.XPATH, '//*[@id="breadcrumb-nav"]/a').text.split(' - ')[0] 360 | season = driver.find_element(By.XPATH, '//*[@id="breadcrumb-nav"]/a').text.split(' - ')[1] 361 | if len(driver.find_element(By.XPATH, '//*[@id="breadcrumb-nav"]/a').text.split(' - ')) == 2: 362 | competition_type = 'League' 363 | competition_stage = '' 364 | elif len(driver.find_element(By.XPATH, '//*[@id="breadcrumb-nav"]/a').text.split(' - '))== 3: 365 | competition_type = 'Knock Out' 366 | competition_stage = driver.find_element(By.XPATH, '//*[@id="breadcrumb-nav"]/a').text.split(' - ')[-1] 367 | else: 368 | print('Getting more than 3 types of information about the competition.') 369 | 370 | match_data['region'] = region 371 | match_data['league'] = league 372 | match_data['season'] = season 373 | match_data['competitionType'] = competition_type 374 | match_data['competitionStage'] = competition_stage 375 | 376 | 377 | # sort match_data dictionary alphabetically 378 | match_data = OrderedDict(sorted(match_data.items())) 379 | match_data = dict(match_data) 380 | if display: 381 | print('Region: {}, League: {}, Season: {}, Match Id: {}'.format(region, league, season, match_data['matchId'])) 382 | 383 | 384 | if close_window: 385 | driver.close() 386 | 387 | return match_data 388 | 389 | 390 | 391 | 392 | 393 | def createEventsDF(data): 394 | events = data['events'] 395 | for event in events: 396 | event.update({'matchId' : data['matchId'], 397 | 'startDate' : data['startDate'], 398 | 'startTime' : data['startTime'], 399 | 'score' : data['score'], 400 | 'ftScore' : data['ftScore'], 401 | 'htScore' : data['htScore'], 402 | 'etScore' : data['etScore'], 403 | 'venueName' : data['venueName'], 404 | 'maxMinute' : data['maxMinute']}) 405 | events_df = pd.DataFrame(events) 406 | 407 | # 
clean period column 408 | events_df['period'] = pd.json_normalize(events_df['period'])['displayName'] 409 | 410 | # clean type column 411 | events_df['type'] = pd.json_normalize(events_df['type'])['displayName'] 412 | 413 | # clean outcomeType column 414 | events_df['outcomeType'] = pd.json_normalize(events_df['outcomeType'])['displayName'] 415 | 416 | # clean outcomeType column 417 | try: 418 | x = events_df['cardType'].fillna({i: {} for i in events_df.index}) 419 | events_df['cardType'] = pd.json_normalize(x)['displayName'].fillna(False) 420 | except KeyError: 421 | events_df['cardType'] = False 422 | 423 | eventTypeDict = data['matchCentreEventTypeJson'] 424 | events_df['satisfiedEventsTypes'] = events_df['satisfiedEventsTypes'].apply(lambda x: [list(eventTypeDict.keys())[list(eventTypeDict.values()).index(event)] for event in x]) 425 | 426 | # clean qualifiers column 427 | try: 428 | for i in events_df.index: 429 | row = events_df.loc[i, 'qualifiers'].copy() 430 | if len(row) != 0: 431 | for irow in range(len(row)): 432 | row[irow]['type'] = row[irow]['type']['displayName'] 433 | except TypeError: 434 | pass 435 | 436 | 437 | # clean isShot column 438 | with warnings.catch_warnings(): 439 | warnings.simplefilter("ignore", category=FutureWarning) 440 | if 'isShot' in events_df.columns: 441 | events_df['isShot'] = events_df['isShot'].replace(np.nan, False).infer_objects(copy=False) 442 | else: 443 | events_df['isShot'] = False 444 | 445 | # clean isGoal column 446 | if 'isGoal' in events_df.columns: 447 | events_df['isGoal'] = events_df['isGoal'].replace(np.nan, False).infer_objects(copy=False) 448 | else: 449 | events_df['isGoal'] = False 450 | 451 | # add player name column 452 | with warnings.catch_warnings(): 453 | warnings.simplefilter("ignore", category=FutureWarning) 454 | events_df.loc[events_df.playerId.notna(), 'playerId'] = events_df.loc[events_df.playerId.notna(), 'playerId'].astype(int).astype(str) 455 | player_name_col = events_df.loc[:, 'playerId'].map(data['playerIdNameDictionary']) 456 | events_df.insert(loc=events_df.columns.get_loc("playerId")+1, column='playerName', value=player_name_col) 457 | 458 | # add home/away column 459 | h_a_col = events_df['teamId'].map({data['home']['teamId']:'h', data['away']['teamId']:'a'}) 460 | events_df.insert(loc=events_df.columns.get_loc("teamId")+1, column='h_a', value=h_a_col) 461 | 462 | 463 | # adding shot body part column 464 | events_df['shotBodyType'] = np.nan 465 | with warnings.catch_warnings(): 466 | warnings.simplefilter("ignore", category=FutureWarning) 467 | for i in events_df.loc[events_df.isShot==True].index: 468 | for j in events_df.loc[events_df.isShot==True].qualifiers.loc[i]: 469 | if j['type'] == 'RightFoot' or j['type'] == 'LeftFoot' or j['type'] == 'Head' or j['type'] == 'OtherBodyPart': 470 | events_df.loc[i, 'shotBodyType'] = j['type'] 471 | 472 | 473 | # adding shot situation column 474 | events_df['situation'] = np.nan 475 | with warnings.catch_warnings(): 476 | warnings.simplefilter("ignore", category=FutureWarning) 477 | for i in events_df.loc[events_df.isShot==True].index: 478 | for j in events_df.loc[events_df.isShot==True].qualifiers.loc[i]: 479 | if j['type'] == 'FromCorner' or j['type'] == 'SetPiece' or j['type'] == 'DirectFreekick': 480 | events_df.loc[i, 'situation'] = j['type'] 481 | if j['type'] == 'RegularPlay': 482 | events_df.loc[i, 'situation'] = 'OpenPlay' 483 | 484 | event_types = list(data['matchCentreEventTypeJson'].keys()) 485 | event_type_cols = pd.DataFrame({event_type: 
pd.Series([event_type in row for row in events_df['satisfiedEventsTypes']]) for event_type in event_types}) 486 | events_df = pd.concat([events_df, event_type_cols], axis=1) 487 | 488 | 489 | return events_df 490 | 491 | 492 | 493 | 494 | def createMatchesDF(data): 495 | columns_req_ls = ['matchId', 'attendance', 'venueName', 'startTime', 'startDate', 496 | 'score', 'home', 'away', 'referee'] 497 | matches_df = pd.DataFrame(columns=columns_req_ls) 498 | if type(data) == dict: 499 | matches_dict = dict([(key,val) for key,val in data.items() if key in columns_req_ls]) 500 | matches_df = pd.DataFrame(matches_dict, columns=columns_req_ls).reset_index(drop=True) 501 | matches_df[['home', 'away']] = np.nan 502 | with warnings.catch_warnings(): 503 | warnings.simplefilter("ignore", category=FutureWarning) 504 | matches_df['home'].iloc[0] = [data['home']] 505 | matches_df['away'].iloc[0] = [data['away']] 506 | else: 507 | for match in data: 508 | matches_dict = dict([(key,val) for key,val in match.items() if key in columns_req_ls]) 509 | matches_df = pd.DataFrame(matches_dict, columns=columns_req_ls).reset_index(drop=True) 510 | 511 | matches_df = matches_df.set_index('matchId') 512 | return matches_df 513 | 514 | 515 | 516 | 517 | def load_EPV_grid(fname='EPV_grid.csv'): 518 | """ load_EPV_grid(fname='EPV_grid.csv') 519 | 520 | # load pregenerated EPV surface from file. 521 | 522 | Parameters 523 | ----------- 524 | fname: filename & path of EPV grid (default is 'EPV_grid.csv' in the curernt directory) 525 | 526 | Returns 527 | ----------- 528 | EPV: The EPV surface (default is a (32,50) grid) 529 | 530 | """ 531 | epv = np.loadtxt(fname, delimiter=',') 532 | return epv 533 | 534 | 535 | 536 | 537 | 538 | 539 | def get_EPV_at_location(position,EPV,attack_direction,field_dimen=(106.,68.)): 540 | """ get_EPV_at_location 541 | 542 | Returns the EPV value at a given (x,y) location 543 | 544 | Parameters 545 | ----------- 546 | position: Tuple containing the (x,y) pitch position 547 | EPV: tuple Expected Possession value grid (loaded using load_EPV_grid() ) 548 | attack_direction: Sets the attack direction (1: left->right, -1: right->left) 549 | field_dimen: tuple containing the length and width of the pitch in meters. Default is (106,68) 550 | 551 | Returrns 552 | ----------- 553 | EPV value at input position 554 | 555 | """ 556 | 557 | x,y = position 558 | if abs(x)>field_dimen[0]/2. or abs(y)>field_dimen[1]/2.: 559 | return 0.0 # Position is off the field, EPV is zero 560 | else: 561 | if attack_direction==-1: 562 | EPV = np.fliplr(EPV) 563 | ny,nx = EPV.shape 564 | dx = field_dimen[0]/float(nx) 565 | dy = field_dimen[1]/float(ny) 566 | ix = (x+field_dimen[0]/2.-0.0001)/dx 567 | iy = (y+field_dimen[1]/2.-0.0001)/dy 568 | return EPV[int(iy),int(ix)] 569 | 570 | 571 | 572 | 573 | 574 | def to_metric_coordinates_from_whoscored(data,field_dimen=(106.,68.) 
): 575 | ''' 576 | Convert positions from Whoscored units to meters (with origin at centre circle) 577 | ''' 578 | x_columns = [c for c in data.columns if c[-1].lower()=='x'][:2] 579 | y_columns = [c for c in data.columns if c[-1].lower()=='y'][:2] 580 | x_columns_mod = [c+'_metrica' for c in x_columns] 581 | y_columns_mod = [c+'_metrica' for c in y_columns] 582 | data[x_columns_mod] = (data[x_columns]/100*106)-53 583 | data[y_columns_mod] = (data[y_columns]/100*68)-34 584 | return data 585 | 586 | 587 | 588 | 589 | def addEpvToDataFrame(data): 590 | 591 | # loading EPV data 592 | EPV = load_EPV_grid('EPV_grid.csv') 593 | 594 | # converting opta coordinates to metric coordinates 595 | data = to_metric_coordinates_from_whoscored(data) 596 | 597 | # calculating EPV for events 598 | EPV_difference = [] 599 | for i in data.index: 600 | if data.loc[i, 'type'] == 'Pass' and data.loc[i, 'outcomeType'] == 'Successful': 601 | start_pos = (data.loc[i, 'x_metrica'], data.loc[i, 'y_metrica']) 602 | start_epv = get_EPV_at_location(start_pos, EPV, attack_direction=1) 603 | 604 | end_pos = (data.loc[i, 'endX_metrica'], data.loc[i, 'endY_metrica']) 605 | end_epv = get_EPV_at_location(end_pos, EPV, attack_direction=1) 606 | 607 | diff = end_epv - start_epv 608 | EPV_difference.append(diff) 609 | 610 | else: 611 | EPV_difference.append(np.nan) 612 | 613 | data = data.assign(EPV_difference = EPV_difference) 614 | 615 | 616 | # dump useless columns 617 | drop_cols = ['x_metrica', 'endX_metrica', 'y_metrica', 618 | 'endY_metrica'] 619 | data.drop(drop_cols, axis=1, inplace=True) 620 | data.rename(columns={'EPV_difference': 'EPV'}, inplace=True) 621 | 622 | return data 623 | 624 | 625 | 626 | 627 | 628 | 629 | 630 | 631 | 632 | 633 | 634 | 635 | 636 | 637 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | pandas 3 | matplotlib 4 | seaborn 5 | selenium==4.16.0 6 | mplsoccer==1.2.2 7 | requests 8 | unzip 9 | tqdm -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | # import relevant functions 2 | from main import getLeagueUrls, getMatchUrls, getTeamUrls, getMatchesData, getMatchData, createEventsDF, createMatchesDF, addEpvToDataFrame 3 | 4 | # import relevant variables 5 | from main import main_url 6 | 7 | # import relevant packages 8 | import pandas as pd 9 | 10 | from selenium import webdriver 11 | options = webdriver.ChromeOptions() 12 | options.add_experimental_option('excludeSwitches', ['enable-logging']) 13 | 14 | 15 | 16 | 17 | # write test functions for all functions in file 18 | def test(): 19 | print('Testing getLeagueUrls function...') 20 | leagues = getLeagueUrls() 21 | assert type(leagues) == dict 22 | assert len(leagues) == 23 23 | print('getLeagueUrls function passed all tests.') 24 | 25 | print('Testing getMatchUrls function...') 26 | comp_urls = getLeagueUrls() 27 | match_urls = getMatchUrls(comp_urls, 'Premier League', '2019/2020') 28 | assert type(match_urls) == list 29 | assert len(match_urls) == 380 30 | print('getMatchUrls function passed all tests.') 31 | 32 | print('Testing getTeamUrls function...') 33 | team_urls = getTeamUrls('Liverpool', match_urls) 34 | assert type(team_urls) == list 35 | assert len(team_urls) == 38 36 | print('getTeamUrls function passed all tests.') 37 | 38 | print('Testing getMatchesData function...') 
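    # note: getMatchesData opens its own Firefox window and sleeps ~7 seconds per fixture
    # to avoid being blocked by Incapsula/Imperva, so scraping a full 38-match season takes a while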
39 | matches = getMatchesData(team_urls) 40 | assert type(matches) == list 41 | assert len(matches) == 38 42 | print('getMatchesData function passed all tests.') 43 | 44 | print('Testing getMatchData function...') 45 | driver = webdriver.Chrome('drivers/chromedriver.exe', options=options) 46 | match_data = getMatchData(driver, main_url+'/Matches/1375927/Live/England-Premier-League-2019-2020-Liverpool-Norwich') 47 | assert type(match_data) == dict 48 | assert len(match_data) == 36 49 | print('getMatchData function passed all tests.') 50 | 51 | print('Testing createEventsDF function...') 52 | events_df = createEventsDF(match_data) 53 | assert type(events_df) == pd.core.frame.DataFrame 54 | assert events_df.shape[1] == 259 55 | print('createEventsDF function passed all tests.') 56 | 57 | print('Testing createMatchesDF function...') 58 | matches_df = createMatchesDF(match_data) 59 | assert type(matches_df) == pd.core.frame.DataFrame 60 | assert matches_df.shape[1] == 8 61 | print('createMatchesDF function passed all tests.') 62 | 63 | print('Testing addEpvToDataFrame function...') 64 | events_df = addEpvToDataFrame(events_df) 65 | assert type(events_df) == pd.core.frame.DataFrame 66 | assert events_df.shape[1] == 260 67 | print('addEpvToDataFrame function passed all tests.') 68 | 69 | print('All tests passed.') 70 | 71 | if __name__ == '__main__': 72 | test() 73 | 74 | 75 | 76 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | from sys import platform 4 | 5 | def extract_version_registry(output): 6 | try: 7 | google_version = '' 8 | for letter in output[output.rindex('DisplayVersion REG_SZ') + 24:]: 9 | if letter != '\n': 10 | google_version += letter 11 | else: 12 | break 13 | return(google_version.strip()) 14 | except TypeError: 15 | return 16 | 17 | def extract_version_folder(): 18 | # Check if the Chrome folder exists in the x32 or x64 Program Files folders. 19 | for i in range(2): 20 | path = 'C:\\Program Files' + (' (x86)' if i else '') +'\\Google\\Chrome\\Application' 21 | if os.path.isdir(path): 22 | paths = [f.path for f in os.scandir(path) if f.is_dir()] 23 | for path in paths: 24 | filename = os.path.basename(path) 25 | pattern = '\d+\.\d+\.\d+\.\d+' 26 | match = re.search(pattern, filename) 27 | if match and match.group(): 28 | # Found a Chrome version. 29 | return match.group(0) 30 | 31 | return None 32 | 33 | def get_chrome_version(): 34 | version = None 35 | install_path = None 36 | 37 | try: 38 | if platform == "linux" or platform == "linux2": 39 | # linux 40 | install_path = "/usr/bin/google-chrome" 41 | elif platform == "darwin": 42 | # OS X 43 | install_path = "/Applications/Google\ Chrome.app/Contents/MacOS/Google\ Chrome" 44 | elif platform == "win32": 45 | # Windows... 46 | try: 47 | # Try registry key. 48 | stream = os.popen('reg query "HKLM\\SOFTWARE\\Wow6432Node\\Microsoft\\Windows\\CurrentVersion\\Uninstall\\Google Chrome"') 49 | output = stream.read() 50 | version = extract_version_registry(output) 51 | except Exception as ex: 52 | # Try folder path. 
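                # (registry query failed, so fall back to scanning the Program Files
                # Chrome folders for a version-numbered directory)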
53 | version = extract_version_folder() 54 | except Exception as ex: 55 | print(ex) 56 | 57 | version = os.popen(f"{install_path} --version").read().strip('Google Chrome ').strip() if install_path else version 58 | 59 | return version -------------------------------------------------------------------------------- /visuals.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Wed Oct 14 14:38:46 2020 4 | 5 | @author: aliha 6 | @twitter: rockingAli5 7 | """ 8 | 9 | import pandas as pd 10 | import numpy as np 11 | from mplsoccer.pitch import Pitch, VerticalPitch 12 | from matplotlib.colors import to_rgba 13 | from matplotlib.patches import ConnectionPatch 14 | from itertools import combinations 15 | import seaborn as sns 16 | 17 | 18 | def createShotmap(match_data, events_df, team, pitchcolor, shotcolor, goalcolor, 19 | titlecolor, legendcolor, marker_size, fig, ax): 20 | # getting team id and venue 21 | if match_data['home']['name'] == team: 22 | teamId = match_data['home']['teamId'] 23 | venue = 'home' 24 | else: 25 | teamId = match_data['away']['teamId'] 26 | venue = 'away' 27 | 28 | # getting opponent 29 | if venue == 'home': 30 | opponent = match_data['away']['name'] 31 | else: 32 | opponent = match_data['home']['name'] 33 | 34 | total_shots = events_df.loc[events_df['isShot']==True].reset_index(drop=True) 35 | team_shots = total_shots.loc[total_shots['teamId'] == teamId].reset_index(drop=True) 36 | mask_goal = team_shots.isGoal == True 37 | 38 | # Setup the pitch 39 | # orientation='vertical' 40 | pitch = VerticalPitch(pitch_type='statsbomb', pitch_color=pitchcolor, line_color='#c7d5cc', 41 | half=True, pad_top=2) 42 | pitch.draw(ax=ax, tight_layout=True, constrained_layout=True) 43 | 44 | 45 | # Plot the goals 46 | pitch.scatter(team_shots[mask_goal].x/100*120, 80-team_shots[mask_goal].y/100*80, s=marker_size, 47 | edgecolors='black', c=goalcolor, zorder=2, 48 | label='goal', ax=ax) 49 | pitch.scatter(team_shots[~mask_goal].x/100*120, 80-team_shots[~mask_goal].y/100*80, 50 | edgecolors='white', c=shotcolor, s=marker_size, zorder=2, 51 | label='shot', ax=ax) 52 | # Set the title 53 | ax.set_title(f'{team} shotmap \n vs {opponent}', fontsize=30, color=titlecolor) 54 | 55 | # set legend 56 | leg = ax.legend(facecolor=pitchcolor, edgecolor='None', fontsize=20, loc='lower center', handlelength=4) 57 | leg_texts = leg.get_texts() # list of matplotlib Text instances. 
58 | leg_texts[0].set_color(legendcolor) 59 | leg_texts[1].set_color(legendcolor) 60 | 61 | # Set the figure facecolor 62 | fig.set_facecolor(pitchcolor) 63 | 64 | 65 | 66 | 67 | 68 | 69 | def createPassNetworks(match_data, events_df, matchId, team, max_line_width, 70 | marker_size, edgewidth, dh_arrow_width, marker_color, 71 | marker_edge_color, shrink, ax, kit_no_size=20): 72 | 73 | # getting team id and venue 74 | if match_data['home']['name'] == team: 75 | teamId = match_data['home']['teamId'] 76 | venue = 'home' 77 | else: 78 | teamId = match_data['away']['teamId'] 79 | venue = 'away' 80 | 81 | 82 | # getting opponent 83 | if venue == 'home': 84 | opponent = match_data['away']['name'] 85 | else: 86 | opponent = match_data['home']['name'] 87 | 88 | 89 | # getting player dictionary 90 | team_players_dict = {} 91 | for player in match_data[venue]['players']: 92 | team_players_dict[player['playerId']] = player['name'] 93 | 94 | 95 | # getting minute of first substitution 96 | for i in events_df.index: 97 | if events_df.loc[i, 'type'] == 'SubstitutionOn' and events_df.loc[i, 'teamId'] == teamId: 98 | sub_minute = str(events_df.loc[i, 'minute']) 99 | break 100 | 101 | 102 | # getting players dataframe 103 | match_players_df = pd.DataFrame() 104 | player_names = [] 105 | player_ids = [] 106 | player_pos = [] 107 | player_kit_number = [] 108 | 109 | 110 | for player in match_data[venue]['players']: 111 | player_names.append(player['name']) 112 | player_ids.append(player['playerId']) 113 | player_pos.append(player['position']) 114 | player_kit_number.append(player['shirtNo']) 115 | 116 | match_players_df['playerId'] = player_ids 117 | match_players_df['playerName'] = player_names 118 | match_players_df['playerPos'] = player_pos 119 | match_players_df['playerKitNumber'] = player_kit_number 120 | 121 | 122 | # extracting passes 123 | passes_df = events_df.loc[events_df['teamId'] == teamId].reset_index().drop('index', axis=1) 124 | passes_df['playerId'] = passes_df['playerId'].astype('float').astype('Int64') 125 | if 'playerName' in passes_df.columns: 126 | passes_df = passes_df.drop(columns='playerName') 127 | passes_df.dropna(subset=["playerId"], inplace=True) 128 | passes_df.insert(27, column='playerName', value=[team_players_dict[i] for i in list(passes_df['playerId'])]) 129 | if 'passRecipientId' in passes_df.columns: 130 | passes_df = passes_df.drop(columns='passRecipientId') 131 | passes_df = passes_df.drop(columns='passRecipientName') 132 | passes_df.insert(28, column='passRecipientId', value=passes_df['playerId'].shift(-1)) 133 | passes_df.insert(29, column='passRecipientName', value=passes_df['playerName'].shift(-1)) 134 | passes_df.dropna(subset=["passRecipientName"], inplace=True) 135 | passes_df = passes_df.loc[events_df['type'] == 'Pass', :].reset_index(drop=True) 136 | passes_df = passes_df.loc[events_df['outcomeType'] == 'Successful', :].reset_index(drop=True) 137 | index_names = passes_df.loc[passes_df['playerName']==passes_df['passRecipientName']].index 138 | passes_df.drop(index_names, inplace=True) 139 | passes_df = passes_df.merge(match_players_df, on=['playerId', 'playerName'], how='left', validate='m:1') 140 | passes_df = passes_df.merge(match_players_df.rename({'playerId': 'passRecipientId', 'playerName':'passRecipientName'}, 141 | axis='columns'), on=['passRecipientId', 'passRecipientName'], 142 | how='left', validate='m:1', suffixes=['', 'Receipt']) 143 | passes_df = passes_df[passes_df['playerPos'] != 'Sub'] 144 | 145 | 146 | # getting team formation 147 | formation = 
match_data[venue]['formations'][0]['formationName'] 148 | formation = '-'.join(formation) 149 | 150 | 151 | # getting player average locations 152 | location_formation = passes_df[['playerKitNumber', 'x', 'y']] 153 | average_locs_and_count = location_formation.groupby('playerKitNumber').agg({'x': ['mean'], 'y': ['mean', 'count']}) 154 | average_locs_and_count.columns = ['x', 'y', 'count'] 155 | 156 | 157 | # getting separate dataframe for selected columns 158 | passes_formation = passes_df[['id', 'playerKitNumber', 'playerKitNumberReceipt']].copy() 159 | passes_formation['EPV'] = passes_df['EPV'] 160 | 161 | 162 | # getting dataframe for passes between players 163 | passes_between = passes_formation.groupby(['playerKitNumber', 'playerKitNumberReceipt']).agg({ 'id' : 'count', 'EPV' : 'sum'}).reset_index() 164 | passes_between.rename({'id': 'pass_count'}, axis='columns', inplace=True) 165 | passes_between = passes_between.merge(average_locs_and_count, left_on='playerKitNumberReceipt', right_index=True) 166 | passes_between = passes_between.merge(average_locs_and_count, left_on='playerKitNumber', right_index=True, 167 | suffixes=['', '_end']) 168 | 169 | 170 | # filtering passes 171 | pass_filter = int(passes_between['pass_count'].mean()) 172 | passes_between = passes_between.loc[passes_between['pass_count'] > pass_filter] 173 | 174 | 175 | # calculating the line width 176 | passes_between['width'] = passes_between.pass_count / passes_between.pass_count.max() * max_line_width 177 | passes_between = passes_between.reset_index(drop=True) 178 | 179 | 180 | # setting color to make the lines more transparent when fewer passes are made 181 | min_transparency = 0.3 182 | color = np.array(to_rgba('white')) 183 | color = np.tile(color, (len(passes_between), 1)) 184 | c_transparency = passes_between.pass_count / passes_between.pass_count.max() 185 | c_transparency = (c_transparency * (1 - min_transparency)) + min_transparency 186 | color[:, 3] = c_transparency 187 | passes_between['alpha'] = color.tolist() 188 | 189 | 190 | # separating paired passes from normal passes 191 | passes_between_threshold = 15 192 | filtered_pair_df = [] 193 | pair_list = [comb for comb in combinations(passes_between['playerKitNumber'].unique(), 2)] 194 | for pair in pair_list: 195 | df = passes_between[((passes_between['playerKitNumber']==pair[0]) & (passes_between['playerKitNumberReceipt']==pair[1])) | 196 | ((passes_between['playerKitNumber']==pair[1]) & (passes_between['playerKitNumberReceipt']==pair[0]))] 197 | if df.shape[0] == 2: 198 | if (np.array(df.pass_count)[0] >= passes_between_threshold) and (np.array(df.pass_count)[1] >= passes_between_threshold): 199 | filtered_pair_df.append(df) 200 | passes_between.drop(df.index, inplace=True) 201 | if len(filtered_pair_df) > 0: 202 | filtered_pair_df = pd.concat(filtered_pair_df).reset_index(drop=True) 203 | passes_between = passes_between.reset_index(drop=True) 204 | 205 | 206 | # plotting 207 | pitch = Pitch(pitch_type='opta', pitch_color='#171717', line_color='#5c5c5c', 208 | goal_type='box') 209 | pitch.draw(ax=ax, constrained_layout=True, tight_layout=True) 210 | average_locs_and_count['zorder'] = list(np.linspace(1,5,11)) 211 | for i in average_locs_and_count.index: 212 | pitch.scatter(average_locs_and_count.loc[i, 'x'], average_locs_and_count.loc[i, 'y'], s=marker_size, 213 | color=marker_color, edgecolors=marker_edge_color, linewidth=edgewidth, 214 | alpha=1, zorder=average_locs_and_count.loc[i, 'zorder'], ax=ax) 215 | 216 | for i in passes_between.index: 217 | x = 
passes_between.loc[i, 'x'] 218 | y = passes_between.loc[i, 'y'] 219 | endX = passes_between.loc[i, 'x_end'] 220 | endY = passes_between.loc[i, 'y_end'] 221 | coordsA = "data" 222 | coordsB = "data" 223 | con = ConnectionPatch([endX, endY], [x, y], 224 | coordsA, coordsB, 225 | arrowstyle="simple", shrinkA=shrink, shrinkB=shrink, 226 | mutation_scale=passes_between.loc[i, 'width']*max_line_width, color=passes_between.loc[i, 'alpha']) 227 | ax.add_artist(con) 228 | 229 | if len(filtered_pair_df) > 0: 230 | for i in filtered_pair_df.index: 231 | x = filtered_pair_df.loc[i, 'x'] 232 | y = filtered_pair_df.loc[i, 'y'] 233 | endX = filtered_pair_df.loc[i, 'x_end'] 234 | endY = filtered_pair_df.loc[i, 'y_end'] 235 | coordsA = "data" 236 | coordsB = "data" 237 | con = ConnectionPatch([endX, endY], [x, y], 238 | coordsA, coordsB, 239 | arrowstyle="<|-|>", shrinkA=shrink, shrinkB=shrink, 240 | mutation_scale=dh_arrow_width, lw=filtered_pair_df.loc[i, 'width']*max_line_width/5, 241 | color=filtered_pair_df.loc[i, 'alpha']) 242 | ax.add_artist(con) 243 | 244 | for i in average_locs_and_count.index: 245 | pitch.annotate(i, xy=(average_locs_and_count.loc[i, 'x'], average_locs_and_count.loc[i, 'y']), 246 | family='DejaVu Sans', c='white', 247 | va='center', ha='center', zorder=average_locs_and_count.loc[i, 'zorder'], size=kit_no_size, weight='bold', ax=ax) 248 | ax.text(50, 104, "{} (Mins 1-{})".format(team, sub_minute).upper(), size=10, fontweight='bold', ha='center', 249 | va='center', c='white') 250 | ax.text(2, 3, '{}'.format(formation), size=9, c='grey') 251 | 252 | 253 | 254 | 255 | 256 | 257 | def createAttPassNetworks(match_data, events_df, matchId, team, max_line_width, 258 | marker_size, edgewidth, dh_arrow_width, marker_color, 259 | marker_edge_color, shrink, ax, kit_no_size = 20): 260 | 261 | # getting team id and venue 262 | if match_data['home']['name'] == team: 263 | teamId = match_data['home']['teamId'] 264 | venue = 'home' 265 | else: 266 | teamId = match_data['away']['teamId'] 267 | venue = 'away' 268 | 269 | 270 | # getting opponent 271 | if venue == 'home': 272 | opponent = match_data['away']['name'] 273 | else: 274 | opponent = match_data['home']['name'] 275 | 276 | 277 | # getting player dictionary 278 | team_players_dict = {} 279 | for player in match_data[venue]['players']: 280 | team_players_dict[player['playerId']] = player['name'] 281 | 282 | 283 | # getting minute of first substitution 284 | for i in events_df.index: 285 | if events_df.loc[i, 'type'] == 'SubstitutionOn' and events_df.loc[i, 'teamId'] == teamId: 286 | sub_minute = str(events_df.loc[i, 'minute']) 287 | break 288 | 289 | 290 | # getting players dataframe 291 | match_players_df = pd.DataFrame() 292 | player_names = [] 293 | player_ids = [] 294 | player_pos = [] 295 | player_kit_number = [] 296 | 297 | 298 | for player in match_data[venue]['players']: 299 | player_names.append(player['name']) 300 | player_ids.append(player['playerId']) 301 | player_pos.append(player['position']) 302 | player_kit_number.append(player['shirtNo']) 303 | 304 | match_players_df['playerId'] = player_ids 305 | match_players_df['playerName'] = player_names 306 | match_players_df['playerPos'] = player_pos 307 | match_players_df['playerKitNumber'] = player_kit_number 308 | 309 | 310 | # extracting passes 311 | passes_df = events_df.loc[events_df['teamId'] == teamId].reset_index().drop('index', axis=1) 312 | passes_df['playerId'] = passes_df['playerId'].astype('float').astype('Int64') 313 | if 'playerName' in passes_df.columns: 314 | 
passes_df = passes_df.drop(columns='playerName') 315 | passes_df.dropna(subset=["playerId"], inplace=True) 316 | passes_df.insert(27, column='playerName', value=[team_players_dict[i] for i in list(passes_df['playerId'])]) 317 | if 'passRecipientId' in passes_df.columns: 318 | passes_df = passes_df.drop(columns='passRecipientId') 319 | passes_df = passes_df.drop(columns='passRecipientName') 320 | passes_df.insert(28, column='passRecipientId', value=passes_df['playerId'].shift(-1)) 321 | passes_df.insert(29, column='passRecipientName', value=passes_df['playerName'].shift(-1)) 322 | passes_df.dropna(subset=["passRecipientName"], inplace=True) 323 | passes_df = passes_df.loc[passes_df['type'] == 'Pass', :].reset_index(drop=True) 324 | passes_df = passes_df.loc[passes_df['outcomeType'] == 'Successful', :].reset_index(drop=True) 325 | index_names = passes_df.loc[passes_df['playerName']==passes_df['passRecipientName']].index 326 | passes_df.drop(index_names, inplace=True) 327 | passes_df = passes_df.merge(match_players_df, on=['playerId', 'playerName'], how='left', validate='m:1') 328 | passes_df = passes_df.merge(match_players_df.rename({'playerId': 'passRecipientId', 'playerName':'passRecipientName'}, 329 | axis='columns'), on=['passRecipientId', 'passRecipientName'], 330 | how='left', validate='m:1', suffixes=['', 'Receipt']) 331 | passes_df = passes_df[passes_df['playerPos'] != 'Sub'] 332 | 333 | 334 | # getting team formation 335 | formation = match_data[venue]['formations'][0]['formationName'] 336 | formation = '-'.join(formation) 337 | 338 | 339 | # getting player average locations 340 | location_formation = passes_df[['playerKitNumber', 'x', 'y']] 341 | average_locs_and_count = location_formation.groupby('playerKitNumber').agg({'x': ['mean'], 'y': ['mean', 'count']}) 342 | average_locs_and_count.columns = ['x', 'y', 'count'] 343 | 344 | 345 | # keeping only passes with a positive EPV 346 | passes_df = passes_df.loc[passes_df['EPV'] > 0] 347 | 348 | 349 | # getting separate dataframe for selected columns 350 | passes_formation = passes_df[['id', 'playerKitNumber', 'playerKitNumberReceipt']].copy() 351 | passes_formation['EPV'] = passes_df['EPV'] 352 | 353 | 354 | # getting dataframe for passes between players 355 | passes_between = passes_formation.groupby(['playerKitNumber', 'playerKitNumberReceipt']).agg({ 'id' : 'count', 'EPV' : 'sum'}).reset_index() 356 | passes_between.rename({'id': 'pass_count'}, axis='columns', inplace=True) 357 | passes_between = passes_between.merge(average_locs_and_count, left_on='playerKitNumberReceipt', right_index=True) 358 | passes_between = passes_between.merge(average_locs_and_count, left_on='playerKitNumber', right_index=True, 359 | suffixes=['', '_end']) 360 | 361 | 362 | # filtering passes 363 | pass_filter = int(passes_between['pass_count'].mean()) 364 | passes_between = passes_between.loc[passes_between['pass_count'] > pass_filter*2] 365 | 366 | 367 | # calculating the line width relative to the largest pass count 368 | passes_between['width'] = passes_between.pass_count / passes_between.pass_count.max() * max_line_width 369 | passes_between = passes_between.reset_index(drop=True) 370 | 371 | 372 | # setting color to make the lines more transparent when the passes create less EPV 373 | min_transparency = 0.3 374 | color = np.array(to_rgba('white')) 375 | color = np.tile(color, (len(passes_between), 1)) 376 | c_transparency = passes_between.EPV / passes_between.EPV.max() 377 | c_transparency = (c_transparency * (1 - min_transparency)) + min_transparency
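    # Each pair's summed EPV is rescaled into [min_transparency, 1] before being written
    # into the alpha channel below, so even the least valuable links remain visible:
    # e.g. a pair producing half the maximum EPV gets alpha = 0.5 * (1 - 0.3) + 0.3 = 0.65.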
378 | color[:, 3] = c_transparency 379 | passes_between['alpha'] = color.tolist() 380 | 381 | 382 | # separating paired passes from normal passes 383 | passes_between_threshold = 20 384 | filtered_pair_df = [] 385 | pair_list = [comb for comb in combinations(passes_between['playerKitNumber'].unique(), 2)] 386 | for pair in pair_list: 387 | df = passes_between[((passes_between['playerKitNumber']==pair[0]) & (passes_between['playerKitNumberReceipt']==pair[1])) | 388 | ((passes_between['playerKitNumber']==pair[1]) & (passes_between['playerKitNumberReceipt']==pair[0]))] 389 | if df.shape[0] == 2: 390 | if np.array(df.pass_count)[0]+np.array(df.pass_count)[1] >= passes_between_threshold: 391 | filtered_pair_df.append(df) 392 | passes_between.drop(df.index, inplace=True) 393 | if len(filtered_pair_df) > 0: 394 | filtered_pair_df = pd.concat(filtered_pair_df).reset_index(drop=True) 395 | passes_between = passes_between.reset_index(drop=True) 396 | 397 | 398 | # plotting 399 | pitch = Pitch(pitch_type='opta', pitch_color='#171717', line_color='#5c5c5c', 400 | goal_type='box') 401 | pitch.draw(ax=ax, constrained_layout=True, tight_layout=True) 402 | 403 | average_locs_and_count['zorder'] = list(np.linspace(1,5,11)) 404 | for i in average_locs_and_count.index: 405 | pitch.scatter(average_locs_and_count.loc[i, 'x'], average_locs_and_count.loc[i, 'y'], s=marker_size, 406 | color=marker_color, edgecolors=marker_edge_color, linewidth=edgewidth, 407 | alpha=1, zorder=average_locs_and_count.loc[i, 'zorder'], ax=ax) 408 | 409 | for i in passes_between.index: 410 | x = passes_between.loc[i, 'x'] 411 | y = passes_between.loc[i, 'y'] 412 | endX = passes_between.loc[i, 'x_end'] 413 | endY = passes_between.loc[i, 'y_end'] 414 | coordsA = "data" 415 | coordsB = "data" 416 | con = ConnectionPatch([endX, endY], [x, y], 417 | coordsA, coordsB, 418 | arrowstyle="simple", shrinkA=shrink, shrinkB=shrink, 419 | mutation_scale=passes_between.loc[i, 'width']*max_line_width, color=passes_between.loc[i, 'alpha']) 420 | ax.add_artist(con) 421 | 422 | if len(filtered_pair_df) > 0: 423 | for i in filtered_pair_df.index: 424 | x = filtered_pair_df.loc[i, 'x'] 425 | y = filtered_pair_df.loc[i, 'y'] 426 | endX = filtered_pair_df.loc[i, 'x_end'] 427 | endY = filtered_pair_df.loc[i, 'y_end'] 428 | coordsA = "data" 429 | coordsB = "data" 430 | con = ConnectionPatch([endX, endY], [x, y], 431 | coordsA, coordsB, 432 | arrowstyle="<|-|>", shrinkA=shrink, shrinkB=shrink, 433 | mutation_scale=dh_arrow_width, lw=filtered_pair_df.loc[i, 'width']*max_line_width/5, 434 | color=filtered_pair_df.loc[i, 'alpha']) 435 | ax.add_artist(con) 436 | 437 | for i in average_locs_and_count.index: 438 | pitch.annotate(i, xy=(average_locs_and_count.loc[i, 'x'], average_locs_and_count.loc[i, 'y']), 439 | family='DejaVu Sans', c='white', 440 | va='center', ha='center', zorder=average_locs_and_count.loc[i, 'zorder'], size=kit_no_size, weight='bold', ax=ax) 441 | ax.text(50, 104, "{} (Mins 1-{})".format(team, sub_minute).upper(), size=10, fontweight='bold', ha='center', 442 | va='center', c='white') 443 | ax.text(2, 3, '{}'.format(formation), size=9, c='grey') 444 | 445 | 446 | 447 | 448 | 449 | 450 | 451 | 452 | 453 | def getTeamSuccessfulBoxPasses(events_df, teamId, team, pitch_color, cmap): 454 | """ 455 | Parameters 456 | ---------- 457 | events_df : DataFrame of all events. 458 | 459 | teamId : ID of the team, the passes of which are required. 460 | 461 | team : Name of the team, the passes of which are required. 
462 | 463 | pitch_color : color of the pitch. 464 | 465 | cmap : color design of the pass lines. 466 | You can select more cmaps here: 467 | https://matplotlib.org/3.1.0/tutorials/colors/colormaps.html 468 | 469 | Returns 470 | ------- 471 | Pitch Plot. 472 | 473 | """ 474 | 475 | # Get Total Passes 476 | passes_df = events_df.loc[events_df['type']=='Pass'].reset_index(drop=True) 477 | 478 | # Get Team Passes 479 | team_passes = passes_df.loc[passes_df['teamId'] == teamId] 480 | 481 | # Extracting Box Passes from Total Passes 482 | box_passes = team_passes.copy() 483 | for i,pas in box_passes.iterrows(): 484 | X = pas["x"]/100*120 485 | Xend = pas["endX"]/100*120 486 | Y = pas["y"]/100*80 487 | Yend = pas["endY"]/100*80 488 | if Xend >= 102 and Yend >= 18 and Yend <= 62: 489 | if X >=102 and Y >= 18 and Y <= 62: 490 | box_passes = box_passes.drop([i]) 491 | else: 492 | pass 493 | else: 494 | box_passes = box_passes.drop([i]) 495 | 496 | 497 | successful_box_passes = box_passes.loc[box_passes['outcomeType']=='Successful'].reset_index(drop=True) 498 | 499 | 500 | # orientation='vertical' 501 | pitch = VerticalPitch(pitch_type='statsbomb', pitch_color=pitch_color, line_color='#c7d5cc', 502 | half=True, pad_top=2) 503 | fig, ax = pitch.draw(tight_layout=True) 504 | 505 | # Plot the completed passes 506 | pitch.lines(successful_box_passes.x/100*120, 80-successful_box_passes.y/100*80, 507 | successful_box_passes.endX/100*120, 80-successful_box_passes.endY/100*80, 508 | lw=5, cmap=cmap, opp_comet=True, opp_transparent=True, 509 | label='Successful Passes', ax=ax) 510 | 511 | pitch.scatter(successful_box_passes.x/100*120, 80-successful_box_passes.y/100*80, 512 | edgecolors='white', c='white', s=50, zorder=2, 513 | ax=ax) 514 | 515 | # Set the title 516 | fig.suptitle(f'Completed Box Passes - {team}', y=.95, fontsize=15) 517 | 518 | # Set the subtitle 519 | ax.set_title('Data : Whoscored/Opta', fontsize=8, loc='right', fontstyle='italic', fontweight='bold') 520 | 521 | # set legend 522 | #ax.legend(facecolor='#22312b', edgecolor='None', fontsize=8, loc='lower center', handlelength=4) 523 | 524 | # Set the figure facecolor 525 | fig.set_facecolor(pitch_color) 526 | 527 | 528 | 529 | 530 | 531 | 532 | 533 | 534 | def getTeamTotalPasses(events_df, teamId, team, opponent, pitch_color): 535 | """ 536 | 537 | 538 | Parameters 539 | ---------- 540 | events_df : DataFrame of all events. 541 | 542 | teamId : ID of the team, the passes of which are required. 543 | 544 | team : Name of the team, the passes of which are required. 545 | 546 | opponent : Name of opponent team. 547 | 548 | pitch_color : color of the pitch. 549 | 550 | 551 | Returns 552 | ------- 553 | Pitch Plot. 
554 | """ 555 | 556 | # Get Total Passes 557 | passes_df = events_df.loc[events_df['type']=='Pass'].reset_index(drop=True) 558 | 559 | # Get Team Passes 560 | team_passes = passes_df.loc[passes_df['teamId'] == teamId] 561 | 562 | successful_passes = team_passes.loc[team_passes['outcomeType']=='Successful'].reset_index(drop=True) 563 | unsuccessful_passes = team_passes.loc[team_passes['outcomeType']=='Unsuccessful'].reset_index(drop=True) 564 | 565 | # Setup the pitch 566 | pitch = Pitch(pitch_type='statsbomb', pitch_color=pitch_color, line_color='#c7d5cc') 567 | fig, ax = pitch.draw(constrained_layout=True, tight_layout=False) 568 | # fig.set_size_inches(14, 10) 569 | 570 | # Plot the completed passes 571 | pitch.arrows(successful_passes.x/100*120, 80-successful_passes.y/100*80, 572 | successful_passes.endX/100*120, 80-successful_passes.endY/100*80, width=1, 573 | headwidth=10, headlength=10, color='#ad993c', ax=ax, label='Completed') 574 | 575 | # Plot the other passes 576 | pitch.arrows(unsuccessful_passes.x/100*120, 80-unsuccessful_passes.y/100*80, 577 | unsuccessful_passes.endX/100*120, 80-unsuccessful_passes.endY/100*80, width=1, 578 | headwidth=6, headlength=5, headaxislength=12, color='#ba4f45', ax=ax, label='Blocked') 579 | 580 | # setup the legend 581 | ax.legend(facecolor=pitch_color, handlelength=5, edgecolor='None', fontsize=8, loc='upper left', shadow=True, labelcolor='white') 582 | 583 | # Set the title 584 | fig.suptitle(f'{team} Passes vs {opponent}', y=1, fontsize=15) 585 | 586 | 587 | # Set the subtitle 588 | ax.set_title('Data : Whoscored/Opta', fontsize=8, loc='right', fontstyle='italic', fontweight='bold') 589 | 590 | 591 | # Set the figure facecolor 592 | 593 | fig.set_facecolor(pitch_color) 594 | 595 | 596 | 597 | 598 | 599 | 600 | def normalize(values, bounds): 601 | return [bounds['desired']['lower'] + (x - bounds['actual']['lower']) * (bounds['desired']['upper'] 602 | - bounds['desired']['lower']) / (bounds['actual']['upper'] - bounds['actual']['lower']) for x in values] 603 | 604 | 605 | 606 | 607 | 608 | def createPVFormationMap(match_data, events_df, team, color_palette, 609 | markerstyle, markersize, markeredgewidth, labelsize, labelcolor, ax): 610 | 611 | # getting team id and venue 612 | if match_data['home']['name'] == team: 613 | teamId = match_data['home']['teamId'] 614 | venue = 'home' 615 | else: 616 | teamId = match_data['away']['teamId'] 617 | venue = 'away' 618 | 619 | 620 | # getting opponent 621 | if venue == 'home': 622 | opponent = match_data['away']['name'] 623 | else: 624 | opponent = match_data['home']['name'] 625 | 626 | 627 | # getting player dictionary 628 | team_players_dict = {} 629 | for player in match_data[venue]['players']: 630 | team_players_dict[player['playerId']] = player['name'] 631 | 632 | 633 | # getting minute of first substitution 634 | for i,row in events_df.iterrows(): 635 | if row['type'] == 'SubstitutionOn' and row['teamId'] == teamId: 636 | sub_minute = str(row['minute']) 637 | break 638 | 639 | 640 | # getting players dataframe 641 | match_players_df = pd.DataFrame() 642 | player_names = [] 643 | player_ids = [] 644 | player_pos = [] 645 | player_kit_number = [] 646 | 647 | for player in match_data[venue]['players']: 648 | player_names.append(player['name']) 649 | player_ids.append(player['playerId']) 650 | player_pos.append(player['position']) 651 | player_kit_number.append(player['shirtNo']) 652 | 653 | match_players_df['playerId'] = player_ids 654 | match_players_df['playerName'] = player_names 655 | 
match_players_df['playerPos'] = player_pos 656 | match_players_df['playerKitNumber'] = player_kit_number 657 | 658 | 659 | # extracting passes 660 | passes_df = events_df.loc[events_df['teamId'] == teamId].reset_index().drop('index', axis=1) 661 | passes_df['playerId'] = passes_df['playerId'].astype('float').astype('Int64') 662 | if 'playerName' in passes_df.columns: 663 | passes_df = passes_df.drop(columns='playerName') 664 | passes_df.dropna(subset=["playerId"], inplace=True) 665 | passes_df.insert(27, column='playerName', value=[team_players_dict[i] for i in list(passes_df['playerId'])]) 666 | if 'passRecipientId' in passes_df.columns: 667 | passes_df = passes_df.drop(columns='passRecipientId') 668 | passes_df = passes_df.drop(columns='passRecipientName') 669 | passes_df.insert(28, column='passRecipientId', value=passes_df['playerId'].shift(-1)) 670 | passes_df.insert(29, column='passRecipientName', value=passes_df['playerName'].shift(-1)) 671 | passes_df.dropna(subset=["passRecipientName"], inplace=True) 672 | passes_df = passes_df.loc[passes_df['type'] == 'Pass', :].reset_index(drop=True) 673 | passes_df = passes_df.loc[passes_df['outcomeType'] == 'Successful', :].reset_index(drop=True) 674 | index_names = passes_df.loc[passes_df['playerName']==passes_df['passRecipientName']].index 675 | passes_df.drop(index_names, inplace=True) 676 | passes_df = passes_df.merge(match_players_df, on=['playerId', 'playerName'], how='left', validate='m:1') 677 | passes_df = passes_df.merge(match_players_df.rename({'playerId': 'passRecipientId', 'playerName':'passRecipientName'}, 678 | axis='columns'), on=['passRecipientId', 'passRecipientName'], 679 | how='left', validate='m:1', suffixes=['', 'Receipt']) 680 | # passes_df = passes_df[passes_df['playerPos'] != 'Sub'] 681 | 682 | 683 | # Getting net possession value for passes 684 | netPVPassed = passes_df.groupby(['playerId', 'playerName'])['EPV'].sum().reset_index() 685 | netPVReceived = passes_df.groupby(['passRecipientId', 'passRecipientName'])['EPV'].sum().reset_index() 686 | 687 | 688 | 689 | # Getting formation and player ids for first 11 690 | formation = match_data[venue]['formations'][0]['formationName'] 691 | formation_positions = match_data[venue]['formations'][0]['formationPositions'] 692 | playerIds = match_data[venue]['formations'][0]['playerIds'][:11] 693 | 694 | 695 | # Getting all data in a dataframe 696 | formation_data = [] 697 | for playerId, pos in zip(playerIds, formation_positions): 698 | pl_dict = {'playerId': playerId} 699 | pl_dict.update(pos) 700 | formation_data.append(pl_dict) 701 | formation_data = pd.DataFrame(formation_data) 702 | formation_data['vertical'] = normalize(formation_data['vertical'], 703 | {'actual': {'lower': 0, 'upper': 10}, 'desired': {'lower': 10, 'upper': 110}}) 704 | formation_data['horizontal'] = normalize(formation_data['horizontal'], 705 | {'actual': {'lower': 0, 'upper': 10}, 'desired': {'lower': 80, 'upper': 0}}) 706 | formation_data = netPVPassed.join(formation_data.set_index('playerId'), on='playerId', how='inner').reset_index(drop=True) 707 | formation_data = formation_data.rename(columns={"EPV": "PV"}) 708 | 709 | 710 | # Plotting 711 | pitch = Pitch(pitch_type='statsbomb', pitch_color='#171717', line_color='#5c5c5c', 712 | goal_type='box') 713 | pitch.draw(ax=ax, constrained_layout=True, tight_layout=True) 714 | 715 | sns.scatterplot(x='vertical', y='horizontal', data=formation_data, hue='PV', s=markersize, marker=markerstyle, legend=False, 716 | palette=color_palette, linewidth=markeredgewidth,
ax=ax) 717 | 718 | ax.text(2, 78, '{}'.format('-'.join(formation)), size=20, c='grey') 719 | 720 | for index, row in formation_data.iterrows(): 721 | pitch.annotate(str(round(row.PV*100,2))+'%', xy=(row.vertical, row.horizontal), c=labelcolor, va='center', 722 | ha='center', size=labelsize, zorder=2, weight='bold', ax=ax) 723 | pitch.annotate(row.playerName, xy=(row.vertical, row.horizontal+5), c=labelcolor, va='center', 724 | ha='center', size=labelsize, zorder=2, weight='bold', ax=ax) 725 | 726 | 727 | 728 | --------------------------------------------------------------------------------
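A minimal driver sketch for the plotting functions above. It assumes match_data is the WhoScored match-centre dictionary and events_df the event DataFrame carrying an 'EPV' column, both produced elsewhere in this repository; the team name and styling values are placeholders, not defaults documented by the module.

import matplotlib.pyplot as plt
from visuals import createShotmap, createPassNetworks

# match_data and events_df are assumed to come from the scraping / EPV steps
# of this repository; they are not defined in this sketch.
fig, ax = plt.subplots(figsize=(8, 9))
createShotmap(match_data, events_df, team='Home Team',  # placeholder team name
              pitchcolor='#22312b', shotcolor='grey', goalcolor='red',
              titlecolor='white', legendcolor='white', marker_size=300,
              fig=fig, ax=ax)

fig2, ax2 = plt.subplots(figsize=(11, 8))
createPassNetworks(match_data, events_df, matchId=None, team='Home Team',
                   max_line_width=6, marker_size=800, edgewidth=2,
                   dh_arrow_width=25, marker_color='#1f77b4',
                   marker_edge_color='white', shrink=20, ax=ax2)
plt.show()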