├── .gitignore ├── EPV_grid.csv ├── LICENSE ├── README.md ├── data └── events.csv ├── linecollection.py ├── logo.jpg ├── main.py ├── requirements.txt ├── test.py ├── tutorial.ipynb ├── utils.py └── visuals.py /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | .ipynb_checkpoints 3 | */.ipynb_checkpoints/* 4 | -------------------------------------------------------------------------------- /EPV_grid.csv: -------------------------------------------------------------------------------- 1 | 0.0046,0.0046,0.0045,0.0046,0.0048,0.0051,0.0054,0.0057,0.0059,0.0062,0.0066,0.0068,0.0071,0.0072,0.0073,0.0077,0.0082,0.0086,0.0088,0.0092,0.0096,0.0100,0.0104,0.0110,0.0117,0.0123,0.0127,0.0131,0.0135,0.0142,0.0150,0.0160,0.0170,0.0181,0.0192,0.0208,0.0227,0.0244,0.0259,0.0276,0.0297,0.0313,0.0326,0.0341,0.0356,0.0365,0.0368,0.0388,0.0424,0.0443 2 | 0.0044,0.0044,0.0045,0.0046,0.0049,0.0052,0.0055,0.0058,0.0060,0.0062,0.0066,0.0069,0.0072,0.0074,0.0075,0.0079,0.0084,0.0087,0.0090,0.0093,0.0097,0.0102,0.0106,0.0112,0.0118,0.0123,0.0128,0.0132,0.0136,0.0143,0.0152,0.0162,0.0171,0.0183,0.0196,0.0211,0.0229,0.0246,0.0261,0.0278,0.0298,0.0314,0.0327,0.0342,0.0359,0.0368,0.0370,0.0388,0.0421,0.0437 3 | 0.0041,0.0042,0.0044,0.0047,0.0052,0.0056,0.0058,0.0060,0.0061,0.0063,0.0067,0.0070,0.0074,0.0076,0.0079,0.0083,0.0087,0.0090,0.0092,0.0095,0.0101,0.0106,0.0111,0.0115,0.0119,0.0124,0.0130,0.0135,0.0139,0.0146,0.0156,0.0165,0.0174,0.0186,0.0202,0.0218,0.0232,0.0248,0.0265,0.0283,0.0300,0.0315,0.0328,0.0345,0.0366,0.0376,0.0376,0.0388,0.0413,0.0425 4 | 0.0041,0.0042,0.0045,0.0049,0.0054,0.0058,0.0061,0.0062,0.0062,0.0064,0.0068,0.0072,0.0075,0.0079,0.0082,0.0085,0.0089,0.0092,0.0094,0.0098,0.0104,0.0109,0.0114,0.0118,0.0122,0.0126,0.0133,0.0138,0.0142,0.0150,0.0160,0.0170,0.0179,0.0192,0.0209,0.0224,0.0237,0.0252,0.0268,0.0284,0.0300,0.0316,0.0332,0.0351,0.0373,0.0386,0.0392,0.0400,0.0411,0.0417 5 | 0.0044,0.0045,0.0048,0.0052,0.0056,0.0060,0.0062,0.0064,0.0065,0.0067,0.0070,0.0073,0.0077,0.0080,0.0083,0.0087,0.0092,0.0095,0.0097,0.0101,0.0107,0.0113,0.0117,0.0121,0.0125,0.0129,0.0136,0.0141,0.0146,0.0153,0.0164,0.0175,0.0186,0.0200,0.0215,0.0230,0.0243,0.0256,0.0269,0.0282,0.0296,0.0315,0.0339,0.0361,0.0380,0.0400,0.0418,0.0424,0.0416,0.0412 6 | 0.0045,0.0047,0.0051,0.0054,0.0057,0.0060,0.0063,0.0066,0.0067,0.0069,0.0072,0.0075,0.0079,0.0082,0.0085,0.0089,0.0094,0.0098,0.0100,0.0104,0.0111,0.0116,0.0120,0.0124,0.0127,0.0131,0.0138,0.0144,0.0149,0.0157,0.0169,0.0180,0.0191,0.0204,0.0219,0.0234,0.0248,0.0261,0.0274,0.0287,0.0300,0.0323,0.0357,0.0382,0.0399,0.0421,0.0447,0.0452,0.0438,0.0430 7 | 0.0047,0.0049,0.0053,0.0056,0.0057,0.0060,0.0064,0.0067,0.0068,0.0070,0.0075,0.0078,0.0080,0.0083,0.0087,0.0091,0.0096,0.0100,0.0103,0.0108,0.0115,0.0119,0.0123,0.0125,0.0127,0.0132,0.0140,0.0147,0.0153,0.0162,0.0174,0.0184,0.0192,0.0204,0.0221,0.0237,0.0251,0.0267,0.0285,0.0299,0.0311,0.0340,0.0387,0.0416,0.0429,0.0449,0.0476,0.0486,0.0477,0.0473 8 | 0.0047,0.0050,0.0054,0.0057,0.0059,0.0062,0.0067,0.0070,0.0070,0.0072,0.0077,0.0080,0.0082,0.0085,0.0090,0.0094,0.0098,0.0102,0.0106,0.0111,0.0117,0.0122,0.0125,0.0128,0.0131,0.0136,0.0143,0.0151,0.0157,0.0166,0.0177,0.0187,0.0195,0.0207,0.0223,0.0238,0.0253,0.0273,0.0299,0.0321,0.0341,0.0373,0.0417,0.0446,0.0460,0.0489,0.0534,0.0546,0.0525,0.0514 9 | 
0.0048,0.0050,0.0055,0.0059,0.0062,0.0066,0.0071,0.0074,0.0074,0.0076,0.0079,0.0082,0.0085,0.0088,0.0092,0.0095,0.0100,0.0104,0.0107,0.0112,0.0119,0.0124,0.0128,0.0132,0.0136,0.0142,0.0148,0.0155,0.0163,0.0171,0.0179,0.0188,0.0199,0.0211,0.0225,0.0238,0.0252,0.0278,0.0316,0.0353,0.0390,0.0421,0.0447,0.0471,0.0492,0.0541,0.0619,0.0633,0.0582,0.0556 10 | 0.0050,0.0052,0.0058,0.0062,0.0065,0.0069,0.0074,0.0077,0.0077,0.0078,0.0081,0.0084,0.0088,0.0091,0.0094,0.0097,0.0102,0.0106,0.0109,0.0114,0.0120,0.0125,0.0131,0.0136,0.0141,0.0146,0.0152,0.0158,0.0166,0.0173,0.0180,0.0189,0.0201,0.0214,0.0227,0.0243,0.0263,0.0293,0.0333,0.0373,0.0413,0.0452,0.0490,0.0540,0.0600,0.0655,0.0703,0.0702,0.0651,0.0626 11 | 0.0053,0.0057,0.0063,0.0067,0.0068,0.0071,0.0076,0.0079,0.0079,0.0081,0.0083,0.0086,0.0089,0.0093,0.0096,0.0100,0.0103,0.0107,0.0113,0.0117,0.0121,0.0126,0.0132,0.0138,0.0145,0.0150,0.0155,0.0161,0.0168,0.0174,0.0180,0.0188,0.0200,0.0213,0.0229,0.0253,0.0286,0.0318,0.0349,0.0380,0.0410,0.0465,0.0546,0.0653,0.0786,0.0829,0.0784,0.0752,0.0734,0.0725 12 | 0.0060,0.0062,0.0067,0.0070,0.0071,0.0074,0.0078,0.0080,0.0081,0.0082,0.0084,0.0087,0.0091,0.0095,0.0098,0.0101,0.0105,0.0109,0.0115,0.0119,0.0123,0.0128,0.0134,0.0140,0.0146,0.0152,0.0156,0.0163,0.0171,0.0178,0.0183,0.0191,0.0201,0.0214,0.0230,0.0259,0.0300,0.0335,0.0362,0.0395,0.0432,0.0514,0.0639,0.0779,0.0934,0.0998,0.0974,0.0967,0.0980,0.0986 13 | 0.0069,0.0069,0.0069,0.0071,0.0074,0.0077,0.0080,0.0082,0.0081,0.0082,0.0085,0.0089,0.0093,0.0096,0.0099,0.0102,0.0106,0.0111,0.0116,0.0121,0.0125,0.0130,0.0136,0.0141,0.0145,0.0150,0.0156,0.0165,0.0177,0.0185,0.0189,0.0196,0.0205,0.0216,0.0230,0.0260,0.0306,0.0344,0.0373,0.0419,0.0481,0.0597,0.0769,0.0918,0.1045,0.1162,0.1271,0.1347,0.1390,0.1412 14 | 0.0077,0.0075,0.0072,0.0072,0.0076,0.0079,0.0082,0.0083,0.0081,0.0082,0.0086,0.0089,0.0093,0.0096,0.0099,0.0103,0.0107,0.0112,0.0116,0.0121,0.0126,0.0131,0.0137,0.0140,0.0142,0.0147,0.0156,0.0165,0.0175,0.0183,0.0190,0.0198,0.0208,0.0219,0.0232,0.0262,0.0310,0.0352,0.0386,0.0454,0.0555,0.0698,0.0882,0.1038,0.1165,0.1362,0.1628,0.1982,0.2425,0.2647 15 | 0.0082,0.0080,0.0075,0.0075,0.0078,0.0081,0.0084,0.0084,0.0082,0.0083,0.0085,0.0088,0.0092,0.0096,0.0099,0.0103,0.0108,0.0112,0.0115,0.0119,0.0125,0.0130,0.0136,0.0138,0.0136,0.0142,0.0156,0.0164,0.0166,0.0173,0.0184,0.0196,0.0210,0.0223,0.0235,0.0265,0.0313,0.0358,0.0400,0.0500,0.0656,0.0816,0.0979,0.1139,0.1296,0.1598,0.2044,0.2873,0.4085,0.4691 16 | 0.0085,0.0083,0.0077,0.0076,0.0079,0.0082,0.0085,0.0085,0.0083,0.0083,0.0085,0.0088,0.0092,0.0095,0.0099,0.0103,0.0108,0.0112,0.0114,0.0118,0.0124,0.0130,0.0136,0.0137,0.0133,0.0139,0.0156,0.0164,0.0162,0.0168,0.0181,0.0195,0.0211,0.0225,0.0237,0.0267,0.0315,0.0362,0.0408,0.0523,0.0707,0.0875,0.1027,0.1190,0.1362,0.1716,0.2252,0.3319,0.4915,0.5714 17 | 0.0085,0.0083,0.0077,0.0076,0.0079,0.0082,0.0085,0.0085,0.0083,0.0083,0.0085,0.0088,0.0092,0.0095,0.0099,0.0103,0.0108,0.0112,0.0114,0.0118,0.0124,0.0130,0.0136,0.0137,0.0133,0.0139,0.0156,0.0164,0.0162,0.0168,0.0181,0.0195,0.0211,0.0225,0.0237,0.0267,0.0315,0.0362,0.0408,0.0523,0.0707,0.0875,0.1027,0.1190,0.1362,0.1716,0.2252,0.3319,0.4915,0.5714 18 | 0.0082,0.0080,0.0075,0.0075,0.0078,0.0081,0.0084,0.0084,0.0082,0.0083,0.0085,0.0088,0.0092,0.0096,0.0099,0.0103,0.0108,0.0112,0.0115,0.0119,0.0125,0.0130,0.0136,0.0138,0.0136,0.0142,0.0156,0.0164,0.0166,0.0173,0.0184,0.0196,0.0210,0.0223,0.0235,0.0265,0.0313,0.0358,0.0400,0.0500,0.0656,0.0816,0.0979,0.1139,0.1296,0.1598,0.2044,0.2873,0.4085,0.4691 19 | 
0.0077,0.0075,0.0072,0.0072,0.0076,0.0079,0.0082,0.0083,0.0081,0.0082,0.0086,0.0089,0.0093,0.0096,0.0099,0.0103,0.0107,0.0112,0.0116,0.0121,0.0126,0.0131,0.0137,0.0140,0.0142,0.0147,0.0156,0.0165,0.0175,0.0183,0.0190,0.0198,0.0208,0.0219,0.0232,0.0262,0.0310,0.0352,0.0386,0.0454,0.0555,0.0698,0.0882,0.1038,0.1165,0.1362,0.1628,0.1982,0.2425,0.2647 20 | 0.0069,0.0069,0.0069,0.0071,0.0074,0.0077,0.0080,0.0082,0.0081,0.0082,0.0085,0.0089,0.0093,0.0096,0.0099,0.0102,0.0106,0.0111,0.0116,0.0121,0.0125,0.0130,0.0136,0.0141,0.0145,0.0150,0.0156,0.0165,0.0177,0.0185,0.0189,0.0196,0.0205,0.0216,0.0230,0.0260,0.0306,0.0344,0.0373,0.0419,0.0481,0.0597,0.0769,0.0918,0.1045,0.1162,0.1271,0.1347,0.1390,0.1412 21 | 0.0060,0.0062,0.0067,0.0070,0.0071,0.0074,0.0078,0.0080,0.0081,0.0082,0.0084,0.0087,0.0091,0.0095,0.0098,0.0101,0.0105,0.0109,0.0115,0.0119,0.0123,0.0128,0.0134,0.0140,0.0146,0.0152,0.0156,0.0163,0.0171,0.0178,0.0183,0.0191,0.0201,0.0214,0.0230,0.0259,0.0300,0.0335,0.0362,0.0395,0.0432,0.0514,0.0639,0.0779,0.0934,0.0998,0.0974,0.0967,0.0980,0.0986 22 | 0.0053,0.0057,0.0063,0.0067,0.0068,0.0071,0.0076,0.0079,0.0079,0.0081,0.0083,0.0086,0.0089,0.0093,0.0096,0.0100,0.0103,0.0107,0.0113,0.0117,0.0121,0.0126,0.0132,0.0138,0.0145,0.0150,0.0155,0.0161,0.0168,0.0174,0.0180,0.0188,0.0200,0.0213,0.0229,0.0253,0.0286,0.0318,0.0349,0.0380,0.0410,0.0465,0.0546,0.0653,0.0786,0.0829,0.0784,0.0752,0.0734,0.0725 23 | 0.0050,0.0052,0.0058,0.0062,0.0065,0.0069,0.0074,0.0077,0.0077,0.0078,0.0081,0.0084,0.0088,0.0091,0.0094,0.0097,0.0102,0.0106,0.0109,0.0114,0.0120,0.0125,0.0131,0.0136,0.0141,0.0146,0.0152,0.0158,0.0166,0.0173,0.0180,0.0189,0.0201,0.0214,0.0227,0.0243,0.0263,0.0293,0.0333,0.0373,0.0413,0.0452,0.0490,0.0540,0.0600,0.0655,0.0703,0.0702,0.0651,0.0626 24 | 0.0048,0.0050,0.0055,0.0059,0.0062,0.0066,0.0071,0.0074,0.0074,0.0076,0.0079,0.0082,0.0085,0.0088,0.0092,0.0095,0.0100,0.0104,0.0107,0.0112,0.0119,0.0124,0.0128,0.0132,0.0136,0.0142,0.0148,0.0155,0.0163,0.0171,0.0179,0.0188,0.0199,0.0211,0.0225,0.0238,0.0252,0.0278,0.0316,0.0353,0.0390,0.0421,0.0447,0.0471,0.0492,0.0541,0.0619,0.0633,0.0582,0.0556 25 | 0.0047,0.0050,0.0054,0.0057,0.0059,0.0062,0.0067,0.0070,0.0070,0.0072,0.0077,0.0080,0.0082,0.0085,0.0090,0.0094,0.0098,0.0102,0.0106,0.0111,0.0117,0.0122,0.0125,0.0128,0.0131,0.0136,0.0143,0.0151,0.0157,0.0166,0.0177,0.0187,0.0195,0.0207,0.0223,0.0238,0.0253,0.0273,0.0299,0.0321,0.0341,0.0373,0.0417,0.0446,0.0460,0.0489,0.0534,0.0546,0.0525,0.0514 26 | 0.0047,0.0049,0.0053,0.0056,0.0057,0.0060,0.0064,0.0067,0.0068,0.0070,0.0075,0.0078,0.0080,0.0083,0.0087,0.0091,0.0096,0.0100,0.0103,0.0108,0.0115,0.0119,0.0123,0.0125,0.0127,0.0132,0.0140,0.0147,0.0153,0.0162,0.0174,0.0184,0.0192,0.0204,0.0221,0.0237,0.0251,0.0267,0.0285,0.0299,0.0311,0.0340,0.0387,0.0416,0.0429,0.0449,0.0476,0.0486,0.0477,0.0473 27 | 0.0045,0.0047,0.0051,0.0054,0.0057,0.0060,0.0063,0.0066,0.0067,0.0069,0.0072,0.0075,0.0079,0.0082,0.0085,0.0089,0.0094,0.0098,0.0100,0.0104,0.0111,0.0116,0.0120,0.0124,0.0127,0.0131,0.0138,0.0144,0.0149,0.0157,0.0169,0.0180,0.0191,0.0204,0.0219,0.0234,0.0248,0.0261,0.0274,0.0287,0.0300,0.0323,0.0357,0.0382,0.0399,0.0421,0.0447,0.0452,0.0438,0.0430 28 | 0.0044,0.0045,0.0048,0.0052,0.0056,0.0060,0.0062,0.0064,0.0065,0.0067,0.0070,0.0073,0.0077,0.0080,0.0083,0.0087,0.0092,0.0095,0.0097,0.0101,0.0107,0.0113,0.0117,0.0121,0.0125,0.0129,0.0136,0.0141,0.0146,0.0153,0.0164,0.0175,0.0186,0.0200,0.0215,0.0230,0.0243,0.0256,0.0269,0.0282,0.0296,0.0315,0.0339,0.0361,0.0380,0.0400,0.0418,0.0424,0.0416,0.0412 29 | 
0.0041,0.0042,0.0045,0.0049,0.0054,0.0058,0.0061,0.0062,0.0062,0.0064,0.0068,0.0072,0.0075,0.0079,0.0082,0.0085,0.0089,0.0092,0.0094,0.0098,0.0104,0.0109,0.0114,0.0118,0.0122,0.0126,0.0133,0.0138,0.0142,0.0150,0.0160,0.0170,0.0179,0.0192,0.0209,0.0224,0.0237,0.0252,0.0268,0.0284,0.0300,0.0316,0.0332,0.0351,0.0373,0.0386,0.0392,0.0400,0.0411,0.0417 30 | 0.0041,0.0042,0.0044,0.0047,0.0052,0.0056,0.0058,0.0060,0.0061,0.0063,0.0067,0.0070,0.0074,0.0076,0.0079,0.0083,0.0087,0.0090,0.0092,0.0095,0.0101,0.0106,0.0111,0.0115,0.0119,0.0124,0.0130,0.0135,0.0139,0.0146,0.0156,0.0165,0.0174,0.0186,0.0202,0.0218,0.0232,0.0248,0.0265,0.0283,0.0300,0.0315,0.0328,0.0345,0.0366,0.0376,0.0376,0.0388,0.0413,0.0425 31 | 0.0044,0.0044,0.0045,0.0046,0.0049,0.0052,0.0055,0.0058,0.0060,0.0062,0.0066,0.0069,0.0072,0.0074,0.0075,0.0079,0.0084,0.0087,0.0090,0.0093,0.0097,0.0102,0.0106,0.0112,0.0118,0.0123,0.0128,0.0132,0.0136,0.0143,0.0152,0.0162,0.0171,0.0183,0.0196,0.0211,0.0229,0.0246,0.0261,0.0278,0.0298,0.0314,0.0327,0.0342,0.0359,0.0368,0.0370,0.0388,0.0421,0.0437 32 | 0.0046,0.0046,0.0045,0.0046,0.0048,0.0051,0.0054,0.0057,0.0059,0.0062,0.0066,0.0068,0.0071,0.0072,0.0073,0.0077,0.0082,0.0086,0.0088,0.0092,0.0096,0.0100,0.0104,0.0110,0.0117,0.0123,0.0127,0.0131,0.0135,0.0142,0.0150,0.0160,0.0170,0.0181,0.0192,0.0208,0.0227,0.0244,0.0259,0.0276,0.0297,0.0313,0.0326,0.0341,0.0356,0.0365,0.0368,0.0388,0.0424,0.0443 33 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Ali Hasan Khan 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Scraping Whoscored Event Data 2 | ![alt text](https://github.com/Ali-Hasan-Khan/Scrape-Whoscored-Event-Data/blob/main/logo.jpg "Whoscored") 3 | 4 | Tool to scrape match event data from [Whoscored](http://whoscored.com/ "Whoscored")'s chalkboard using **Selenium**. 
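At a glance, the scraping workflow looks like this — a minimal sketch based on `test.py` and `main.py`; the competition/season strings are illustrative, and a working Firefox + geckodriver setup is assumed (see the installation steps and **tutorial.ipynb** below for the full guide):

```python
from main import getLeagueUrls, getMatchUrls, getMatchData, createEventsDF, addEpvToDataFrame, main_url
from selenium import webdriver

# collect competition and fixture URLs (each helper opens its own Firefox window)
leagues = getLeagueUrls()
match_urls = getMatchUrls(comp_urls=leagues, competition='Premier League', season='2019/2020')

# scrape a single match and build an events DataFrame with EPV values added
driver = webdriver.Firefox()
match_data = getMatchData(driver, main_url + match_urls[0]['url'], display=True, close_window=True)
events_df = addEpvToDataFrame(createEventsDF(match_data))
```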
5 | 
6 | Installation: 
7 | 1) `git clone https://github.com/Ali-Hasan-Khan/Scrape-Whoscored-Event-Data.git` 
8 | 
9 | 2) `pip install -r requirements.txt` 
10 | 
11 | 3) For some additional visual customisations, replace the **linecollection.py** in your local mplsoccer installation (somewhere here: ~\anaconda3\Lib\site-packages\mplsoccer) with the **linecollection.py** from this repository. [Recommended for the inverted gradient effect in pass maps] 
12 | 
13 | 4) Follow **tutorial.ipynb** for a step-by-step guide. 
14 | 
15 | 
16 | 
17 | Reach me [here](https://twitter.com/rockingAli5) for any kind of help :) 
18 | 
19 | Special thanks to [Laurie Shaw](https://twitter.com/EightyFivePoint) for the Expected Possession Value model ([check out his work here](http://eightyfivepoints.blogspot.com/)). 
20 | 
21 | For any help/suggestions regarding mplsoccer, reach out to the creators: [Andy](https://twitter.com/numberstorm), [Anmol](https://twitter.com/slothfulwave612). 
22 | 
--------------------------------------------------------------------------------
/linecollection.py:
--------------------------------------------------------------------------------
1 | """ A module with functions for using LineCollection to create lines."""
2 | 
3 | import warnings
4 | 
5 | import numpy as np
6 | from matplotlib import rcParams
7 | from matplotlib.cm import get_cmap
8 | from matplotlib.collections import LineCollection
9 | from matplotlib.colors import to_rgba_array
10 | from matplotlib.legend import Legend
11 | from matplotlib.legend_handler import HandlerLineCollection
12 | 
13 | from mplsoccer.cm import create_transparent_cmap
14 | from mplsoccer.utils import validate_ax
15 | 
16 | __all__ = ['lines']
17 | 
18 | 
19 | def lines(xstart, ystart, xend, yend, color=None, n_segments=100,
20 |           comet=False, opp_comet=False, transparent=False, opp_transparent=False, alpha_start=0.01,
21 |           alpha_end=1, cmap=None, ax=None, vertical=False, reverse_cmap=False, **kwargs):
22 |     """ Plots lines using matplotlib.collections.LineCollection.
23 |     This is a fast way to plot multiple lines without loops.
24 |     Also enables lines that increase in width or opacity by splitting
25 |     the line into n_segments of increasing
26 |     width or opacity as the line progresses.
27 | 
28 |     Parameters
29 |     ----------
30 |     xstart, ystart, xend, yend: array-like or scalar.
31 |         Commonly, these parameters are 1D arrays.
32 |         These should be the start and end coordinates of the lines.
33 |     color : A matplotlib color or sequence of colors, defaults to None.
34 |         In that case the line color is determined
35 |         by the value rcParams['lines.color']
36 |     n_segments : int, default 100
37 |         If comet=True or transparent=True this is used to split the line
38 |         into n_segments of increasing width/opacity.
39 |     comet : bool default False
40 |         Whether to plot the lines increasing in width.
41 |     opp_comet : bool default False
42 |         Whether to plot the lines decreasing in width.
43 |     transparent : bool, default False
44 |         Whether to plot the lines increasing in opacity.
45 |     opp_transparent : bool, default False
46 |         Whether to plot the lines decreasing in opacity.
47 |     linewidth or lw : array-like or scalar, default 5.
48 |         Multiple linewidths not supported for the comet or transparent lines.
49 |     alpha_start: float, default 0.01
50 |         The starting alpha value for transparent lines, between 0 (transparent) and 1 (opaque).
51 |         If transparent = True the line will be drawn to
52 |         linearly increase in opacity between alpha_start and alpha_end.
53 | alpha_end : float, default 1 54 | The ending alpha value for transparent lines, between 0 (transparent) and 1 (opaque). 55 | If transparent = True the line will be drawn to 56 | linearly increase in opacity between alpha_start and alpha_end. 57 | cmap : str, default None 58 | A matplotlib cmap (colormap) name 59 | vertical : bool, default False 60 | If the orientation is vertical (True), then the code switches the x and y coordinates. 61 | reverse_cmap : bool, default False 62 | Whether to reverse the cmap colors. 63 | If the pitch is horizontal and the y-axis is inverted then set this to True. 64 | ax : matplotlib.axes.Axes, default None 65 | The axis to plot on. 66 | **kwargs : All other keyword arguments are passed on to matplotlib.collections.LineCollection. 67 | 68 | Returns 69 | ------- 70 | LineCollection : matplotlib.collections.LineCollection 71 | 72 | Examples 73 | -------- 74 | >>> from mplsoccer import Pitch 75 | >>> pitch = Pitch() 76 | >>> fig, ax = pitch.draw() 77 | >>> pitch.lines(20, 20, 45, 80, comet=True, transparent=True, ax=ax) 78 | 79 | >>> from mplsoccer.linecollection import lines 80 | >>> import matplotlib.pyplot as plt 81 | >>> fig, ax = plt.subplots() 82 | >>> lines([0.1, 0.4], [0.1, 0.5], [0.9, 0.4], [0.8, 0.8], ax=ax) 83 | """ 84 | validate_ax(ax) 85 | if not isinstance(comet, bool): 86 | raise TypeError("Invalid argument: comet should be bool (True or False).") 87 | if not isinstance(transparent, bool): 88 | raise TypeError("Invalid argument: transparent should be bool (True or False).") 89 | 90 | if alpha_start < 0 or alpha_start > 1: 91 | raise TypeError("alpha_start values should be within 0-1 range") 92 | if alpha_end < 0 or alpha_end > 1: 93 | raise TypeError("alpha_end values should be within 0-1 range") 94 | if alpha_start > alpha_end: 95 | msg = "Alpha start > alpha end. The line will increase in transparency nearer to the end" 96 | warnings.warn(msg) 97 | 98 | if 'colors' in kwargs.keys(): 99 | warnings.warn("lines method takes 'color' as an argument, 'colors' in ignored") 100 | 101 | if color is not None and cmap is not None: 102 | raise ValueError("Only use one of color or cmap arguments not both.") 103 | 104 | if 'lw' in kwargs.keys() and 'linewidth' in kwargs.keys(): 105 | raise TypeError("lines got multiple values for 'linewidth' argument (linewidth and lw).") 106 | 107 | # set linewidth 108 | if 'lw' in kwargs.keys(): 109 | lw = kwargs.pop('lw', 5) 110 | elif 'linewidth' in kwargs.keys(): 111 | lw = kwargs.pop('linewidth', 5) 112 | else: 113 | lw = 5 114 | 115 | # to arrays 116 | xstart = np.ravel(xstart) 117 | ystart = np.ravel(ystart) 118 | xend = np.ravel(xend) 119 | yend = np.ravel(yend) 120 | lw = np.ravel(lw) 121 | 122 | if (comet or transparent) and (lw.size > 1): 123 | msg = "Multiple linewidths with a comet or transparent line is not implemented." 124 | raise NotImplementedError(msg) 125 | 126 | # set color 127 | if color is None and cmap is None: 128 | color = rcParams['lines.color'] 129 | 130 | if (comet or transparent) and (cmap is None) and (to_rgba_array(color).shape[0] > 1): 131 | msg = "Multiple colors with a comet or transparent line is not implemented." 
132 | raise NotImplementedError(msg) 133 | 134 | if xstart.size != ystart.size: 135 | raise ValueError("xstart and ystart must be the same size") 136 | if xstart.size != xend.size: 137 | raise ValueError("xstart and xend must be the same size") 138 | if ystart.size != yend.size: 139 | raise ValueError("ystart and yend must be the same size") 140 | 141 | if (lw.size > 1) and (lw.size != xstart.size): 142 | raise ValueError("lw and xstart must be the same size") 143 | 144 | if lw.size == 1: 145 | lw = lw[0] 146 | 147 | if vertical: 148 | ystart, xstart = xstart, ystart 149 | yend, xend = xend, yend 150 | 151 | # create linewidth 152 | if comet: 153 | lw = np.linspace(1, lw, n_segments) 154 | handler_first_lw = False 155 | else: 156 | handler_first_lw = True 157 | 158 | if opp_comet: 159 | lw = np.linspace(lw, 1, n_segments) 160 | 161 | if (transparent is False) and (comet is False) and (cmap is None): 162 | multi_segment = False 163 | else: 164 | multi_segment = True 165 | 166 | if transparent: 167 | cmap = create_transparent_cmap(color, cmap, n_segments, alpha_start, alpha_end) 168 | 169 | if opp_transparent: 170 | cmap = create_transparent_cmap(color, cmap, n_segments, alpha_start, alpha_end) 171 | cmap = cmap.reversed() 172 | 173 | if isinstance(cmap, str): 174 | cmap = get_cmap(cmap) 175 | 176 | if cmap is not None: 177 | handler_cmap = True 178 | line_collection = _lines_cmap(xstart, ystart, xend, yend, lw=lw, cmap=cmap, 179 | ax=ax, n_segments=n_segments, multi_segment=multi_segment, 180 | reverse_cmap=reverse_cmap, **kwargs) 181 | else: 182 | handler_cmap = False 183 | line_collection = _lines_no_cmap(xstart, ystart, xend, yend, 184 | lw=lw, color=color, ax=ax, n_segments=n_segments, 185 | multi_segment=multi_segment, **kwargs) 186 | 187 | line_collection_handler = HandlerLines(numpoints=n_segments, invert_y=reverse_cmap, 188 | first_lw=handler_first_lw, use_cmap=handler_cmap) 189 | Legend.update_default_handler_map({line_collection: line_collection_handler}) 190 | 191 | return line_collection 192 | 193 | 194 | def _create_segments(xstart, ystart, xend, yend, n_segments=100, multi_segment=False): 195 | if multi_segment: 196 | x = np.linspace(xstart, xend, n_segments + 1) 197 | y = np.linspace(ystart, yend, n_segments + 1) 198 | points = np.array([x, y]).T 199 | points = np.concatenate([points, np.expand_dims(points[:, -1, :], 1)], axis=1) 200 | points = np.expand_dims(points, 1) 201 | segments = np.concatenate([points[:, :, :-2, :], 202 | points[:, :, 1:-1, :], 203 | points[:, :, 2:, :]], axis=1) 204 | segments = np.transpose(segments, (0, 2, 1, 3)).reshape((-1, 3, 2)) 205 | else: 206 | segments = np.transpose(np.array([[xstart, ystart], [xend, yend]]), (2, 0, 1)) 207 | return segments 208 | 209 | 210 | def _lines_no_cmap(xstart, ystart, xend, yend, lw=None, color=None, ax=None, 211 | n_segments=100, multi_segment=False, **kwargs): 212 | segments = _create_segments(xstart, ystart, xend, yend, 213 | n_segments=n_segments, multi_segment=multi_segment) 214 | color = to_rgba_array(color) 215 | if (color.shape[0] > 1) and (color.shape[0] != xstart.size): 216 | raise ValueError("xstart and color must be the same size") 217 | line_collection = LineCollection(segments, color=color, linewidth=lw, snap=False, **kwargs) 218 | line_collection = ax.add_collection(line_collection) 219 | return line_collection 220 | 221 | 222 | def _lines_cmap(xstart, ystart, xend, yend, lw=None, cmap=None, ax=None, 223 | n_segments=100, multi_segment=False, reverse_cmap=False, **kwargs): 224 | segments = 
_create_segments(xstart, ystart, xend, yend, 225 | n_segments=n_segments, multi_segment=multi_segment) 226 | if reverse_cmap: 227 | cmap = cmap.reversed() 228 | line_collection = LineCollection(segments, cmap=cmap, linewidth=lw, snap=False, **kwargs) 229 | line_collection = ax.add_collection(line_collection) 230 | extent = ax.get_ylim() 231 | pitch_array = np.linspace(extent[0], extent[1], n_segments) 232 | line_collection.set_array(pitch_array) 233 | return line_collection 234 | 235 | 236 | # Amended from 237 | # https://stackoverflow.com/questions/49223702/adding-a-legend-to-a-matplotlib-plot-with-a-multicolored-line?rq=1 238 | class HandlerLines(HandlerLineCollection): 239 | """Automatically generated by Pitch.lines() to allow use of linecollection in legend. 240 | """ 241 | 242 | def __init__(self, invert_y=False, first_lw=False, use_cmap=False, 243 | marker_pad=0.3, numpoints=None, **kw): 244 | HandlerLineCollection.__init__(self, marker_pad=marker_pad, numpoints=numpoints, **kw) 245 | self.invert_y = invert_y 246 | self.first_lw = first_lw 247 | self.use_cmap = use_cmap 248 | 249 | def create_artists(self, legend, artist, xdescent, ydescent, 250 | width, height, fontsize, trans): 251 | x = np.linspace(0, width, self.get_numpoints(legend) + 1) 252 | y = np.zeros(self.get_numpoints(legend) + 1) + height / 2. - ydescent 253 | points = np.array([x, y]).T.reshape(-1, 1, 2) 254 | segments = np.concatenate([points[:-1], points[1:]], axis=1) 255 | lw = artist.get_linewidth() 256 | if self.first_lw: 257 | lw = lw[0] 258 | if self.use_cmap: 259 | cmap = artist.cmap 260 | if self.invert_y: 261 | cmap = cmap.reversed() 262 | line_collection = LineCollection(segments, lw=lw, cmap=cmap, 263 | snap=False, transform=trans) 264 | line_collection.set_array(x) 265 | else: 266 | line_collection = LineCollection(segments, lw=lw, colors=artist.get_colors()[0], 267 | snap=False, transform=trans) 268 | return [line_collection] 269 | -------------------------------------------------------------------------------- /logo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ali-Hasan-Khan/Scrape-Whoscored-Event-Data/1bdabe58386f57edc417a9ae6590a507635060bb/logo.jpg -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Wed Oct 14 14:20:02 2020 4 | 5 | @author: aliha 6 | @twitter: rockingAli5 7 | """ 8 | 9 | import warnings 10 | import time 11 | import pandas as pd 12 | pd.options.mode.chained_assignment = None 13 | import json 14 | from bs4 import BeautifulSoup as soup 15 | import re 16 | from collections import OrderedDict 17 | from datetime import datetime as dt 18 | import itertools 19 | import numpy as np 20 | try: 21 | from tqdm import trange 22 | except ModuleNotFoundError: 23 | pass 24 | 25 | 26 | from selenium import webdriver 27 | from selenium.common.exceptions import NoSuchElementException, WebDriverException 28 | from selenium.webdriver.common.by import By 29 | 30 | # options = webdriver.FirefoxOptions() 31 | 32 | # options.add_experimental_option('excludeSwitches', ['enable-logging']) 33 | 34 | 35 | TRANSLATE_DICT = {'Jan': 'Jan', 36 | 'Feb': 'Feb', 37 | 'Mac': 'Mar', 38 | 'Apr': 'Apr', 39 | 'Mei': 'May', 40 | 'Jun': 'Jun', 41 | 'Jul': 'Jul', 42 | 'Ago': 'Aug', 43 | 'Sep': 'Sep', 44 | 'Okt': 'Oct', 45 | 'Nov': 'Nov', 46 | 'Des': 'Dec', 47 | 'Jan': 'Jan', 48 | 'Feb': 
'Feb',
49 |                   'Mar': 'Mar',
50 |                   'Apr': 'Apr',
51 |                   'May': 'May',
52 |                   'Jun': 'Jun',
53 |                   'Jul': 'Jul',
54 |                   'Aug': 'Aug',
55 |                   'Sep': 'Sep',
56 |                   'Oct': 'Oct',
57 |                   'Nov': 'Nov',
58 |                   'Dec': 'Dec'}
59 | 
60 | main_url = 'https://1xbet.whoscored.com/'
61 | 
62 | 
63 | 
64 | def getLeagueUrls(minimize_window=True):
65 | 
66 |     driver = webdriver.Firefox()
67 | 
68 |     if minimize_window:
69 |         driver.minimize_window()
70 | 
71 |     driver.get(main_url)
72 |     league_names = []
73 |     league_urls = []
74 |     try:
75 |         cookie_button = driver.find_element(By.XPATH, '//*[@class=" css-gweyaj"]').click()
76 |     except NoSuchElementException:
77 |         pass
78 |     tournaments_btn = driver.find_element(By.XPATH, '//*[@id="All-Tournaments-btn"]').click()
79 |     n_button = soup(driver.find_element(By.XPATH, '//*[@id="header-wrapper"]/div/div/div/div[4]/div[2]/div/div/div/div[1]/div/div').get_attribute('innerHTML')).find_all('button')
80 |     n_tournaments = []
81 |     for button in n_button:
82 |         id_button = button.get('id')
83 |         driver.find_element(By.ID, id_button).click()
84 |         n_country = soup(driver.find_element(By.XPATH, '//*[@id="header-wrapper"]/div/div/div/div[4]/div[2]/div/div/div/div[2]').get_attribute('innerHTML')).find_all('div', {'class':'TournamentsDropdownMenu-module_countryDropdownContainer__I9P6n'})
85 | 
86 |         for country in n_country:
87 |             country_id = country.find('div', {'class': 'TournamentsDropdownMenu-module_countryDropdown__8rtD-'}).get('id')
88 | 
89 |             # find the country element with Selenium and click on it
90 |             country_element = driver.find_element(By.ID, country_id)
91 |             country_element.click()
92 | 
93 |             html_tournaments_list = driver.find_element(By.XPATH, '//*[@id="header-wrapper"]/div/div/div/div[4]/div[2]/div/div/div/div[2]').get_attribute('innerHTML')
94 | 
95 |             # parse the HTML with BeautifulSoup to find the tournament links
96 |             soup_tournaments = soup(html_tournaments_list, 'html.parser')
97 |             tournaments = soup_tournaments.find_all('a')
98 | 
99 |             # add the tournaments to the n_tournaments list
100 |             n_tournaments.extend(tournaments)
101 | 
102 |             driver.execute_script("arguments[0].click();", country_element)
103 | 
104 | 
105 |     for tournament in n_tournaments:
106 |         league_name = tournament.get('href').split('/')[-1]
107 |         league_link = main_url[:-1]+tournament.get('href')
108 |         league_names.append(league_name)
109 |         league_urls.append(league_link)
110 | 
111 |     leagues = {}
112 |     for name,link in zip(league_names,league_urls):
113 |         leagues[name] = link
114 | 
115 |     driver.close()
116 |     return leagues
117 | 
118 | 
119 | def getMatchUrls(comp_urls, competition, season, maximize_window=True):
120 | 
121 |     driver = webdriver.Firefox()
122 | 
123 |     if maximize_window:
124 |         driver.maximize_window()
125 | 
126 |     comp_url = comp_urls[competition]
127 |     driver.get(comp_url)
128 |     time.sleep(5)
129 | 
130 |     seasons = driver.find_element(By.XPATH, '//*[@id="seasons"]').get_attribute('innerHTML').split(sep='\n')
131 |     seasons = [i for i in seasons if i]
132 | 
133 | 
134 |     for i in range(1, len(seasons)+1):
135 |         if driver.find_element(By.XPATH, '//*[@id="seasons"]/option['+str(i)+']').text == season:
136 |             driver.find_element(By.XPATH, '//*[@id="seasons"]/option['+str(i)+']').click()
137 | 
138 |             time.sleep(5)
139 |             try:
140 |                 stages = driver.find_element(By.XPATH, '//*[@id="stages"]').get_attribute('innerHTML').split(sep='\n')
141 |                 stages = [i for i in stages if i]
142 | 
143 |                 all_urls = []
144 | 
145 |                 for i in range(1, len(stages)+1):
146 |                     print(driver.find_element(By.XPATH, '//*[@id="stages"]/option['+str(i)+']').text)
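                    # Stage handling below: for the Champions League and Europa League only the group-stage
                    # ('Grp') and 'Final Stage' entries are scraped; for Major League Soccer the 'Grp. ' stages
                    # are skipped; every listed stage is scraped for any other competition. Fixtures whose date
                    # contains '?' or a newline (postponed/suspended games) are filtered out of the results.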
147 | if competition == 'Champions League' or competition == 'Europa League': 148 | if 'Grp' in driver.find_element(By.XPATH, '//*[@id="stages"]/option['+str(i)+']').text or 'Final Stage' in driver.find_element(By.XPATH, '//*[@id="stages"]/option['+str(i)+']').text: 149 | driver.find_element(By.XPATH, '//*[@id="stages"]/option['+str(i)+']').click() 150 | time.sleep(5) 151 | 152 | driver.execute_script("window.scrollTo(0, 400)") 153 | 154 | match_urls = getFixtureData(driver) 155 | 156 | match_urls = getSortedData(match_urls) 157 | 158 | match_urls2 = [url for url in match_urls if '?' not in url['date'] and '\n' not in url['date']] 159 | 160 | all_urls += match_urls2 161 | else: 162 | continue 163 | 164 | elif competition == 'Major League Soccer': 165 | if 'Grp. ' not in driver.find_element(By.XPATH, '//*[@id="stages"]/option['+str(i)+']').text: 166 | driver.find_element(By.XPATH, '//*[@id="stages"]/option['+str(i)+']').click() 167 | time.sleep(5) 168 | 169 | driver.execute_script("window.scrollTo(0, 400)") 170 | 171 | match_urls = getFixtureData(driver) 172 | 173 | match_urls = getSortedData(match_urls) 174 | 175 | match_urls2 = [url for url in match_urls if '?' not in url['date'] and '\n' not in url['date']] 176 | 177 | all_urls += match_urls2 178 | else: 179 | continue 180 | 181 | else: 182 | driver.find_element(By.XPATH, '//*[@id="stages"]/option['+str(i)+']').click() 183 | time.sleep(5) 184 | 185 | driver.execute_script("window.scrollTo(0, 400)") 186 | 187 | match_urls = getFixtureData(driver) 188 | 189 | match_urls = getSortedData(match_urls) 190 | 191 | match_urls2 = [url for url in match_urls if '?' not in url['date'] and '\n' not in url['date']] 192 | 193 | all_urls += match_urls2 194 | 195 | except NoSuchElementException: 196 | all_urls = [] 197 | 198 | driver.execute_script("window.scrollTo(0, 400)") 199 | 200 | match_urls = getFixtureData(driver) 201 | 202 | match_urls = getSortedData(match_urls) 203 | 204 | match_urls2 = [url for url in match_urls if '?' 
not in url['date'] and '\n' not in url['date']]
205 | 
206 |                 all_urls += match_urls2
207 | 
208 | 
209 |             remove_dup = [dict(t) for t in {tuple(sorted(d.items())) for d in all_urls}]
210 |             all_urls = getSortedData(remove_dup)
211 | 
212 |             driver.close()
213 | 
214 |             return all_urls
215 | 
216 |     season_names = [re.search(r'\>(.*?)\<',season).group(1) for season in seasons]
217 |     driver.close()
218 |     print('Seasons available: {}'.format(season_names))
219 |     raise ValueError('Season Not Found.')
220 | 
221 | 
222 | 
223 | 
224 | 
225 | def getTeamUrls(team, match_urls):
226 | 
227 |     team_data = []
228 |     for fixture in match_urls:
229 |         if fixture['home'] == team or fixture['away'] == team:
230 |             team_data.append(fixture)
231 |     team_data = [a[0] for a in itertools.groupby(team_data)]
232 | 
233 |     return team_data
234 | 
235 | 
236 | def getMatchesData(match_urls, minimize_window=True):
237 | 
238 |     matches = []
239 | 
240 |     driver = webdriver.Firefox()
241 |     if minimize_window:
242 |         driver.minimize_window()
243 | 
244 |     try:
245 |         for i in trange(len(match_urls), desc='Getting Match Data'):
246 |             # recommended to avoid getting blocked by incapsula/imperva bots
247 |             time.sleep(7)
248 |             match_data = getMatchData(driver, main_url+match_urls[i]['url'], display=False, close_window=False)
249 |             matches.append(match_data)
250 |     except NameError:
251 |         print('Recommended: \'pip install tqdm\' for a progress bar while the data gets scraped....')
252 |         time.sleep(7)
253 |         for i in range(len(match_urls)):
254 |             match_data = getMatchData(driver, main_url+match_urls[i]['url'], display=False, close_window=False)
255 |             matches.append(match_data)
256 | 
257 |     driver.close()
258 | 
259 |     return matches
260 | 
261 | 
262 | 
263 | 
264 | def getFixtureData(driver):
265 |     matches_ls = []
266 |     while True:
267 |         initial = driver.page_source
268 |         all_fixtures = driver.find_elements(By.CLASS_NAME, 'Accordion-module_accordion__UuHD0')
269 |         for dates in all_fixtures:
270 |             fixtures = dates.find_elements(By.CLASS_NAME, 'Match-module_row__zwBOn')
271 |             date_row = dates.find_element(By.CLASS_NAME, 'Accordion-module_header__HqzWD')
272 |             for row in fixtures:
273 |                 url = row.find_element(By.TAG_NAME, 'a')
274 |                 if 'live' in url.get_attribute('href'):
275 |                     # print(url.get_attribute('href'))
276 |                     match_dict = {}
277 |                     element = soup(row.get_attribute('innerHTML'), features='lxml')
278 |                     teams_tag = element.find("div", {"class":"Match-module_teams__sGVeq"})
279 |                     link_tag = element.find("a")
280 |                     match_dict['date'] = date_row.text
281 |                     match_dict['home'] = teams_tag.find_all('a')[0].text
282 |                     match_dict['away'] = teams_tag.find_all('a')[1].text
283 |                     match_dict['score'] = ':'.join([t.text for t in link_tag.find_all('span')])
284 |                     match_dict['url'] = link_tag['href']
285 |                     # print(match_dict)
286 |                     matches_ls.append(match_dict)
287 |         prev_btn = driver.find_element(By.ID, 'dayChangeBtn-prev')
288 |         prev_btn.click()
289 |         time.sleep(1)
290 |         final = driver.page_source
291 |         if initial == final:
292 |             break
293 | 
294 |     return matches_ls
295 | 
296 | 
297 | 
298 | 
299 | 
300 | 
301 | def translateDate(data):
302 | 
303 |     unwanted = []
304 |     for match in data:
305 |         date = match['date'].split()
306 |         if '?'
not in date[0]: 307 | try: 308 | match['date'] = ' '.join([TRANSLATE_DICT[date[0]], date[1], date[2]]) 309 | except KeyError: 310 | print(date) 311 | else: 312 | unwanted.append(data.index(match)) 313 | 314 | # remove matches that got suspended/postponed 315 | for i in sorted(unwanted, reverse = True): 316 | del data[i] 317 | 318 | return data 319 | 320 | 321 | def getSortedData(data): 322 | data = sorted(data, key = lambda i: dt.strptime(i['date'], '%A, %b %d %Y')) 323 | return data 324 | 325 | 326 | 327 | 328 | def getMatchData(driver, url, display=True, close_window=True): 329 | try: 330 | driver.get(url) 331 | except WebDriverException: 332 | driver.get(url) 333 | 334 | time.sleep(5) 335 | # get script data from page source 336 | script_content = driver.find_element(By.XPATH, '//*[@id="layout-wrapper"]/script[1]').get_attribute('innerHTML') 337 | 338 | 339 | # clean script content 340 | script_content = re.sub(r"[\n\t]*", "", script_content) 341 | script_content = script_content[script_content.index("matchId"):script_content.rindex("}")] 342 | 343 | 344 | # this will give script content in list form 345 | script_content_list = list(filter(None, script_content.strip().split(', '))) 346 | metadata = script_content_list.pop(1) 347 | 348 | 349 | # string format to json format 350 | match_data = json.loads(metadata[metadata.index('{'):]) 351 | keys = [item[:item.index(':')].strip() for item in script_content_list] 352 | values = [item[item.index(':')+1:].strip() for item in script_content_list] 353 | for key,val in zip(keys, values): 354 | match_data[key] = json.loads(val) 355 | 356 | 357 | # get other details about the match 358 | region = driver.find_element(By.XPATH, '//*[@id="breadcrumb-nav"]/span[1]').text 359 | league = driver.find_element(By.XPATH, '//*[@id="breadcrumb-nav"]/a').text.split(' - ')[0] 360 | season = driver.find_element(By.XPATH, '//*[@id="breadcrumb-nav"]/a').text.split(' - ')[1] 361 | if len(driver.find_element(By.XPATH, '//*[@id="breadcrumb-nav"]/a').text.split(' - ')) == 2: 362 | competition_type = 'League' 363 | competition_stage = '' 364 | elif len(driver.find_element(By.XPATH, '//*[@id="breadcrumb-nav"]/a').text.split(' - '))== 3: 365 | competition_type = 'Knock Out' 366 | competition_stage = driver.find_element(By.XPATH, '//*[@id="breadcrumb-nav"]/a').text.split(' - ')[-1] 367 | else: 368 | print('Getting more than 3 types of information about the competition.') 369 | 370 | match_data['region'] = region 371 | match_data['league'] = league 372 | match_data['season'] = season 373 | match_data['competitionType'] = competition_type 374 | match_data['competitionStage'] = competition_stage 375 | 376 | 377 | # sort match_data dictionary alphabetically 378 | match_data = OrderedDict(sorted(match_data.items())) 379 | match_data = dict(match_data) 380 | if display: 381 | print('Region: {}, League: {}, Season: {}, Match Id: {}'.format(region, league, season, match_data['matchId'])) 382 | 383 | 384 | if close_window: 385 | driver.close() 386 | 387 | return match_data 388 | 389 | 390 | 391 | 392 | 393 | def createEventsDF(data): 394 | events = data['events'] 395 | for event in events: 396 | event.update({'matchId' : data['matchId'], 397 | 'startDate' : data['startDate'], 398 | 'startTime' : data['startTime'], 399 | 'score' : data['score'], 400 | 'ftScore' : data['ftScore'], 401 | 'htScore' : data['htScore'], 402 | 'etScore' : data['etScore'], 403 | 'venueName' : data['venueName'], 404 | 'maxMinute' : data['maxMinute']}) 405 | events_df = pd.DataFrame(events) 406 | 407 | # 
clean period column 408 | events_df['period'] = pd.json_normalize(events_df['period'])['displayName'] 409 | 410 | # clean type column 411 | events_df['type'] = pd.json_normalize(events_df['type'])['displayName'] 412 | 413 | # clean outcomeType column 414 | events_df['outcomeType'] = pd.json_normalize(events_df['outcomeType'])['displayName'] 415 | 416 | # clean outcomeType column 417 | try: 418 | x = events_df['cardType'].fillna({i: {} for i in events_df.index}) 419 | events_df['cardType'] = pd.json_normalize(x)['displayName'].fillna(False) 420 | except KeyError: 421 | events_df['cardType'] = False 422 | 423 | eventTypeDict = data['matchCentreEventTypeJson'] 424 | events_df['satisfiedEventsTypes'] = events_df['satisfiedEventsTypes'].apply(lambda x: [list(eventTypeDict.keys())[list(eventTypeDict.values()).index(event)] for event in x]) 425 | 426 | # clean qualifiers column 427 | try: 428 | for i in events_df.index: 429 | row = events_df.loc[i, 'qualifiers'].copy() 430 | if len(row) != 0: 431 | for irow in range(len(row)): 432 | row[irow]['type'] = row[irow]['type']['displayName'] 433 | except TypeError: 434 | pass 435 | 436 | 437 | # clean isShot column 438 | with warnings.catch_warnings(): 439 | warnings.simplefilter("ignore", category=FutureWarning) 440 | if 'isShot' in events_df.columns: 441 | events_df['isShot'] = events_df['isShot'].replace(np.nan, False).infer_objects(copy=False) 442 | else: 443 | events_df['isShot'] = False 444 | 445 | # clean isGoal column 446 | if 'isGoal' in events_df.columns: 447 | events_df['isGoal'] = events_df['isGoal'].replace(np.nan, False).infer_objects(copy=False) 448 | else: 449 | events_df['isGoal'] = False 450 | 451 | # add player name column 452 | with warnings.catch_warnings(): 453 | warnings.simplefilter("ignore", category=FutureWarning) 454 | events_df.loc[events_df.playerId.notna(), 'playerId'] = events_df.loc[events_df.playerId.notna(), 'playerId'].astype(int).astype(str) 455 | player_name_col = events_df.loc[:, 'playerId'].map(data['playerIdNameDictionary']) 456 | events_df.insert(loc=events_df.columns.get_loc("playerId")+1, column='playerName', value=player_name_col) 457 | 458 | # add home/away column 459 | h_a_col = events_df['teamId'].map({data['home']['teamId']:'h', data['away']['teamId']:'a'}) 460 | events_df.insert(loc=events_df.columns.get_loc("teamId")+1, column='h_a', value=h_a_col) 461 | 462 | 463 | # adding shot body part column 464 | events_df['shotBodyType'] = np.nan 465 | with warnings.catch_warnings(): 466 | warnings.simplefilter("ignore", category=FutureWarning) 467 | for i in events_df.loc[events_df.isShot==True].index: 468 | for j in events_df.loc[events_df.isShot==True].qualifiers.loc[i]: 469 | if j['type'] == 'RightFoot' or j['type'] == 'LeftFoot' or j['type'] == 'Head' or j['type'] == 'OtherBodyPart': 470 | events_df.loc[i, 'shotBodyType'] = j['type'] 471 | 472 | 473 | # adding shot situation column 474 | events_df['situation'] = np.nan 475 | with warnings.catch_warnings(): 476 | warnings.simplefilter("ignore", category=FutureWarning) 477 | for i in events_df.loc[events_df.isShot==True].index: 478 | for j in events_df.loc[events_df.isShot==True].qualifiers.loc[i]: 479 | if j['type'] == 'FromCorner' or j['type'] == 'SetPiece' or j['type'] == 'DirectFreekick': 480 | events_df.loc[i, 'situation'] = j['type'] 481 | if j['type'] == 'RegularPlay': 482 | events_df.loc[i, 'situation'] = 'OpenPlay' 483 | 484 | event_types = list(data['matchCentreEventTypeJson'].keys()) 485 | event_type_cols = pd.DataFrame({event_type: 
pd.Series([event_type in row for row in events_df['satisfiedEventsTypes']]) for event_type in event_types}) 486 | events_df = pd.concat([events_df, event_type_cols], axis=1) 487 | 488 | 489 | return events_df 490 | 491 | 492 | 493 | 494 | def createMatchesDF(data): 495 | columns_req_ls = ['matchId', 'attendance', 'venueName', 'startTime', 'startDate', 496 | 'score', 'home', 'away', 'referee'] 497 | matches_df = pd.DataFrame(columns=columns_req_ls) 498 | if type(data) == dict: 499 | matches_dict = dict([(key,val) for key,val in data.items() if key in columns_req_ls]) 500 | matches_df = pd.DataFrame(matches_dict, columns=columns_req_ls).reset_index(drop=True) 501 | matches_df[['home', 'away']] = np.nan 502 | with warnings.catch_warnings(): 503 | warnings.simplefilter("ignore", category=FutureWarning) 504 | matches_df['home'].iloc[0] = [data['home']] 505 | matches_df['away'].iloc[0] = [data['away']] 506 | else: 507 | for match in data: 508 | matches_dict = dict([(key,val) for key,val in match.items() if key in columns_req_ls]) 509 | matches_df = pd.DataFrame(matches_dict, columns=columns_req_ls).reset_index(drop=True) 510 | 511 | matches_df = matches_df.set_index('matchId') 512 | return matches_df 513 | 514 | 515 | 516 | 517 | def load_EPV_grid(fname='EPV_grid.csv'): 518 | """ load_EPV_grid(fname='EPV_grid.csv') 519 | 520 | # load pregenerated EPV surface from file. 521 | 522 | Parameters 523 | ----------- 524 | fname: filename & path of EPV grid (default is 'EPV_grid.csv' in the curernt directory) 525 | 526 | Returns 527 | ----------- 528 | EPV: The EPV surface (default is a (32,50) grid) 529 | 530 | """ 531 | epv = np.loadtxt(fname, delimiter=',') 532 | return epv 533 | 534 | 535 | 536 | 537 | 538 | 539 | def get_EPV_at_location(position,EPV,attack_direction,field_dimen=(106.,68.)): 540 | """ get_EPV_at_location 541 | 542 | Returns the EPV value at a given (x,y) location 543 | 544 | Parameters 545 | ----------- 546 | position: Tuple containing the (x,y) pitch position 547 | EPV: tuple Expected Possession value grid (loaded using load_EPV_grid() ) 548 | attack_direction: Sets the attack direction (1: left->right, -1: right->left) 549 | field_dimen: tuple containing the length and width of the pitch in meters. Default is (106,68) 550 | 551 | Returrns 552 | ----------- 553 | EPV value at input position 554 | 555 | """ 556 | 557 | x,y = position 558 | if abs(x)>field_dimen[0]/2. or abs(y)>field_dimen[1]/2.: 559 | return 0.0 # Position is off the field, EPV is zero 560 | else: 561 | if attack_direction==-1: 562 | EPV = np.fliplr(EPV) 563 | ny,nx = EPV.shape 564 | dx = field_dimen[0]/float(nx) 565 | dy = field_dimen[1]/float(ny) 566 | ix = (x+field_dimen[0]/2.-0.0001)/dx 567 | iy = (y+field_dimen[1]/2.-0.0001)/dy 568 | return EPV[int(iy),int(ix)] 569 | 570 | 571 | 572 | 573 | 574 | def to_metric_coordinates_from_whoscored(data,field_dimen=(106.,68.) 
): 575 | ''' 576 | Convert positions from Whoscored units to meters (with origin at centre circle) 577 | ''' 578 | x_columns = [c for c in data.columns if c[-1].lower()=='x'][:2] 579 | y_columns = [c for c in data.columns if c[-1].lower()=='y'][:2] 580 | x_columns_mod = [c+'_metrica' for c in x_columns] 581 | y_columns_mod = [c+'_metrica' for c in y_columns] 582 | data[x_columns_mod] = (data[x_columns]/100*106)-53 583 | data[y_columns_mod] = (data[y_columns]/100*68)-34 584 | return data 585 | 586 | 587 | 588 | 589 | def addEpvToDataFrame(data): 590 | 591 | # loading EPV data 592 | EPV = load_EPV_grid('EPV_grid.csv') 593 | 594 | # converting opta coordinates to metric coordinates 595 | data = to_metric_coordinates_from_whoscored(data) 596 | 597 | # calculating EPV for events 598 | EPV_difference = [] 599 | for i in data.index: 600 | if data.loc[i, 'type'] == 'Pass' and data.loc[i, 'outcomeType'] == 'Successful': 601 | start_pos = (data.loc[i, 'x_metrica'], data.loc[i, 'y_metrica']) 602 | start_epv = get_EPV_at_location(start_pos, EPV, attack_direction=1) 603 | 604 | end_pos = (data.loc[i, 'endX_metrica'], data.loc[i, 'endY_metrica']) 605 | end_epv = get_EPV_at_location(end_pos, EPV, attack_direction=1) 606 | 607 | diff = end_epv - start_epv 608 | EPV_difference.append(diff) 609 | 610 | else: 611 | EPV_difference.append(np.nan) 612 | 613 | data = data.assign(EPV_difference = EPV_difference) 614 | 615 | 616 | # dump useless columns 617 | drop_cols = ['x_metrica', 'endX_metrica', 'y_metrica', 618 | 'endY_metrica'] 619 | data.drop(drop_cols, axis=1, inplace=True) 620 | data.rename(columns={'EPV_difference': 'EPV'}, inplace=True) 621 | 622 | return data 623 | 624 | 625 | 626 | 627 | 628 | 629 | 630 | 631 | 632 | 633 | 634 | 635 | 636 | 637 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | pandas 3 | matplotlib 4 | seaborn 5 | selenium==4.16.0 6 | mplsoccer==1.2.2 7 | requests 8 | unzip 9 | tqdm -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | # import relevant functions 2 | from main import getLeagueUrls, getMatchUrls, getTeamUrls, getMatchesData, getMatchData, createEventsDF, createMatchesDF, addEpvToDataFrame 3 | 4 | # import relevant variables 5 | from main import main_url 6 | 7 | # import relevant packages 8 | import pandas as pd 9 | 10 | from selenium import webdriver 11 | options = webdriver.ChromeOptions() 12 | options.add_experimental_option('excludeSwitches', ['enable-logging']) 13 | 14 | 15 | 16 | 17 | # write test functions for all functions in file 18 | def test(): 19 | print('Testing getLeagueUrls function...') 20 | leagues = getLeagueUrls() 21 | assert type(leagues) == dict 22 | assert len(leagues) == 23 23 | print('getLeagueUrls function passed all tests.') 24 | 25 | print('Testing getMatchUrls function...') 26 | comp_urls = getLeagueUrls() 27 | match_urls = getMatchUrls(comp_urls, 'Premier League', '2019/2020') 28 | assert type(match_urls) == list 29 | assert len(match_urls) == 380 30 | print('getMatchUrls function passed all tests.') 31 | 32 | print('Testing getTeamUrls function...') 33 | team_urls = getTeamUrls('Liverpool', match_urls) 34 | assert type(team_urls) == list 35 | assert len(team_urls) == 38 36 | print('getTeamUrls function passed all tests.') 37 | 38 | print('Testing getMatchesData function...') 
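    # note: getMatchesData opens its own Firefox window and sleeps ~7 seconds per fixture
    # to avoid being blocked by Incapsula/Imperva, so scraping a full 38-match season takes a while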
39 | matches = getMatchesData(team_urls) 40 | assert type(matches) == list 41 | assert len(matches) == 38 42 | print('getMatchesData function passed all tests.') 43 | 44 | print('Testing getMatchData function...') 45 | driver = webdriver.Chrome('drivers/chromedriver.exe', options=options) 46 | match_data = getMatchData(driver, main_url+'/Matches/1375927/Live/England-Premier-League-2019-2020-Liverpool-Norwich') 47 | assert type(match_data) == dict 48 | assert len(match_data) == 36 49 | print('getMatchData function passed all tests.') 50 | 51 | print('Testing createEventsDF function...') 52 | events_df = createEventsDF(match_data) 53 | assert type(events_df) == pd.core.frame.DataFrame 54 | assert events_df.shape[1] == 259 55 | print('createEventsDF function passed all tests.') 56 | 57 | print('Testing createMatchesDF function...') 58 | matches_df = createMatchesDF(match_data) 59 | assert type(matches_df) == pd.core.frame.DataFrame 60 | assert matches_df.shape[1] == 8 61 | print('createMatchesDF function passed all tests.') 62 | 63 | print('Testing addEpvToDataFrame function...') 64 | events_df = addEpvToDataFrame(events_df) 65 | assert type(events_df) == pd.core.frame.DataFrame 66 | assert events_df.shape[1] == 260 67 | print('addEpvToDataFrame function passed all tests.') 68 | 69 | print('All tests passed.') 70 | 71 | if __name__ == '__main__': 72 | test() 73 | 74 | 75 | 76 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | from sys import platform 4 | 5 | def extract_version_registry(output): 6 | try: 7 | google_version = '' 8 | for letter in output[output.rindex('DisplayVersion REG_SZ') + 24:]: 9 | if letter != '\n': 10 | google_version += letter 11 | else: 12 | break 13 | return(google_version.strip()) 14 | except TypeError: 15 | return 16 | 17 | def extract_version_folder(): 18 | # Check if the Chrome folder exists in the x32 or x64 Program Files folders. 19 | for i in range(2): 20 | path = 'C:\\Program Files' + (' (x86)' if i else '') +'\\Google\\Chrome\\Application' 21 | if os.path.isdir(path): 22 | paths = [f.path for f in os.scandir(path) if f.is_dir()] 23 | for path in paths: 24 | filename = os.path.basename(path) 25 | pattern = '\d+\.\d+\.\d+\.\d+' 26 | match = re.search(pattern, filename) 27 | if match and match.group(): 28 | # Found a Chrome version. 29 | return match.group(0) 30 | 31 | return None 32 | 33 | def get_chrome_version(): 34 | version = None 35 | install_path = None 36 | 37 | try: 38 | if platform == "linux" or platform == "linux2": 39 | # linux 40 | install_path = "/usr/bin/google-chrome" 41 | elif platform == "darwin": 42 | # OS X 43 | install_path = "/Applications/Google\ Chrome.app/Contents/MacOS/Google\ Chrome" 44 | elif platform == "win32": 45 | # Windows... 46 | try: 47 | # Try registry key. 48 | stream = os.popen('reg query "HKLM\\SOFTWARE\\Wow6432Node\\Microsoft\\Windows\\CurrentVersion\\Uninstall\\Google Chrome"') 49 | output = stream.read() 50 | version = extract_version_registry(output) 51 | except Exception as ex: 52 | # Try folder path. 
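                # (registry query failed, so fall back to scanning the Program Files
                # Chrome folders for a version-numbered directory)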
53 | version = extract_version_folder() 54 | except Exception as ex: 55 | print(ex) 56 | 57 | version = os.popen(f"{install_path} --version").read().strip('Google Chrome ').strip() if install_path else version 58 | 59 | return version -------------------------------------------------------------------------------- /visuals.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Wed Oct 14 14:38:46 2020 4 | 5 | @author: aliha 6 | @twitter: rockingAli5 7 | """ 8 | 9 | import pandas as pd 10 | import numpy as np 11 | from mplsoccer.pitch import Pitch, VerticalPitch 12 | from matplotlib.colors import to_rgba 13 | from matplotlib.patches import ConnectionPatch 14 | from itertools import combinations 15 | import seaborn as sns 16 | 17 | 18 | def createShotmap(match_data, events_df, team, pitchcolor, shotcolor, goalcolor, 19 | titlecolor, legendcolor, marker_size, fig, ax): 20 | # getting team id and venue 21 | if match_data['home']['name'] == team: 22 | teamId = match_data['home']['teamId'] 23 | venue = 'home' 24 | else: 25 | teamId = match_data['away']['teamId'] 26 | venue = 'away' 27 | 28 | # getting opponent 29 | if venue == 'home': 30 | opponent = match_data['away']['name'] 31 | else: 32 | opponent = match_data['home']['name'] 33 | 34 | total_shots = events_df.loc[events_df['isShot']==True].reset_index(drop=True) 35 | team_shots = total_shots.loc[total_shots['teamId'] == teamId].reset_index(drop=True) 36 | mask_goal = team_shots.isGoal == True 37 | 38 | # Setup the pitch 39 | # orientation='vertical' 40 | pitch = VerticalPitch(pitch_type='statsbomb', pitch_color=pitchcolor, line_color='#c7d5cc', 41 | half=True, pad_top=2) 42 | pitch.draw(ax=ax, tight_layout=True, constrained_layout=True) 43 | 44 | 45 | # Plot the goals 46 | pitch.scatter(team_shots[mask_goal].x/100*120, 80-team_shots[mask_goal].y/100*80, s=marker_size, 47 | edgecolors='black', c=goalcolor, zorder=2, 48 | label='goal', ax=ax) 49 | pitch.scatter(team_shots[~mask_goal].x/100*120, 80-team_shots[~mask_goal].y/100*80, 50 | edgecolors='white', c=shotcolor, s=marker_size, zorder=2, 51 | label='shot', ax=ax) 52 | # Set the title 53 | ax.set_title(f'{team} shotmap \n vs {opponent}', fontsize=30, color=titlecolor) 54 | 55 | # set legend 56 | leg = ax.legend(facecolor=pitchcolor, edgecolor='None', fontsize=20, loc='lower center', handlelength=4) 57 | leg_texts = leg.get_texts() # list of matplotlib Text instances. 
58 | leg_texts[0].set_color(legendcolor) 59 | leg_texts[1].set_color(legendcolor) 60 | 61 | # Set the figure facecolor 62 | fig.set_facecolor(pitchcolor) 63 | 64 | 65 | 66 | 67 | 68 | 69 | def createPassNetworks(match_data, events_df, matchId, team, max_line_width, 70 | marker_size, edgewidth, dh_arrow_width, marker_color, 71 | marker_edge_color, shrink, ax, kit_no_size=20): 72 | 73 | # getting team id and venue 74 | if match_data['home']['name'] == team: 75 | teamId = match_data['home']['teamId'] 76 | venue = 'home' 77 | else: 78 | teamId = match_data['away']['teamId'] 79 | venue = 'away' 80 | 81 | 82 | # getting opponent 83 | if venue == 'home': 84 | opponent = match_data['away']['name'] 85 | else: 86 | opponent = match_data['home']['name'] 87 | 88 | 89 | # getting player dictionary 90 | team_players_dict = {} 91 | for player in match_data[venue]['players']: 92 | team_players_dict[player['playerId']] = player['name'] 93 | 94 | 95 | # getting minute of first substitution 96 | for i in events_df.index: 97 | if events_df.loc[i, 'type'] == 'SubstitutionOn' and events_df.loc[i, 'teamId'] == teamId: 98 | sub_minute = str(events_df.loc[i, 'minute']) 99 | break 100 | 101 | 102 | # getting players dataframe 103 | match_players_df = pd.DataFrame() 104 | player_names = [] 105 | player_ids = [] 106 | player_pos = [] 107 | player_kit_number = [] 108 | 109 | 110 | for player in match_data[venue]['players']: 111 | player_names.append(player['name']) 112 | player_ids.append(player['playerId']) 113 | player_pos.append(player['position']) 114 | player_kit_number.append(player['shirtNo']) 115 | 116 | match_players_df['playerId'] = player_ids 117 | match_players_df['playerName'] = player_names 118 | match_players_df['playerPos'] = player_pos 119 | match_players_df['playerKitNumber'] = player_kit_number 120 | 121 | 122 | # extracting passes 123 | passes_df = events_df.loc[events_df['teamId'] == teamId].reset_index().drop('index', axis=1) 124 | passes_df['playerId'] = passes_df['playerId'].astype('float').astype('Int64') 125 | if 'playerName' in passes_df.columns: 126 | passes_df = passes_df.drop(columns='playerName') 127 | passes_df.dropna(subset=["playerId"], inplace=True) 128 | passes_df.insert(27, column='playerName', value=[team_players_dict[i] for i in list(passes_df['playerId'])]) 129 | if 'passRecipientId' in passes_df.columns: 130 | passes_df = passes_df.drop(columns='passRecipientId') 131 | passes_df = passes_df.drop(columns='passRecipientName') 132 | passes_df.insert(28, column='passRecipientId', value=passes_df['playerId'].shift(-1)) 133 | passes_df.insert(29, column='passRecipientName', value=passes_df['playerName'].shift(-1)) 134 | passes_df.dropna(subset=["passRecipientName"], inplace=True) 135 | passes_df = passes_df.loc[events_df['type'] == 'Pass', :].reset_index(drop=True) 136 | passes_df = passes_df.loc[events_df['outcomeType'] == 'Successful', :].reset_index(drop=True) 137 | index_names = passes_df.loc[passes_df['playerName']==passes_df['passRecipientName']].index 138 | passes_df.drop(index_names, inplace=True) 139 | passes_df = passes_df.merge(match_players_df, on=['playerId', 'playerName'], how='left', validate='m:1') 140 | passes_df = passes_df.merge(match_players_df.rename({'playerId': 'passRecipientId', 'playerName':'passRecipientName'}, 141 | axis='columns'), on=['passRecipientId', 'passRecipientName'], 142 | how='left', validate='m:1', suffixes=['', 'Receipt']) 143 | passes_df = passes_df[passes_df['playerPos'] != 'Sub'] 144 | 145 | 146 | # getting team formation 147 | formation = 
match_data[venue]['formations'][0]['formationName'] 148 | formation = '-'.join(formation) 149 | 150 | 151 | # getting player average locations 152 | location_formation = passes_df[['playerKitNumber', 'x', 'y']] 153 | average_locs_and_count = location_formation.groupby('playerKitNumber').agg({'x': ['mean'], 'y': ['mean', 'count']}) 154 | average_locs_and_count.columns = ['x', 'y', 'count'] 155 | 156 | 157 | # getting separate dataframe for selected columns 158 | passes_formation = passes_df[['id', 'playerKitNumber', 'playerKitNumberReceipt']].copy() 159 | passes_formation['EPV'] = passes_df['EPV'] 160 | 161 | 162 | # getting dataframe for passes between players 163 | passes_between = passes_formation.groupby(['playerKitNumber', 'playerKitNumberReceipt']).agg({ 'id' : 'count', 'EPV' : 'sum'}).reset_index() 164 | passes_between.rename({'id': 'pass_count'}, axis='columns', inplace=True) 165 | passes_between = passes_between.merge(average_locs_and_count, left_on='playerKitNumberReceipt', right_index=True) 166 | passes_between = passes_between.merge(average_locs_and_count, left_on='playerKitNumber', right_index=True, 167 | suffixes=['', '_end']) 168 | 169 | 170 | # filtering passes 171 | pass_filter = int(passes_between['pass_count'].mean()) 172 | passes_between = passes_between.loc[passes_between['pass_count'] > pass_filter] 173 | 174 | 175 | # calculating the line width 176 | passes_between['width'] = passes_between.pass_count / passes_between.pass_count.max() * max_line_width 177 | passes_between = passes_between.reset_index(drop=True) 178 | 179 | 180 | # setting color to make the lines more transparent when fewer passes are made 181 | min_transparency = 0.3 182 | color = np.array(to_rgba('white')) 183 | color = np.tile(color, (len(passes_between), 1)) 184 | c_transparency = passes_between.pass_count / passes_between.pass_count.max() 185 | c_transparency = (c_transparency * (1 - min_transparency)) + min_transparency 186 | color[:, 3] = c_transparency 187 | passes_between['alpha'] = color.tolist() 188 | 189 | 190 | # separating paired passes from normal passes 191 | passes_between_threshold = 15 192 | filtered_pair_df = [] 193 | pair_list = [comb for comb in combinations(passes_between['playerKitNumber'].unique(), 2)] 194 | for pair in pair_list: 195 | df = passes_between[((passes_between['playerKitNumber']==pair[0]) & (passes_between['playerKitNumberReceipt']==pair[1])) | 196 | ((passes_between['playerKitNumber']==pair[1]) & (passes_between['playerKitNumberReceipt']==pair[0]))] 197 | if df.shape[0] == 2: 198 | if (np.array(df.pass_count)[0] >= passes_between_threshold) and (np.array(df.pass_count)[1] >= passes_between_threshold): 199 | filtered_pair_df.append(df) 200 | passes_between.drop(df.index, inplace=True) 201 | if len(filtered_pair_df) > 0: 202 | filtered_pair_df = pd.concat(filtered_pair_df).reset_index(drop=True) 203 | passes_between = passes_between.reset_index(drop=True) 204 | 205 | 206 | # plotting 207 | pitch = Pitch(pitch_type='opta', pitch_color='#171717', line_color='#5c5c5c', 208 | goal_type='box') 209 | pitch.draw(ax=ax, constrained_layout=True, tight_layout=True) 210 | average_locs_and_count['zorder'] = list(np.linspace(1,5,11)) 211 | for i in average_locs_and_count.index: 212 | pitch.scatter(average_locs_and_count.loc[i, 'x'], average_locs_and_count.loc[i, 'y'], s=marker_size, 213 | color=marker_color, edgecolors=marker_edge_color, linewidth=edgewidth, 214 | alpha=1, zorder=average_locs_and_count.loc[i, 'zorder'], ax=ax) 215 | 216 | for i in passes_between.index: 217 | x = 
passes_between.loc[i, 'x'] 218 | y = passes_between.loc[i, 'y'] 219 | endX = passes_between.loc[i, 'x_end'] 220 | endY = passes_between.loc[i, 'y_end'] 221 | coordsA = "data" 222 | coordsB = "data" 223 | con = ConnectionPatch([endX, endY], [x, y], 224 | coordsA, coordsB, 225 | arrowstyle="simple", shrinkA=shrink, shrinkB=shrink, 226 | mutation_scale=passes_between.loc[i, 'width']*max_line_width, color=passes_between.loc[i, 'alpha']) 227 | ax.add_artist(con) 228 | 229 | if len(filtered_pair_df) > 0: 230 | for i in filtered_pair_df.index: 231 | x = filtered_pair_df.loc[i, 'x'] 232 | y = filtered_pair_df.loc[i, 'y'] 233 | endX = filtered_pair_df.loc[i, 'x_end'] 234 | endY = filtered_pair_df.loc[i, 'y_end'] 235 | coordsA = "data" 236 | coordsB = "data" 237 | con = ConnectionPatch([endX, endY], [x, y], 238 | coordsA, coordsB, 239 | arrowstyle="<|-|>", shrinkA=shrink, shrinkB=shrink, 240 | mutation_scale=dh_arrow_width, lw=filtered_pair_df.loc[i, 'width']*max_line_width/5, 241 | color=filtered_pair_df.loc[i, 'alpha']) 242 | ax.add_artist(con) 243 | 244 | for i in average_locs_and_count.index: 245 | pitch.annotate(i, xy=(average_locs_and_count.loc[i, 'x'], average_locs_and_count.loc[i, 'y']), 246 | family='DejaVu Sans', c='white', 247 | va='center', ha='center', zorder=average_locs_and_count.loc[i, 'zorder'], size=kit_no_size, weight='bold', ax=ax) 248 | ax.text(50, 104, "{} (Mins 1-{})".format(team, sub_minute).upper(), size=10, fontweight='bold', ha='center', 249 | va='center', c='white') 250 | ax.text(2, 3, '{}'.format(formation), size=9, c='grey') 251 | 252 | 253 | 254 | 255 | 256 | 257 | def createAttPassNetworks(match_data, events_df, matchId, team, max_line_width, 258 | marker_size, edgewidth, dh_arrow_width, marker_color, 259 | marker_edge_color, shrink, ax, kit_no_size = 20): 260 | 261 | # getting team id and venue 262 | if match_data['home']['name'] == team: 263 | teamId = match_data['home']['teamId'] 264 | venue = 'home' 265 | else: 266 | teamId = match_data['away']['teamId'] 267 | venue = 'away' 268 | 269 | 270 | # getting opponent 271 | if venue == 'home': 272 | opponent = match_data['away']['name'] 273 | else: 274 | opponent = match_data['home']['name'] 275 | 276 | 277 | # getting player dictionary 278 | team_players_dict = {} 279 | for player in match_data[venue]['players']: 280 | team_players_dict[player['playerId']] = player['name'] 281 | 282 | 283 | # getting minute of first substitution 284 | for i in events_df.index: 285 | if events_df.loc[i, 'type'] == 'SubstitutionOn' and events_df.loc[i, 'teamId'] == teamId: 286 | sub_minute = str(events_df.loc[i, 'minute']) 287 | break 288 | 289 | 290 | # getting players dataframe 291 | match_players_df = pd.DataFrame() 292 | player_names = [] 293 | player_ids = [] 294 | player_pos = [] 295 | player_kit_number = [] 296 | 297 | 298 | for player in match_data[venue]['players']: 299 | player_names.append(player['name']) 300 | player_ids.append(player['playerId']) 301 | player_pos.append(player['position']) 302 | player_kit_number.append(player['shirtNo']) 303 | 304 | match_players_df['playerId'] = player_ids 305 | match_players_df['playerName'] = player_names 306 | match_players_df['playerPos'] = player_pos 307 | match_players_df['playerKitNumber'] = player_kit_number 308 | 309 | 310 | # extracting passes 311 | passes_df = events_df.loc[events_df['teamId'] == teamId].reset_index().drop('index', axis=1) 312 | passes_df['playerId'] = passes_df['playerId'].astype('float').astype('Int64') 313 | if 'playerName' in passes_df.columns: 314 | 
passes_df = passes_df.drop(columns='playerName') 315 | passes_df.dropna(subset=["playerId"], inplace=True) 316 | passes_df.insert(27, column='playerName', value=[team_players_dict[i] for i in list(passes_df['playerId'])]) 317 | if 'passRecipientId' in passes_df.columns: 318 | passes_df = passes_df.drop(columns='passRecipientId') 319 | passes_df = passes_df.drop(columns='passRecipientName') 320 | passes_df.insert(28, column='passRecipientId', value=passes_df['playerId'].shift(-1)) 321 | passes_df.insert(29, column='passRecipientName', value=passes_df['playerName'].shift(-1)) 322 | passes_df.dropna(subset=["passRecipientName"], inplace=True) 323 | passes_df = passes_df.loc[passes_df['type'] == 'Pass', :].reset_index(drop=True) 324 | passes_df = passes_df.loc[passes_df['outcomeType'] == 'Successful', :].reset_index(drop=True) 325 | index_names = passes_df.loc[passes_df['playerName']==passes_df['passRecipientName']].index 326 | passes_df.drop(index_names, inplace=True) 327 | passes_df = passes_df.merge(match_players_df, on=['playerId', 'playerName'], how='left', validate='m:1') 328 | passes_df = passes_df.merge(match_players_df.rename({'playerId': 'passRecipientId', 'playerName':'passRecipientName'}, 329 | axis='columns'), on=['passRecipientId', 'passRecipientName'], 330 | how='left', validate='m:1', suffixes=['', 'Receipt']) 331 | passes_df = passes_df[passes_df['playerPos'] != 'Sub'] 332 | 333 | 334 | # getting team formation 335 | formation = match_data[venue]['formations'][0]['formationName'] 336 | formation = '-'.join(formation) 337 | 338 | 339 | # getting player average locations 340 | location_formation = passes_df[['playerKitNumber', 'x', 'y']] 341 | average_locs_and_count = location_formation.groupby('playerKitNumber').agg({'x': ['mean'], 'y': ['mean', 'count']}) 342 | average_locs_and_count.columns = ['x', 'y', 'count'] 343 | 344 | 345 | # keeping only passes with a positive EPV 346 | passes_df = passes_df.loc[passes_df['EPV'] > 0] 347 | 348 | 349 | # getting separate dataframe for selected columns 350 | passes_formation = passes_df[['id', 'playerKitNumber', 'playerKitNumberReceipt']].copy() 351 | passes_formation['EPV'] = passes_df['EPV'] 352 | 353 | 354 | # getting dataframe for passes between players 355 | passes_between = passes_formation.groupby(['playerKitNumber', 'playerKitNumberReceipt']).agg({ 'id' : 'count', 'EPV' : 'sum'}).reset_index() 356 | passes_between.rename({'id': 'pass_count'}, axis='columns', inplace=True) 357 | passes_between = passes_between.merge(average_locs_and_count, left_on='playerKitNumberReceipt', right_index=True) 358 | passes_between = passes_between.merge(average_locs_and_count, left_on='playerKitNumber', right_index=True, 359 | suffixes=['', '_end']) 360 | 361 | 362 | # filtering passes 363 | pass_filter = int(passes_between['pass_count'].mean()) 364 | passes_between = passes_between.loc[passes_between['pass_count'] > pass_filter*2] 365 | 366 | 367 | # calculating the line width relative to the largest pass count 368 | passes_between['width'] = passes_between.pass_count / passes_between.pass_count.max() * max_line_width 369 | passes_between = passes_between.reset_index(drop=True) 370 | 371 | 372 | # setting color to make the lines more transparent when the passes create less EPV 373 | min_transparency = 0.3 374 | color = np.array(to_rgba('white')) 375 | color = np.tile(color, (len(passes_between), 1)) 376 | c_transparency = passes_between.EPV / passes_between.EPV.max() 377 | c_transparency = (c_transparency * (1 - min_transparency)) + min_transparency
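    # Each pair's summed EPV is rescaled into [min_transparency, 1] before being written
    # into the alpha channel below, so even the least valuable links remain visible:
    # e.g. a pair producing half the maximum EPV gets alpha = 0.5 * (1 - 0.3) + 0.3 = 0.65.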
378 | color[:, 3] = c_transparency 379 | passes_between['alpha'] = color.tolist() 380 | 381 | 382 | # separating paired passes from normal passes 383 | passes_between_threshold = 20 384 | filtered_pair_df = [] 385 | pair_list = [comb for comb in combinations(passes_between['playerKitNumber'].unique(), 2)] 386 | for pair in pair_list: 387 | df = passes_between[((passes_between['playerKitNumber']==pair[0]) & (passes_between['playerKitNumberReceipt']==pair[1])) | 388 | ((passes_between['playerKitNumber']==pair[1]) & (passes_between['playerKitNumberReceipt']==pair[0]))] 389 | if df.shape[0] == 2: 390 | if np.array(df.pass_count)[0]+np.array(df.pass_count)[1] >= passes_between_threshold: 391 | filtered_pair_df.append(df) 392 | passes_between.drop(df.index, inplace=True) 393 | if len(filtered_pair_df) > 0: 394 | filtered_pair_df = pd.concat(filtered_pair_df).reset_index(drop=True) 395 | passes_between = passes_between.reset_index(drop=True) 396 | 397 | 398 | # plotting 399 | pitch = Pitch(pitch_type='opta', pitch_color='#171717', line_color='#5c5c5c', 400 | goal_type='box') 401 | pitch.draw(ax=ax, constrained_layout=True, tight_layout=True) 402 | 403 | average_locs_and_count['zorder'] = list(np.linspace(1,5,11)) 404 | for i in average_locs_and_count.index: 405 | pitch.scatter(average_locs_and_count.loc[i, 'x'], average_locs_and_count.loc[i, 'y'], s=marker_size, 406 | color=marker_color, edgecolors=marker_edge_color, linewidth=edgewidth, 407 | alpha=1, zorder=average_locs_and_count.loc[i, 'zorder'], ax=ax) 408 | 409 | for i in passes_between.index: 410 | x = passes_between.loc[i, 'x'] 411 | y = passes_between.loc[i, 'y'] 412 | endX = passes_between.loc[i, 'x_end'] 413 | endY = passes_between.loc[i, 'y_end'] 414 | coordsA = "data" 415 | coordsB = "data" 416 | con = ConnectionPatch([endX, endY], [x, y], 417 | coordsA, coordsB, 418 | arrowstyle="simple", shrinkA=shrink, shrinkB=shrink, 419 | mutation_scale=passes_between.loc[i, 'width']*max_line_width, color=passes_between.loc[i, 'alpha']) 420 | ax.add_artist(con) 421 | 422 | if len(filtered_pair_df) > 0: 423 | for i in filtered_pair_df.index: 424 | x = filtered_pair_df.loc[i, 'x'] 425 | y = filtered_pair_df.loc[i, 'y'] 426 | endX = filtered_pair_df.loc[i, 'x_end'] 427 | endY = filtered_pair_df.loc[i, 'y_end'] 428 | coordsA = "data" 429 | coordsB = "data" 430 | con = ConnectionPatch([endX, endY], [x, y], 431 | coordsA, coordsB, 432 | arrowstyle="<|-|>", shrinkA=shrink, shrinkB=shrink, 433 | mutation_scale=dh_arrow_width, lw=filtered_pair_df.loc[i, 'width']*max_line_width/5, 434 | color=filtered_pair_df.loc[i, 'alpha']) 435 | ax.add_artist(con) 436 | 437 | for i in average_locs_and_count.index: 438 | pitch.annotate(i, xy=(average_locs_and_count.loc[i, 'x'], average_locs_and_count.loc[i, 'y']), 439 | family='DejaVu Sans', c='white', 440 | va='center', ha='center', zorder=average_locs_and_count.loc[i, 'zorder'], size=kit_no_size, weight='bold', ax=ax) 441 | ax.text(50, 104, "{} (Mins 1-{})".format(team, sub_minute).upper(), size=10, fontweight='bold', ha='center', 442 | va='center', c='white') 443 | ax.text(2, 3, '{}'.format(formation), size=9, c='grey') 444 | 445 | 446 | 447 | 448 | 449 | 450 | 451 | 452 | 453 | def getTeamSuccessfulBoxPasses(events_df, teamId, team, pitch_color, cmap): 454 | """ 455 | Parameters 456 | ---------- 457 | events_df : DataFrame of all events. 458 | 459 | teamId : ID of the team, the passes of which are required. 460 | 461 | team : Name of the team, the passes of which are required. 
462 | 463 | pitch_color : color of the pitch. 464 | 465 | cmap : color design of the pass lines. 466 | You can select more cmaps here: 467 | https://matplotlib.org/3.1.0/tutorials/colors/colormaps.html 468 | 469 | Returns 470 | ------- 471 | Pitch Plot. 472 | 473 | """ 474 | 475 | # Get Total Passes 476 | passes_df = events_df.loc[events_df['type']=='Pass'].reset_index(drop=True) 477 | 478 | # Get Team Passes 479 | team_passes = passes_df.loc[passes_df['teamId'] == teamId] 480 | 481 | # Extracting Box Passes from Total Passes 482 | box_passes = team_passes.copy() 483 | for i,pas in box_passes.iterrows(): 484 | X = pas["x"]/100*120 485 | Xend = pas["endX"]/100*120 486 | Y = pas["y"]/100*80 487 | Yend = pas["endY"]/100*80 488 | if Xend >= 102 and Yend >= 18 and Yend <= 62: 489 | if X >=102 and Y >= 18 and Y <= 62: 490 | box_passes = box_passes.drop([i]) 491 | else: 492 | pass 493 | else: 494 | box_passes = box_passes.drop([i]) 495 | 496 | 497 | successful_box_passes = box_passes.loc[box_passes['outcomeType']=='Successful'].reset_index(drop=True) 498 | 499 | 500 | # orientation='vertical' 501 | pitch = VerticalPitch(pitch_type='statsbomb', pitch_color=pitch_color, line_color='#c7d5cc', 502 | half=True, pad_top=2) 503 | fig, ax = pitch.draw(tight_layout=True) 504 | 505 | # Plot the completed passes 506 | pitch.lines(successful_box_passes.x/100*120, 80-successful_box_passes.y/100*80, 507 | successful_box_passes.endX/100*120, 80-successful_box_passes.endY/100*80, 508 | lw=5, cmap=cmap, opp_comet=True, opp_transparent=True, 509 | label='Successful Passes', ax=ax) 510 | 511 | pitch.scatter(successful_box_passes.x/100*120, 80-successful_box_passes.y/100*80, 512 | edgecolors='white', c='white', s=50, zorder=2, 513 | ax=ax) 514 | 515 | # Set the title 516 | fig.suptitle(f'Completed Box Passes - {team}', y=.95, fontsize=15) 517 | 518 | # Set the subtitle 519 | ax.set_title('Data : Whoscored/Opta', fontsize=8, loc='right', fontstyle='italic', fontweight='bold') 520 | 521 | # set legend 522 | #ax.legend(facecolor='#22312b', edgecolor='None', fontsize=8, loc='lower center', handlelength=4) 523 | 524 | # Set the figure facecolor 525 | fig.set_facecolor(pitch_color) 526 | 527 | 528 | 529 | 530 | 531 | 532 | 533 | 534 | def getTeamTotalPasses(events_df, teamId, team, opponent, pitch_color): 535 | """ 536 | 537 | 538 | Parameters 539 | ---------- 540 | events_df : DataFrame of all events. 541 | 542 | teamId : ID of the team, the passes of which are required. 543 | 544 | team : Name of the team, the passes of which are required. 545 | 546 | opponent : Name of opponent team. 547 | 548 | pitch_color : color of the pitch. 549 | 550 | 551 | Returns 552 | ------- 553 | Pitch Plot. 
554 | """ 555 | 556 | # Get Total Passes 557 | passes_df = events_df.loc[events_df['type']=='Pass'].reset_index(drop=True) 558 | 559 | # Get Team Passes 560 | team_passes = passes_df.loc[passes_df['teamId'] == teamId] 561 | 562 | successful_passes = team_passes.loc[team_passes['outcomeType']=='Successful'].reset_index(drop=True) 563 | unsuccessful_passes = team_passes.loc[team_passes['outcomeType']=='Unsuccessful'].reset_index(drop=True) 564 | 565 | # Setup the pitch 566 | pitch = Pitch(pitch_type='statsbomb', pitch_color=pitch_color, line_color='#c7d5cc') 567 | fig, ax = pitch.draw(constrained_layout=True, tight_layout=False) 568 | # fig.set_size_inches(14, 10) 569 | 570 | # Plot the completed passes 571 | pitch.arrows(successful_passes.x/100*120, 80-successful_passes.y/100*80, 572 | successful_passes.endX/100*120, 80-successful_passes.endY/100*80, width=1, 573 | headwidth=10, headlength=10, color='#ad993c', ax=ax, label='Completed') 574 | 575 | # Plot the other passes 576 | pitch.arrows(unsuccessful_passes.x/100*120, 80-unsuccessful_passes.y/100*80, 577 | unsuccessful_passes.endX/100*120, 80-unsuccessful_passes.endY/100*80, width=1, 578 | headwidth=6, headlength=5, headaxislength=12, color='#ba4f45', ax=ax, label='Blocked') 579 | 580 | # setup the legend 581 | ax.legend(facecolor=pitch_color, handlelength=5, edgecolor='None', fontsize=8, loc='upper left', shadow=True, labelcolor='white') 582 | 583 | # Set the title 584 | fig.suptitle(f'{team} Passes vs {opponent}', y=1, fontsize=15) 585 | 586 | 587 | # Set the subtitle 588 | ax.set_title('Data : Whoscored/Opta', fontsize=8, loc='right', fontstyle='italic', fontweight='bold') 589 | 590 | 591 | # Set the figure facecolor 592 | 593 | fig.set_facecolor(pitch_color) 594 | 595 | 596 | 597 | 598 | 599 | 600 | def normalize(values, bounds): 601 | return [bounds['desired']['lower'] + (x - bounds['actual']['lower']) * (bounds['desired']['upper'] 602 | - bounds['desired']['lower']) / (bounds['actual']['upper'] - bounds['actual']['lower']) for x in values] 603 | 604 | 605 | 606 | 607 | 608 | def createPVFormationMap(match_data, events_df, team, color_palette, 609 | markerstyle, markersize, markeredgewidth, labelsize, labelcolor, ax): 610 | 611 | # getting team id and venue 612 | if match_data['home']['name'] == team: 613 | teamId = match_data['home']['teamId'] 614 | venue = 'home' 615 | else: 616 | teamId = match_data['away']['teamId'] 617 | venue = 'away' 618 | 619 | 620 | # getting opponent 621 | if venue == 'home': 622 | opponent = match_data['away']['name'] 623 | else: 624 | opponent = match_data['home']['name'] 625 | 626 | 627 | # getting player dictionary 628 | team_players_dict = {} 629 | for player in match_data[venue]['players']: 630 | team_players_dict[player['playerId']] = player['name'] 631 | 632 | 633 | # getting minute of first substitution 634 | for i,row in events_df.iterrows(): 635 | if row['type'] == 'SubstitutionOn' and row['teamId'] == teamId: 636 | sub_minute = str(row['minute']) 637 | break 638 | 639 | 640 | # getting players dataframe 641 | match_players_df = pd.DataFrame() 642 | player_names = [] 643 | player_ids = [] 644 | player_pos = [] 645 | player_kit_number = [] 646 | 647 | for player in match_data[venue]['players']: 648 | player_names.append(player['name']) 649 | player_ids.append(player['playerId']) 650 | player_pos.append(player['position']) 651 | player_kit_number.append(player['shirtNo']) 652 | 653 | match_players_df['playerId'] = player_ids 654 | match_players_df['playerName'] = player_names 655 | 
match_players_df['playerPos'] = player_pos 656 | match_players_df['playerKitNumber'] = player_kit_number 657 | 658 | 659 | # extracting passes 660 | passes_df = events_df.loc[events_df['teamId'] == teamId].reset_index().drop('index', axis=1) 661 | passes_df['playerId'] = passes_df['playerId'].astype('float').astype('Int64') 662 | if 'playerName' in passes_df.columns: 663 | passes_df = passes_df.drop(columns='playerName') 664 | passes_df.dropna(subset=["playerId"], inplace=True) 665 | passes_df.insert(27, column='playerName', value=[team_players_dict[i] for i in list(passes_df['playerId'])]) 666 | if 'passRecipientId' in passes_df.columns: 667 | passes_df = passes_df.drop(columns='passRecipientId') 668 | passes_df = passes_df.drop(columns='passRecipientName') 669 | passes_df.insert(28, column='passRecipientId', value=passes_df['playerId'].shift(-1)) 670 | passes_df.insert(29, column='passRecipientName', value=passes_df['playerName'].shift(-1)) 671 | passes_df.dropna(subset=["passRecipientName"], inplace=True) 672 | passes_df = passes_df.loc[passes_df['type'] == 'Pass', :].reset_index(drop=True) 673 | passes_df = passes_df.loc[passes_df['outcomeType'] == 'Successful', :].reset_index(drop=True) 674 | index_names = passes_df.loc[passes_df['playerName']==passes_df['passRecipientName']].index 675 | passes_df.drop(index_names, inplace=True) 676 | passes_df = passes_df.merge(match_players_df, on=['playerId', 'playerName'], how='left', validate='m:1') 677 | passes_df = passes_df.merge(match_players_df.rename({'playerId': 'passRecipientId', 'playerName':'passRecipientName'}, 678 | axis='columns'), on=['passRecipientId', 'passRecipientName'], 679 | how='left', validate='m:1', suffixes=['', 'Receipt']) 680 | # passes_df = passes_df[passes_df['playerPos'] != 'Sub'] 681 | 682 | 683 | # Getting net possession value for passes 684 | netPVPassed = passes_df.groupby(['playerId', 'playerName'])['EPV'].sum().reset_index() 685 | netPVReceived = passes_df.groupby(['passRecipientId', 'passRecipientName'])['EPV'].sum().reset_index() 686 | 687 | 688 | 689 | # Getting formation and player ids for first 11 690 | formation = match_data[venue]['formations'][0]['formationName'] 691 | formation_positions = match_data[venue]['formations'][0]['formationPositions'] 692 | playerIds = match_data[venue]['formations'][0]['playerIds'][:11] 693 | 694 | 695 | # Getting all data in a dataframe 696 | formation_data = [] 697 | for playerId, pos in zip(playerIds, formation_positions): 698 | pl_dict = {'playerId': playerId} 699 | pl_dict.update(pos) 700 | formation_data.append(pl_dict) 701 | formation_data = pd.DataFrame(formation_data) 702 | formation_data['vertical'] = normalize(formation_data['vertical'], 703 | {'actual': {'lower': 0, 'upper': 10}, 'desired': {'lower': 10, 'upper': 110}}) 704 | formation_data['horizontal'] = normalize(formation_data['horizontal'], 705 | {'actual': {'lower': 0, 'upper': 10}, 'desired': {'lower': 80, 'upper': 0}}) 706 | formation_data = netPVPassed.join(formation_data.set_index('playerId'), on='playerId', how='inner').reset_index(drop=True) 707 | formation_data = formation_data.rename(columns={"EPV": "PV"}) 708 | 709 | 710 | # Plotting 711 | pitch = Pitch(pitch_type='statsbomb', pitch_color='#171717', line_color='#5c5c5c', 712 | goal_type='box') 713 | pitch.draw(ax=ax, constrained_layout=True, tight_layout=True) 714 | 715 | sns.scatterplot(x='vertical', y='horizontal', data=formation_data, hue='PV', s=markersize, marker=markerstyle, legend=False, 716 | palette=color_palette, linewidth=markeredgewidth,
ax=ax) 717 | 718 | ax.text(2, 78, '{}'.format('-'.join(formation)), size=20, c='grey') 719 | 720 | for index, row in formation_data.iterrows(): 721 | pitch.annotate(str(round(row.PV*100,2))+'%', xy=(row.vertical, row.horizontal), c=labelcolor, va='center', 722 | ha='center', size=labelsize, zorder=2, weight='bold', ax=ax) 723 | pitch.annotate(row.playerName, xy=(row.vertical, row.horizontal+5), c=labelcolor, va='center', 724 | ha='center', size=labelsize, zorder=2, weight='bold', ax=ax) 725 | 726 | 727 | 728 | --------------------------------------------------------------------------------
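A minimal driver sketch for the plotting functions above. It assumes match_data is the WhoScored match-centre dictionary and events_df the event DataFrame carrying an 'EPV' column, both produced elsewhere in this repository; the team name and styling values are placeholders, not defaults documented by the module.

import matplotlib.pyplot as plt
from visuals import createShotmap, createPassNetworks

# match_data and events_df are assumed to come from the scraping / EPV steps
# of this repository; they are not defined in this sketch.
fig, ax = plt.subplots(figsize=(8, 9))
createShotmap(match_data, events_df, team='Home Team',  # placeholder team name
              pitchcolor='#22312b', shotcolor='grey', goalcolor='red',
              titlecolor='white', legendcolor='white', marker_size=300,
              fig=fig, ax=ax)

fig2, ax2 = plt.subplots(figsize=(11, 8))
createPassNetworks(match_data, events_df, matchId=None, team='Home Team',
                   max_line_width=6, marker_size=800, edgewidth=2,
                   dh_arrow_width=25, marker_color='#1f77b4',
                   marker_edge_color='white', shrink=20, ax=ax2)
plt.show()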