├── .github
└── workflows
│ ├── automated_adv_stats.yml
│ ├── automated_fb_advanced_match_stats.yml
│ ├── automated_fb_match_shooting.yml
│ ├── automated_fb_match_summary.yml
│ ├── automated_match_results.yml
│ ├── automated_match_results_cups.yml
│ ├── automated_understat_shots.yml
│ └── main.yml
├── .gitignore
├── R
├── fb_advanced_match_stats
│ ├── backfill_fb_advanced_match_stats.R
│ ├── shared_fb_advanced_match_stats.R
│ └── update_fb_advanced_match_stats.R
├── fb_big5_advanced_season_stats
│ ├── backfill_big5_advanced_stats.R
│ └── update_big5_advanced_stats.R
├── fb_match_results
│ ├── backfill_fb_match_results.R
│ └── update_fb_match_results.R
├── fb_match_results_cups
│ ├── backfill_fb_cups_match_results.R
│ └── update_fb_cups_match_results.R
├── fb_match_shooting
│ ├── adhoc_fb_match_shooting.R
│ ├── backfill_fb_match_shooting.R
│ ├── shared_fb_match_shooting.R
│ └── update_fb_match_shooting.R
├── fb_match_summary
│ ├── backfill_fb_match_summary.R
│ ├── shared_fb_match_summary.R
│ └── update_fb_match_summary.R
├── piggyback.R
├── tm_player_vals
│ ├── backfill_big5_player_vals.R
│ └── update_big5_player_vals.R
├── tm_transfers
│ └── backfill_big5_transfers.R
└── understat_league_shots
│ ├── backup_understat_local.R
│ └── update_understat_shots.R
├── README.md
├── data
├── fb_advanced_match_stats
│ └── .gitignore
├── fb_big5_advanced_season_stats
│ ├── big5_player_defense.rds
│ ├── big5_player_gca.rds
│ ├── big5_player_keepers.rds
│ ├── big5_player_keepers_adv.rds
│ ├── big5_player_misc.rds
│ ├── big5_player_passing.rds
│ ├── big5_player_passing_types.rds
│ ├── big5_player_playing_time.rds
│ ├── big5_player_possession.rds
│ ├── big5_player_shooting.rds
│ ├── big5_player_standard.rds
│ ├── big5_team_defense.rds
│ ├── big5_team_gca.rds
│ ├── big5_team_keepers.rds
│ ├── big5_team_keepers_adv.rds
│ ├── big5_team_misc.rds
│ ├── big5_team_passing.rds
│ ├── big5_team_passing_types.rds
│ ├── big5_team_playing_time.rds
│ ├── big5_team_possession.rds
│ ├── big5_team_shooting.rds
│ └── big5_team_standard.rds
├── fb_big5_advanced_statsbomb
│ ├── README.md
│ ├── big5_player_defense.rds
│ ├── big5_player_gca.rds
│ ├── big5_player_keepers.rds
│ ├── big5_player_keepers_adv.rds
│ ├── big5_player_misc.rds
│ ├── big5_player_passing.rds
│ ├── big5_player_passing_types.rds
│ ├── big5_player_playing_time.rds
│ ├── big5_player_possession.rds
│ ├── big5_player_shooting.rds
│ ├── big5_player_standard.rds
│ ├── big5_team_defense.rds
│ ├── big5_team_gca.rds
│ ├── big5_team_keepers.rds
│ ├── big5_team_keepers_adv.rds
│ ├── big5_team_misc.rds
│ ├── big5_team_passing.rds
│ ├── big5_team_passing_types.rds
│ ├── big5_team_playing_time.rds
│ ├── big5_team_possession.rds
│ ├── big5_team_shooting.rds
│ └── big5_team_standard.rds
├── fb_match_shooting
│ └── .gitignore
├── fb_match_summary
│ └── .gitignore
├── fotmob_match_details
│ └── .gitignore
├── match_results
│ ├── ARG_match_results.rds
│ ├── AUS_match_results.rds
│ ├── AUT_match_results.rds
│ ├── BEL_match_results.rds
│ ├── BOL_match_results.rds
│ ├── BRA_match_results.rds
│ ├── BUL_match_results.rds
│ ├── CAN_match_results.rds
│ ├── CHI_match_results.rds
│ ├── CHN_match_results.rds
│ ├── COL_match_results.rds
│ ├── CRO_match_results.rds
│ ├── CZE_match_results.rds
│ ├── DEN_match_results.rds
│ ├── ECU_match_results.rds
│ ├── ENG_match_results.rds
│ ├── ESP_match_results.rds
│ ├── FIN_match_results.rds
│ ├── FRA_match_results.rds
│ ├── GER_match_results.rds
│ ├── GRE_match_results.rds
│ ├── HUN_match_results.rds
│ ├── IND_match_results.rds
│ ├── IRN_match_results.rds
│ ├── ITA_match_results.rds
│ ├── JPN_match_results.rds
│ ├── KOR_match_results.rds
│ ├── KSA_match_results.rds
│ ├── MEX_match_results.rds
│ ├── NED_match_results.rds
│ ├── NOR_match_results.rds
│ ├── PAR_match_results.rds
│ ├── PER_match_results.rds
│ ├── POL_match_results.rds
│ ├── POR_match_results.rds
│ ├── ROU_match_results.rds
│ ├── RSA_match_results.rds
│ ├── RUS_match_results.rds
│ ├── SCO_match_results.rds
│ ├── SRB_match_results.rds
│ ├── SUI_match_results.rds
│ ├── SWE_match_results.rds
│ ├── TUR_match_results.rds
│ ├── UKR_match_results.rds
│ ├── URU_match_results.rds
│ ├── USA_match_results.rds
│ └── VEN_match_results.rds
├── match_results_cups
│ ├── README.Rmd
│ ├── README.md
│ ├── afc_asian_cup_match_results.rds
│ ├── afc_asian_cup_qualification_match_results.rds
│ ├── afc_womens_asian_cup_match_results.rds
│ ├── afc_womens_asian_cup_qualification_match_results.rds
│ ├── africa_cup_of_nations_match_results.rds
│ ├── africa_cup_of_nations_qualification_match_results.rds
│ ├── africa_women_cup_of_nations_match_results.rds
│ ├── algarve_cup_match_results.rds
│ ├── concacaf_gold_cup_match_results.rds
│ ├── concacaf_w_championship_match_results.rds
│ ├── copa_america_femenina_match_results.rds
│ ├── copa_america_match_results.rds
│ ├── copa_del_rey_match_results.rds
│ ├── copa_libertadores_de_america_match_results.rds
│ ├── copa_sudamericana_match_results.rds
│ ├── coppa_italia_match_results.rds
│ ├── coupe_de_france_match_results.rds
│ ├── coupe_de_la_ligue_match_results.rds
│ ├── dfb_pokal_frauen_match_results.rds
│ ├── dfb_pokal_match_results.rds
│ ├── efl_cup_match_results.rds
│ ├── english_football_league_cup_match_results.rds
│ ├── european_championship_match_results.rds
│ ├── fa_cup_match_results.rds
│ ├── fifa_confederations_cup_match_results.rds
│ ├── fifa_womens_world_cup_match_results.rds
│ ├── fifa_womens_world_cup_qualification_uefa_match_results.rds
│ ├── fifa_world_cup_match_results.rds
│ ├── fifa_world_cup_qualification_afc_match_results.rds
│ ├── fifa_world_cup_qualification_caf_match_results.rds
│ ├── fifa_world_cup_qualification_concacaf_match_results.rds
│ ├── fifa_world_cup_qualification_conmebol_match_results.rds
│ ├── fifa_world_cup_qualification_inter_confederation_play_offs_match_results.rds
│ ├── fifa_world_cup_qualification_ofc_match_results.rds
│ ├── fifa_world_cup_qualification_uefa_match_results.rds
│ ├── international_friendlies_m_match_results.rds
│ ├── international_friendlies_w_match_results.rds
│ ├── nwsl_challenge_cup_match_results.rds
│ ├── nwsl_fall_series_match_results.rds
│ ├── ofc_nations_cup_match_results.rds
│ ├── ofc_womens_nations_cup_match_results.rds
│ ├── olympics_womens_tournament_match_results.rds
│ ├── she_believes_cup_match_results.rds
│ ├── uefa_champions_league_match_results.rds
│ ├── uefa_euro_qualification_match_results.rds
│ ├── uefa_europa_conference_league_match_results.rds
│ ├── uefa_europa_league_match_results.rds
│ ├── uefa_european_football_championship_qualifying_match_results.rds
│ ├── uefa_nations_league_match_results.rds
│ ├── uefa_womens_champions_league_match_results.rds
│ ├── uefa_womens_championship_match_results.rds
│ └── uefa_womens_euro_qualification_match_results.rds
├── tm_player_vals
│ └── big5_player_vals.rds
├── tm_transfers
│ └── big_5_transfers.rds
└── understat_shots
│ ├── bundesliga_shot_data.rds
│ ├── epl_shot_data.rds
│ ├── la_liga_shot_data.rds
│ ├── ligue_1_shot_data.rds
│ ├── rfpl_shot_data.rds
│ └── serie_a_shot_data.rds
├── man
└── figures
│ ├── hex_sticker.R
│ ├── logo.png
│ └── logo_small_size.png
├── raw-data
├── all_leages_and_cups
│ ├── all_competitions.csv
│ └── get_all_comp_seasons.R
├── countries_list
│ ├── countries_df.csv
│ └── get_countries_list.R
├── fbref-tm-player-mapping
│ ├── README.md
│ ├── create_final_data.R
│ ├── data
│ │ └── tm_data.rds
│ ├── extra-leagues
│ │ ├── create_final_data.R
│ │ └── initial-match
│ │ │ ├── build_mapping_dictionary.R
│ │ │ ├── create_final_data_initial.R
│ │ │ ├── fbref_extra_leagues.rds
│ │ │ ├── fbref_mls.rds
│ │ │ ├── fbref_selenium.R
│ │ │ ├── get_tm_extra_leagues.R
│ │ │ ├── joined_finished.csv
│ │ │ ├── mls
│ │ │ ├── build_mapping_dictionary.R
│ │ │ ├── create_final_data_initial.R
│ │ │ ├── duplicate_players_df.csv
│ │ │ ├── duplicate_players_df_manual_fix.csv
│ │ │ ├── get_data.R
│ │ │ ├── joined_finished.csv
│ │ │ ├── joined_missing.csv
│ │ │ ├── joined_missing_manual_fix.csv
│ │ │ └── tm_unique.csv
│ │ │ ├── tm_players_championship.rds
│ │ │ ├── tm_players_extra_tier1.rds
│ │ │ ├── tm_players_mls.rds
│ │ │ └── working-files
│ │ │ ├── duplicate_players_df.csv
│ │ │ ├── duplicate_players_df_manual_fix.csv
│ │ │ ├── joined_missing.csv
│ │ │ └── tm_unique.csv
│ ├── output
│ │ ├── fbref_to_tm_mapping.csv
│ │ ├── initial-match
│ │ │ ├── build_mapping_dictionary.R
│ │ │ ├── fbref_to_tm_up_to_20-21.csv
│ │ │ ├── joined_finished.csv
│ │ │ └── working-files
│ │ │ │ ├── duplicate_players_df.csv
│ │ │ │ ├── duplicate_players_df_manual_fix.csv
│ │ │ │ ├── joined_missing.csv
│ │ │ │ ├── joined_missing_manual_fix.csv
│ │ │ │ └── tm_unique.csv
│ │ └── working-files
│ │ │ ├── duplicate_players_df.csv
│ │ │ ├── joined_finished.csv
│ │ │ ├── joined_missing.csv
│ │ │ ├── joined_missing_manual_fix.csv
│ │ │ └── tm_unique.csv
│ ├── prepare_working_files.R
│ └── update_player_positions.R
├── fotmob-leagues
│ └── all_leagues.csv
├── job_controller.R
├── league_seasons
│ ├── all_tier1_season_URLs.csv
│ └── get_league_seasons.R
├── transfermarkt_leagues
│ ├── get_transfermarkt_metadata.R
│ └── main_comp_seasons.csv
└── transfermarkt_staff
│ ├── get_staff_types.R
│ └── tm_staff_types.csv
└── worldfootballR_data.Rproj
/.github/workflows/automated_adv_stats.yml:
--------------------------------------------------------------------------------
1 |
2 | name: Store advanced stats
3 |
4 | # Controls when the action will run - have set this to run at:
5 | # 17:30 UTC on Sunday, Tuesday, and Thursday (GitHub Actions evaluates cron schedules in UTC)
6 | on:
7 | schedule:
8 | - cron: "30 17 * * 0,2,4"
9 |
10 |
11 | # A workflow run is made up of one or more jobs that can run sequentially or in parallel
12 | jobs:
13 | # This workflow contains a single job called "update-adv-stats"
14 | update-adv-stats:
15 | # The type of runner that the job will run on
16 | runs-on: macOS-latest
17 | # retrieve token
18 | env:
19 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
20 | # Steps represent a sequence of tasks that will be executed as part of the job
21 | steps:
22 | # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
23 | - uses: actions/checkout@v2
24 | - uses: r-lib/actions/setup-r@v2
25 | - name: Package Installation
26 | run: Rscript -e 'install.packages(c("tidyverse" ,"devtools", "dplyr", "stringr", "here", "piggyback"))'
27 | - name: worldfootballR Package Installation
28 | run: Rscript -e 'devtools::install_github("JaseZiv/worldfootballR")'
29 | - name: Update advanced season stats
30 | run: Rscript -e 'source(here::here("R", "fb_big5_advanced_season_stats", "update_big5_advanced_stats.R"), echo = TRUE)'
31 | - name: Commit
32 | run: |
33 | git config --global user.name 'JaseZiv'
34 | git config --global user.email 'jaseziv83@gmail.com'
35 | git add .
36 | git commit -m 'updating data' || echo "No changes to commit"
37 | git push || echo "No changes to commit"
38 |
--------------------------------------------------------------------------------
/.github/workflows/automated_fb_advanced_match_stats.yml:
--------------------------------------------------------------------------------
1 | name: Scrape FBref advanced match stats
2 |
3 | on:
4 | schedule:
5 | - cron: "13 10,22 * * *"
6 | workflow_dispatch:
7 |
8 | jobs:
9 | update-fb-advanced-match-stats:
10 | runs-on: macOS-latest
11 | env:
12 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
13 | steps:
14 | - uses: actions/checkout@v2
15 | - uses: r-lib/actions/setup-r@v2
16 | - name: Package Installation # lubridate and rlang are used directly by update_fb_advanced_match_stats.R
17 | run: Rscript -e 'install.packages(c("remotes", "dplyr", "purrr", "tidyr", "tibble", "readr", "lubridate", "rlang", "piggyback"))'
18 | - name: worldfootballR Package Installation
19 | run: Rscript -e 'remotes::install_github("JaseZiv/worldfootballR")'
20 | - name: Update match stats
21 | run: Rscript -e 'source(file.path("R", "fb_advanced_match_stats", "update_fb_advanced_match_stats.R"), echo = TRUE)'
22 |
--------------------------------------------------------------------------------
/.github/workflows/automated_fb_match_shooting.yml:
--------------------------------------------------------------------------------
1 | name: Scrape FBref match shooting
2 |
3 | on:
4 | schedule:
5 | - cron: "45 17 * 1-5,8-12 0,2,4"
6 | workflow_dispatch:
7 |
8 | jobs:
9 | update-fb-match-shooting:
10 | runs-on: macOS-latest
11 | env:
12 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
13 | steps:
14 | - uses: actions/checkout@v2
15 | - uses: r-lib/actions/setup-r@v2
16 | - name: Package Installation
17 | run: Rscript -e 'install.packages(c("devtools", "dplyr", "purrr", "tidyr", "tibble", "lubridate", "readr", "piggyback"))'
18 | - name: worldfootballR Package Installation
19 | run: Rscript -e 'devtools::install_github("JaseZiv/worldfootballR")'
20 | - name: Update match shooting # runs R/fb_match_shooting/update_fb_match_shooting.R
21 | run: Rscript -e 'source(file.path("R", "fb_match_shooting", "update_fb_match_shooting.R"), echo = TRUE)'
22 |
--------------------------------------------------------------------------------
/.github/workflows/automated_fb_match_summary.yml:
--------------------------------------------------------------------------------
1 | name: Scrape FBref match summaries
2 |
3 | on:
4 | schedule:
5 | - cron: "58 17 * * 0,2,4"
6 | workflow_dispatch:
7 |
8 | jobs:
9 | update-fb-advanced-match-stats:
10 | runs-on: macOS-latest
11 | env:
12 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
13 | steps:
14 | - uses: actions/checkout@v2
15 | - uses: r-lib/actions/setup-r@v2
16 | - name: Package Installation
17 | run: Rscript -e 'install.packages(c("remotes", "dplyr", "purrr", "tidyr", "tibble", "readr", "piggyback"))'
18 | - name: worldfootballR Package Installation
19 | run: Rscript -e 'remotes::install_github("JaseZiv/worldfootballR")'
20 | - name: Update match stats
21 | run: Rscript -e 'source(file.path("R", "fb_match_summary", "update_fb_match_summary.R"), echo = TRUE)'
22 |
--------------------------------------------------------------------------------
/.github/workflows/automated_match_results.yml:
--------------------------------------------------------------------------------
1 |
2 | name: Store match results
3 |
4 | # Controls when the action will run - have set this to run at:
5 | # 17:15 UTC on Sunday, Tuesday, and Thursday (GitHub Actions evaluates cron schedules in UTC)
6 | on:
7 | schedule:
8 | - cron: "15 17 * * 0,2,4"
9 |
10 |
11 | # A workflow run is made up of one or more jobs that can run sequentially or in parallel
12 | jobs:
13 | # This workflow contains a single job called "update-match-results"
14 | update-match-results:
15 | # The type of runner that the job will run on
16 | runs-on: macOS-latest
17 | # retrieve token
18 | env:
19 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
20 | # Steps represent a sequence of tasks that will be executed as part of the job
21 | steps:
22 | # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
23 | - uses: actions/checkout@v2
24 | - uses: r-lib/actions/setup-r@v2
25 | - name: Package Installation
26 | run: Rscript -e 'install.packages(c("tidyverse" ,"devtools", "dplyr", "stringr", "here", "piggyback"))'
27 | - name: worldfootballR Package Installation
28 | run: Rscript -e 'devtools::install_github("JaseZiv/worldfootballR")'
29 | - name: Update match results
30 | run: Rscript -e 'source(here::here("R", "fb_match_results", "update_fb_match_results.R"), echo = TRUE)'
31 | - name: Commit
32 | run: |
33 | git config --global user.name 'JaseZiv'
34 | git config --global user.email 'jaseziv83@gmail.com'
35 | git add .
36 | git commit -m 'updating data' || echo "No changes to commit"
37 | git push || echo "No changes to commit"
38 |
--------------------------------------------------------------------------------
/.github/workflows/automated_match_results_cups.yml:
--------------------------------------------------------------------------------
1 |
2 | name: Store International and Cups match results
3 |
4 | # Controls when the action will run - have set this to run at:
5 | # 17:00 UTC on Sunday, Tuesday, and Thursday, in every month
6 | # (the cron month field is "*"; GitHub Actions evaluates cron schedules in UTC).
7 | on:
8 | schedule:
9 | - cron: "0 17 * * 0,2,4"
10 |
11 |
12 | # A workflow run is made up of one or more jobs that can run sequentially or in parallel
13 | jobs:
14 | # This workflow contains a single job called "update-match-results"
15 | update-match-results:
16 | # The type of runner that the job will run on
17 | runs-on: macOS-latest
18 | # retrieve token
19 | env:
20 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
21 | # Steps represent a sequence of tasks that will be executed as part of the job
22 | steps:
23 | # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
24 | - uses: actions/checkout@v2
25 | - uses: r-lib/actions/setup-r@v2
26 | - name: Package Installation
27 | run: Rscript -e 'install.packages(c("tidyverse" ,"devtools", "dplyr", "stringr", "here", "lubridate", "janitor", "piggyback"))'
28 | - name: worldfootballR Package Installation
29 | run: Rscript -e 'devtools::install_github("JaseZiv/worldfootballR")'
30 | - name: Update cup match results
31 | run: Rscript -e 'source(here::here("R", "fb_match_results_cups", "update_fb_cups_match_results.R"), echo = TRUE)'
32 | - name: Commit
33 | run: |
34 | git config --global user.name 'JaseZiv'
35 | git config --global user.email 'jaseziv83@gmail.com'
36 | git add .
37 | git commit -m 'updating data' || echo "No changes to commit"
38 | git push || echo "No changes to commit"
39 |
--------------------------------------------------------------------------------
/.github/workflows/automated_understat_shots.yml:
--------------------------------------------------------------------------------
1 |
2 | name: Scrape understat shots
3 |
4 | # Controls when the action will run - have set this to run at:
5 | # 18:15 UTC on Sunday, Tuesday, and Thursday in
6 | # January, February, March, April, May, July, August, September, October, November, and December.
7 | on:
8 | schedule:
9 | - cron: "15 18 * 1-5,7-12 0,2,4"
10 |
11 |
12 | # A workflow run is made up of one or more jobs that can run sequentially or in parallel
13 | jobs:
14 | # This workflow contains a single job called "update-understat-shots"
15 | update-understat-shots:
16 | # The type of runner that the job will run on
17 | runs-on: macOS-latest
18 | # retrieve token
19 | env:
20 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
21 | # Steps represent a sequence of tasks that will be executed as part of the job
22 | steps:
23 | # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
24 | - uses: actions/checkout@v2
25 | - uses: r-lib/actions/setup-r@v2
26 | - name: Package Installation
27 | run: Rscript -e 'install.packages(c("tidyverse" ,"devtools", "dplyr", "stringr", "here", "piggyback"))'
28 | - name: worldfootballR Package Installation
29 | run: Rscript -e 'devtools::install_github("JaseZiv/worldfootballR")'
30 | - name: Update understat shots # runs R/understat_league_shots/update_understat_shots.R
31 | run: Rscript -e 'source(here::here("R", "understat_league_shots", "update_understat_shots.R"), echo = TRUE)'
32 | - name: Commit
33 | run: |
34 | git config --global user.name 'JaseZiv'
35 | git config --global user.email 'jaseziv83@gmail.com'
36 | git add .
37 | git commit -m 'updating data' || echo "No changes to commit"
38 | git push || echo "No changes to commit"
39 |
--------------------------------------------------------------------------------
/.github/workflows/main.yml:
--------------------------------------------------------------------------------
1 | # This is a basic workflow to help you get started with Actions
2 |
3 | name: run_extracts
4 |
5 | # Controls when the action will run.
6 | on:
7 | schedule:
8 | - cron: "30 12 * * 5"
9 |
10 |
11 | # A workflow run is made up of one or more jobs that can run sequentially or in parallel
12 | jobs:
13 | # This workflow contains a single job called "scrape-and-push"
14 | scrape-and-push:
15 | # The type of runner that the job will run on
16 | runs-on: macOS-latest
17 | # retrieve token
18 | env:
19 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
20 | # Steps represent a sequence of tasks that will be executed as part of the job
21 | steps:
22 | # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
23 | - uses: actions/checkout@v2
24 | - uses: r-lib/actions/setup-r@v2
25 | - name: Package Installation
26 | run: Rscript -e 'install.packages(c("tidyverse" ,"devtools", "dplyr", "rvest", "httr", "stringr", "here", "xml2", "purrr", "janitor", "glue", "ratelimitr", "piggyback"))'
27 | - name: worldfootballR Package Installation
28 | run: Rscript -e 'devtools::install_github("JaseZiv/worldfootballR")'
29 | - name: Updated Data
30 | run: Rscript -e 'source(here::here("raw-data", "job_controller.R"), echo = TRUE)'
31 | - name: Commit
32 | run: |
33 | git config --global user.name 'JaseZiv'
34 | git config --global user.email 'jaseziv83@gmail.com'
35 | git add .
36 | git commit -m 'updating data' || echo "No changes to commit"
37 | git push || echo "No changes to commit"
38 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # History files
2 | .Rhistory
3 | .Rapp.history
4 | .DS_Store
5 | .Rproj.user
6 |
--------------------------------------------------------------------------------
/R/fb_advanced_match_stats/shared_fb_advanced_match_stats.R:
--------------------------------------------------------------------------------
1 | library(dplyr)
2 | params <- bind_rows( # league groups to scrape; the list names land in the `group` column via `.id`
3 | 'big5' = list(
4 | country = c('ENG', 'ESP', 'FRA', 'GER', 'ITA'),
5 | tier = '1st',
6 | gender = 'M'
7 | ),
8 | 'other_1st_M' = list( # currently USA only; a wider country list is kept commented out below
9 | country = 'USA',
10 | tier = '1st',
11 | gender = 'M'
12 | ),
13 | # 'other_1st_M' = list(
14 | # country = c('POR', 'NED', 'BRA', 'MEX', 'USA'),
15 | # tier = '1st',
16 | # gender = 'M'
17 | # ),
18 | # '1st_F' = list(
19 | # country = c('ENG', 'USA'),
20 | # tier = '1st',
21 | # gender = 'F'
22 | # ),
23 | # '2nd_M' = list(
24 | # country = c('ENG'),
25 | # tier = '2nd',
26 | # gender = 'M'
27 | # ),
28 | .id = 'group'
29 | )
30 |
--------------------------------------------------------------------------------
/R/fb_advanced_match_stats/update_fb_advanced_match_stats.R:
--------------------------------------------------------------------------------
1 | library(worldfootballR)
2 | library(tidyr)
3 | library(dplyr)
4 | library(readr)
5 | library(purrr)
6 | library(tibble)
7 | library(rlang)
8 |
9 | source(file.path('R', 'piggyback.R'))
10 | source(file.path('R', 'fb_advanced_match_stats', 'shared_fb_advanced_match_stats.R'))
11 |
12 | all_seasons <- readr::read_csv( # competition/season lookup CSV kept under raw-data/ in this repository
13 | 'https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/master/raw-data/all_leages_and_cups/all_competitions.csv'
14 | )
15 | # Distinct (country, gender, tier, season_end_year) rows from 2017 onward for the leagues listed in `params`.
16 | seasons <- all_seasons |>
17 | dplyr::semi_join(
18 | params,
19 | by = c('country', 'tier', 'gender')
20 | ) |>
21 | dplyr::filter(season_end_year >= 2017L) |>
22 | dplyr::distinct(
23 | country,
24 | gender,
25 | tier,
26 | season_end_year
27 | )
28 |
29 | scrape_fb_advanced_match_stats <- function(url, stat_type, team_or_player) {
30 |   # Log which match page and stat table is requested, then delegate to worldfootballR and return its result.
31 |   progress_note <- sprintf(
32 |     'Scraping matches for url = `"%s"`,\n`stat_type = "%s"`, `team_or_player = "%s"`.',
33 |     url,
34 |     stat_type,
35 |     team_or_player
36 |   )
37 |   message(progress_note)
38 |   worldfootballR::fb_advanced_match_stats(
39 |     url,
40 |     stat_type = stat_type,
41 |     team_or_player = team_or_player
42 |   )
43 | }
44 |
45 | possibly_scrape_fb_advanced_match_stats <- purrr::possibly(
46 | scrape_fb_advanced_match_stats,
47 | otherwise = tibble::tibble(), # value returned in place of an error: an empty tibble
48 | quiet = FALSE # errors are still printed rather than silenced
49 | )
50 | # Same error-tolerant scraper, additionally throttled with a 5-second `rate_delay` pause.
51 | slowly_possibly_scrape_fb_advanced_match_stats <- purrr::slowly(
52 | possibly_scrape_fb_advanced_match_stats,
53 | rate = purrr::rate_delay(pause = 5),
54 | quiet = TRUE
55 | )
56 |
57 | bind_with_type_coercion <- function(df1, df2) {
58 |   # Row-bind `df1` and `df2`. Columns present in both whose classes differ are
59 |   # converted to character in both inputs first, so `dplyr::bind_rows()` sees matching types.
60 |   common_cols <- intersect(names(df1), names(df2))
61 |   # Collapse each column's class vector into one string: with a multi-class column (e.g. POSIXct/POSIXt)
62 |   # `sapply(..., class)` returns a list or matrix, which breaks the `!=` comparison and the indexing below.
63 |   class_key <- function(df) vapply(df[common_cols], function(col) paste(class(col), collapse = '/'), character(1))
64 |   cols_to_coerce <- common_cols[class_key(df1) != class_key(df2)]
65 |   if (length(cols_to_coerce) > 0) {
66 |     message(
67 |       sprintf('Coerceing these columns to strings: `%s`', paste0(cols_to_coerce, collapse = '`, `'))
68 |     )
69 |     df1[cols_to_coerce] <- lapply(df1[cols_to_coerce], as.character)
70 |     df2[cols_to_coerce] <- lapply(df2[cols_to_coerce], as.character)
71 |   }
72 |
73 |   dplyr::bind_rows(df1, df2)
74 | }
75 |
76 | fb_advanced_match_stats_tag <- 'fb_advanced_match_stats' # tag passed to the read/write helpers below
77 | update_fb_advanced_match_stats <- function( # append newly scraped matches to one stored stat table and return the result
78 | country = 'ENG',
79 | gender = 'M',
80 | tier = '1st',
81 | stat_type = 'summary',
82 | team_or_player = 'player'
83 | ) {
84 | name <- sprintf('%s_%s_%s_%s_%s_advanced_match_stats', country, gender, tier, stat_type, team_or_player)
85 | message(sprintf('Updating %s.', name))
86 | # Seasons known for this league, taken from the script-level `seasons` table.
87 | filtered_seasons <- seasons |>
88 | dplyr::filter(
89 | country == !!country, # `!!` injects the function argument; the bare name is the column
90 | gender == !!gender,
91 | tier == !!tier
92 | ) |>
93 | dplyr::pull(season_end_year)
94 | # Only the most recent season is checked for new matches.
95 | latest_season <- max(filtered_seasons)
96 |
97 | match_urls <- worldfootballR::fb_match_urls(
98 | country = country,
99 | tier = tier,
100 | gender = gender,
101 | season_end_year = latest_season
102 | )
103 | # Previously stored rows; their MatchURL values mark matches that are already present.
104 | existing_data <- read_worldfootballr(
105 | name = name,
106 | tag = fb_advanced_match_stats_tag
107 | )
108 | existing_match_urls <- unique(existing_data$MatchURL)
109 | new_match_urls <- setdiff(match_urls, existing_match_urls)
110 | # Nothing new: return the stored data as read, without writing anything.
111 | if (length(new_match_urls) == 0) {
112 | message(
113 | sprintf('No new match URLs for `country = "%s"`, `gender = "%s"`, `tier = "%s"`, `stat_type = "%s"`, `team_or_player = "%s"`', country, gender, tier, stat_type, team_or_player)
114 | )
115 | return(existing_data)
116 | }
117 |
118 | scrape_time_utc <- as.POSIXlt(Sys.time(), tz = 'UTC')
119 | # Scrape each new match; naming the vector by itself lets `.id` carry the URL into a leading `MatchURL` column.
120 | new_data <- new_match_urls |>
121 | rlang::set_names() |>
122 | purrr::map_dfr(
123 | \(.x) slowly_possibly_scrape_fb_advanced_match_stats(
124 | url = .x,
125 | stat_type = stat_type,
126 | team_or_player = team_or_player
127 | ),
128 | .id = 'MatchURL'
129 | ) |>
130 | dplyr::relocate(MatchURL, .before = 1)
131 | # Match results for all known seasons supply the competition metadata joined below.
132 | match_results <- worldfootballR::load_match_results(
133 | country = country,
134 | tier = tier,
135 | gender = gender,
136 | season_end_year = filtered_seasons
137 | )
138 | # Attach metadata to the new rows, then append them to the stored rows. The inner join drops new rows whose MatchURL is absent from `match_results`.
139 | res <- bind_with_type_coercion(
140 | existing_data,
141 | new_data |>
142 | dplyr::inner_join(
143 | match_results |>
144 | dplyr::transmute(
145 | Competition_Name,
146 | Gender,
147 | Country,
148 | Tier = .env$tier, # the function argument, not a column of `match_results`
149 | Season_End_Year,
150 | MatchURL
151 | ),
152 | by = 'MatchURL'
153 | )
154 | ) |>
155 | tibble::as_tibble()
156 | # Record the time taken just before scraping began as an attribute on the result.
157 | attr(res, 'scrape_timestamp') <- scrape_time_utc
158 | # Hand the combined data to the write helper under the same name and tag it was read from.
159 | write_worldfootballr_rds_and_csv(
160 | x = res,
161 | name = name,
162 | tag = fb_advanced_match_stats_tag
163 | )
164 |
165 | res
166 | }
167 |
168 | current_time <- lubridate::now(tzone = 'UTC')
169 | current_wday <- lubridate::wday(current_time)
170 | current_hour <- lubridate::hour(current_time)
171 | # Even weekday numbers scrape 'player' tables, odd ones scrape 'team' tables.
172 | team_or_players <- if (current_wday %% 2 == 0) {
173 | 'player'
174 | } else {
175 | 'team'
176 | }
177 | # Runs at UTC hour <= 12 take the first three stat types; later runs take the remaining four.
178 | stat_types <- if (current_hour <= 12) {
179 | c('summary', 'passing', 'passing_types')
180 | } else {
181 | c('defense', 'possession', 'misc', 'keeper')
182 | }
183 | # One update call per (params row, stat type, team_or_player) combination; results are kept in a `data` list column.
184 | params |>
185 | tidyr::crossing(
186 | stat_type = factor(stat_types, levels = c('summary', 'passing', 'passing_types', 'defense', 'possession', 'misc', 'keeper')),
187 | team_or_player = factor(team_or_players, levels = c('team', 'player'))
188 | ) |>
189 | dplyr::arrange(
190 | stat_type,
191 | team_or_player
192 | ) |>
193 | dplyr::mutate(
194 | data = purrr::pmap(
195 | list(
196 | country,
197 | gender,
198 | tier,
199 | stat_type,
200 | team_or_player
201 | ),
202 | ~update_fb_advanced_match_stats(
203 | country = ..1,
204 | gender = ..2,
205 | tier = ..3,
206 | stat_type = ..4,
207 | team_or_player = ..5
208 | )
209 | )
210 | )
211 |
212 |
--------------------------------------------------------------------------------
/R/fb_big5_advanced_season_stats/backfill_big5_advanced_stats.R:
--------------------------------------------------------------------------------
1 | library(worldfootballR)
2 | library(tidyverse)
3 | library(here)
4 |
5 | source("R/piggyback.R")
6 |
7 | seasons <- read.csv("https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/master/raw-data/all_leages_and_cups/all_competitions.csv", stringsAsFactors = F)
8 | # Most recent season_end_year per country among the "Big 5 European Leagues" rows.
9 | # NOTE(review): `latest_season` is not referenced again in this script; the backfill calls use hard-coded year ranges.
10 | latest_season <- seasons %>%
11 | # keep only rows whose competition_type mentions "Big 5 European Leagues":
12 | dplyr::filter(stringr::str_detect(.data[["competition_type"]], "Big 5 European Leagues")) %>%
13 | group_by(country) %>% slice_max(season_end_year) %>%
14 | pull(season_end_year)
15 |
16 | #================================================================================================
17 | # Player Advanced Stats ---------------------------------------------------
18 | #================================================================================================
19 |
20 | backfill_player_advanced <- function(season_end, stat) {
21 |   # Fetch the Big 5 player table for `stat` over `season_end`, move `Url` to the last
22 |   # column, stamp the scrape time (UTC) and pass it to write_worldfootballr as "big5_player_<stat>".
23 |   player_stats <- relocate(
24 |     fb_big5_advanced_season_stats(season_end_year = season_end, stat_type = stat, team_or_player = "player"),
25 |     Url, .after = last_col()
26 |   )
27 |   attr(player_stats, "scrape_timestamp") <- as.POSIXlt(Sys.time(), tz = "UTC")
28 |   # saveRDS(player_stats, here("data", "fb_big5_advanced_season_stats", paste0("big5_player_", stat, ".rds")))
29 |   write_worldfootballr(x = player_stats, name = paste0("big5_player_", stat), tag = "fb_big5_advanced_season_stats", ext = "rds")
30 | }
31 | # Backfill every player table. Stat types are requested either from 2010 or from 2018;
32 | # NOTE(review): the later start presumably matches when those tables became available — confirm.
33 | backfill_player_advanced(season_end= c(2010:2023), stat= "standard")
34 | backfill_player_advanced(season_end= c(2010:2023), stat= "playing_time")
35 | backfill_player_advanced(season_end= c(2010:2023), stat= "shooting")
36 | backfill_player_advanced(season_end= c(2018:2023), stat= "passing")
37 | backfill_player_advanced(season_end= c(2018:2023), stat= "passing_types")
38 | backfill_player_advanced(season_end= c(2018:2023), stat= "gca")
39 | backfill_player_advanced(season_end= c(2018:2023), stat= "defense")
40 | backfill_player_advanced(season_end= c(2018:2023), stat= "possession")
41 | backfill_player_advanced(season_end= c(2010:2023), stat= "misc")
42 | backfill_player_advanced(season_end= c(2010:2023), stat= "keepers")
43 | backfill_player_advanced(season_end= c(2018:2023), stat= "keepers_adv")
44 |
45 |
46 | #================================================================================================
47 | # Team Advanced Stats -----------------------------------------------------
48 | #================================================================================================
49 |
50 | backfill_team_advanced <- function(season_end, stat) {
51 |   # Fetch the Big 5 team table for `stat` over `season_end`, move `Url` to the last
52 |   # column, stamp the scrape time (UTC) and pass it to write_worldfootballr as "big5_team_<stat>".
53 |   team_stats <- relocate(
54 |     fb_big5_advanced_season_stats(season_end_year = season_end, stat_type = stat, team_or_player = "team"),
55 |     Url, .after = last_col()
56 |   )
57 |   attr(team_stats, "scrape_timestamp") <- as.POSIXlt(Sys.time(), tz = "UTC")
58 |   # saveRDS(team_stats, here("data", "fb_big5_advanced_season_stats", paste0("big5_team_", stat, ".rds")))
59 |   write_worldfootballr(x = team_stats, name = paste0("big5_team_", stat), tag = "fb_big5_advanced_season_stats", ext = "rds")
60 | }
61 | # Backfill every team table. Stat types are requested either from 2010 or from 2018;
62 | # NOTE(review): the later start presumably matches when those tables became available — confirm.
63 | backfill_team_advanced(season_end= c(2010:2023), stat= "playing_time")
64 |
65 | backfill_team_advanced(season_end= c(2010:2023), stat= "standard")
66 | backfill_team_advanced(season_end= c(2010:2023), stat= "shooting")
67 | backfill_team_advanced(season_end= c(2018:2023), stat= "passing")
68 | backfill_team_advanced(season_end= c(2018:2023), stat= "passing_types")
69 | backfill_team_advanced(season_end= c(2018:2023), stat= "gca")
70 | backfill_team_advanced(season_end= c(2018:2023), stat= "defense")
71 | backfill_team_advanced(season_end= c(2018:2023), stat= "possession")
72 | backfill_team_advanced(season_end= c(2010:2023), stat= "misc")
73 | backfill_team_advanced(season_end= c(2010:2023), stat= "keepers")
74 | backfill_team_advanced(season_end= c(2018:2023), stat= "keepers_adv")
75 |
76 |
77 |
--------------------------------------------------------------------------------
/R/fb_big5_advanced_season_stats/update_big5_advanced_stats.R:
--------------------------------------------------------------------------------
1 | library(worldfootballR)
2 | library(dplyr)
3 | library(stringr)
4 | library(here)
5 |
6 | source("R/piggyback.R")
7 |
# Competition/season lookup table published in the worldfootballR_data repo.
seasons <- read.csv("https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/master/raw-data/all_leages_and_cups/all_competitions.csv", stringsAsFactors = F)


# Most recent season end year(s) among the "Big 5 European Leagues" rows.
# slice_max() keeps ties and returns rows for every country group, so the
# pulled vector can repeat the same year several times; collapse it to its
# distinct values so it is safe to pass to the scraper and to compare against
# Season_End_Year further down.
latest_season <- seasons %>%
  # keep only the Big 5 European Leagues rows:
  dplyr::filter(stringr::str_detect(.data[["competition_type"]], "Big 5 European Leagues")) %>%
  group_by(country) %>% slice_max(season_end_year) %>%
  pull(season_end_year) %>%
  unique()



# stat types refreshed for both players and teams
stat_types <- c("standard", "playing_time", "shooting", "passing", "passing_types", "gca",
                "defense", "possession", "misc", "keepers", "keepers_adv")
21 |
22 |
23 | #==========================================================================================
24 | # Update Player Advanced Stats --------------------------------------------
25 | #==========================================================================================
26 |
# For every stat type: load the stored player data, re-scrape the latest
# season(s), swap the stored rows for those season(s) with the fresh scrape and
# upload the result again.
for(each_stat in stat_types) {

  print(paste0("Updating player stat: ", each_stat))
  scrape_time_utc <- as.POSIXlt(Sys.time(), tz = "UTC")

  df <- read_worldfootballr_rds(name=paste0("big5_player_", each_stat), tag = "fb_big5_advanced_season_stats")

  new_dat <- fb_big5_advanced_season_stats(season_end_year= latest_season, stat_type= each_stat, team_or_player= "player", time_pause = 5)

  # `%in%` instead of `!=`: latest_season is pulled from grouped rows and may
  # hold more than one element, in which case `!=` would recycle it
  # element-wise against the column and drop the wrong rows.
  df <- df %>%
    filter(!(Season_End_Year %in% latest_season))

  df <- bind_rows(df, new_dat)

  attr(df, "scrape_timestamp") <- scrape_time_utc

  write_worldfootballr(x=df, name = paste0("big5_player_", each_stat), tag = "fb_big5_advanced_season_stats", ext = "rds")
}
47 |
48 |
49 |
50 | #==========================================================================================
51 | # Update Team Advanced Stats ----------------------------------------------
52 | #==========================================================================================
53 |
# For every stat type: load the stored team data, re-scrape the latest
# season(s), swap the stored rows for those season(s) with the fresh scrape and
# upload the result again.
for(each_stat in stat_types) {

  print(paste0("Updating team stat: ", each_stat))
  scrape_time_utc <- as.POSIXlt(Sys.time(), tz = "UTC")

  df <- read_worldfootballr_rds(name=paste0("big5_team_", each_stat), tag = "fb_big5_advanced_season_stats")

  new_dat <- fb_big5_advanced_season_stats(season_end_year= latest_season, stat_type= each_stat, team_or_player= "team", time_pause = 5)

  # `%in%` instead of `!=`: latest_season is pulled from grouped rows and may
  # hold more than one element, in which case `!=` would recycle it
  # element-wise against the column and drop the wrong rows.
  df <- df %>%
    filter(!(Season_End_Year %in% latest_season))

  df <- bind_rows(df, new_dat)

  attr(df, "scrape_timestamp") <- scrape_time_utc

  write_worldfootballr(x=df, name = paste0("big5_team_", each_stat), tag = "fb_big5_advanced_season_stats", ext = "rds")
}
74 |
75 | # scrape_time_utc <- as.POSIXlt(Sys.time(), tz = "UTC")
76 | # saveRDS(scrape_time_utc, here("data", "fb_big5_advanced_season_stats", "scrape_time_big5_advanced_season_stats.rds"))
77 |
78 |
--------------------------------------------------------------------------------
/R/fb_match_results/update_fb_match_results.R:
--------------------------------------------------------------------------------
1 | library(worldfootballR)
2 | library(tidyverse)
3 | library(here)
4 |
5 | source("R/piggyback.R")
6 |
# Competition/season lookup table published in the worldfootballR_data repo.
seasons <- read.csv(
  "https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/master/raw-data/all_leages_and_cups/all_competitions.csv",
  stringsAsFactors = F
)


# Latest listed season per country, restricted to league competitions that
# have both a tier and a country recorded.
latest_seasons <- seasons %>%
  dplyr::filter(
    stringr::str_detect(competition_type, "Leagues"),
    tier != "",
    !is.na(country),
    country != ""
  ) %>%
  group_by(country) %>%
  slice_max(season_end_year) %>%
  distinct()



# Countries whose latest season is not flagged as completed; only these are
# re-scraped.
countries_to_get <- latest_seasons %>%
  dplyr::filter(
    stringr::str_detect(competition_type, "Leagues"),
    tier != "",
    !is.na(country),
    country != "",
    # keep only seasons that are not yet completed
    !is_completed
  ) %>%
  pull(country) %>%
  unique()
29 |
30 |
31 | #=======================================================================================
32 | # Update Match Results ----------------------------------------------------
33 | #=======================================================================================
34 |
# Refresh the stored match results for one country.
#
# Scrapes the fixtures pages of the country's latest season(s) (taken from
# `latest_seasons`), replaces the stored rows sharing the same Gender /
# Season_End_Year / Tier with the fresh scrape and uploads the combined data as
# `<country>_match_results.rds` under the "match_results" tag.
#
# each_country: country code as it appears in `latest_seasons$country`
#
# Returns the value of write_worldfootballr(), or NULL (invisibly) when
# nothing could be scraped.
update_fb_match_results <- function(each_country) {

  print(paste0("Getting Country: ", each_country))
  scrape_time_utc <- as.POSIXlt(Sys.time(), tz = "UTC")

  existing_df <- read_worldfootballr_rds(name=paste0(each_country, "_match_results"), tag = "match_results")

  # We could scrape every league, but a finished season will not change, so
  # only the latest season(s) of this country are scraped again.
  # latest_seasons is still grouped by country; drop the grouping so it does
  # not leak into the data that gets saved.
  country_seasons <- latest_seasons %>%
    dplyr::ungroup() %>%
    dplyr::filter(.data[["country"]] == each_country)

  fixture_urls <- country_seasons %>% dplyr::pull("fixtures_url")

  # Best effort: a failed scrape must not stop the other countries, but the
  # reason is reported instead of being swallowed silently.
  new_df <- tryCatch(
    fixture_urls %>% purrr::map_df(worldfootballR::.get_each_season_results),
    error = function(e) {
      message("Scraping match results failed for ", each_country, ": ", conditionMessage(e))
      data.frame()
    }
  )

  if(nrow(new_df) == 0) {
    return(invisible(NULL))
  }

  # Attach the competition meta data to every scraped match. Columns are
  # selected by name with strings; `.data[[...]]` inside select() is deprecated
  # tidyselect usage.
  new_df_full <- country_seasons %>%
    dplyr::select(
      Competition_Name = "competition_name",
      Gender = "gender",
      Country = "country",
      Season_End_Year = "season_end_year",
      Tier = "tier",
      "seasons_urls",
      "fixtures_url"
    ) %>%
    dplyr::right_join(new_df, by = c("fixtures_url" = "fixture_url")) %>%
    dplyr::select(-dplyr::all_of(c("seasons_urls", "fixtures_url"))) %>%
    dplyr::mutate(Date = lubridate::ymd(.data[["Date"]])) %>%
    dplyr::arrange(.data[["Country"]], .data[["Competition_Name"]], .data[["Gender"]], .data[["Season_End_Year"]], as.numeric(.data[["Wk"]]), .data[["Date"]], .data[["Time"]]) %>%
    dplyr::distinct(.keep_all = T)

  if(nrow(existing_df) != 0) {
    # drop the stored rows that the fresh scrape supersedes
    existing_df <- existing_df %>%
      anti_join(new_df_full, by = c("Gender", "Season_End_Year", "Tier"))

    new_df_full <- bind_rows(existing_df, new_df_full) %>%
      dplyr::arrange(.data[["Country"]], .data[["Competition_Name"]], .data[["Gender"]], .data[["Season_End_Year"]], .data[["Date"]], .data[["Time"]], as.numeric(.data[["Wk"]])) %>%
      dplyr::distinct(.keep_all = T)
  }

  attr(new_df_full, "scrape_timestamp") <- scrape_time_utc

  write_worldfootballr(x=new_df_full, name = paste0(each_country, "_match_results"), tag = "match_results", ext = "rds")
}
87 |
88 |
# update data: the function is called purely for its side effect (uploading
# the refreshed file), so walk() is used rather than map(), which would build
# and auto-print a list of return values.
countries_to_get %>% purrr::walk(update_fb_match_results)
91 |
92 | # scrape_time_utc <- as.POSIXlt(Sys.time(), tz = "UTC")
93 | # saveRDS(scrape_time_utc, here("data", "match_results", "scrape_time_match_results.rds"))
94 |
95 |
96 |
--------------------------------------------------------------------------------
/R/fb_match_results_cups/backfill_fb_cups_match_results.R:
--------------------------------------------------------------------------------
1 | library(worldfootballR)
2 | library(tidyverse)
3 | library(here)
4 |
5 | source("R/piggyback.R")
6 |
# Scrape every listed season of a cup / non-league competition and upload the
# combined results as `<clean competition name>_match_results.rds` under the
# "match_results_cups" tag.
#
# competition_collect: competition name(s) as they appear in the
#                      `competition_name` column of the competitions CSV
#
# Returns the value of write_worldfootballr(), or NULL (invisibly, with a
# warning) when no fixtures URL is listed for the competition.
backfill_historical_comp_results <- function(competition_collect) {
  scrape_time_utc <- as.POSIXlt(Sys.time(), tz = "UTC")

  seasons <- read.csv("https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/master/raw-data/all_leages_and_cups/all_competitions.csv", stringsAsFactors = F)

  fixtures_df <- seasons %>%
    # keep only things that aren't domestic leagues:
    dplyr::filter(!stringr::str_detect(.data[["competition_type"]], "Leagues")) %>%
    # keep only the seasons of the selected competition
    dplyr::filter(competition_name %in% competition_collect,
                  !is.na(.data[["season_end_year"]])) %>%
    dplyr::arrange(desc(.data[["season_end_year"]]))

  fixtures_urls <- fixtures_df %>%
    dplyr::pull("fixtures_url") %>% unique()

  # Nothing to scrape: bail out instead of looping over 1:0 (which would visit
  # the indices 1 and 0) and then joining against an empty result.
  if(length(fixtures_urls) == 0) {
    warning(sprintf("No fixtures URLs found for competition: %s", paste(competition_collect, collapse = ", ")))
    return(invisible(NULL))
  }

  all_results <- data.frame()
  for(each_fixture in seq_along(fixtures_urls)) {
    print(paste0("Scraping URL ", each_fixture, " of ", length(fixtures_urls)))
    # random pause between 4 and 6 passed on to the scraper as time_pause
    df <- worldfootballR::.get_each_season_results(fixture_url = fixtures_urls[each_fixture], time_pause = runif(1, 4, 6))

    all_results <- bind_rows(all_results, df)
  }

  # Attach the competition meta data to every scraped match. Columns are
  # selected by name with strings; `.data[[...]]` inside select() is deprecated
  # tidyselect usage.
  all_results <- fixtures_df %>%
    dplyr::select(
      Competition_Name = "competition_name",
      Gender = "gender",
      Country = "country",
      Season_End_Year = "season_end_year",
      Tier = "tier",
      "seasons_urls",
      "fixtures_url"
    ) %>%
    dplyr::right_join(all_results, by = c("fixtures_url" = "fixture_url")) %>%
    dplyr::select(-dplyr::all_of(c("seasons_urls", "fixtures_url"))) %>%
    dplyr::mutate(Date = lubridate::ymd(.data[["Date"]])) %>%
    dplyr::arrange(.data[["Country"]], .data[["Competition_Name"]], .data[["Gender"]], .data[["Season_End_Year"]], .data[["Wk"]], .data[["Date"]], .data[["Time"]]) %>%
    dplyr::distinct(.keep_all = T)

  # clean names for files - the loading functions need to repeat this step to
  # convert the competition name users see into this file name structure
  comp_name_file <- janitor::make_clean_names(competition_collect)
  # add the time stamp
  attr(all_results, "scrape_timestamp") <- scrape_time_utc

  write_worldfootballr(x=all_results, name = paste0(comp_name_file, "_match_results"), tag = "match_results_cups", ext = "rds")
}
50 |
51 |
52 |
53 | #==================================================================================================================================================
54 | # Get Data ----------------------------------------------------------------
55 |
# Competition/season lookup table published in the worldfootballR_data repo.
seasons <- read.csv(
  "https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/master/raw-data/all_leages_and_cups/all_competitions.csv",
  stringsAsFactors = F
)


# Names of all competitions that are not domestic leagues and have a season
# end year recorded.
cups_to_get <- seasons %>%
  dplyr::filter(
    !stringr::str_detect(competition_type, "Leagues"),
    !is.na(season_end_year)
  ) %>%
  dplyr::pull(competition_name) %>%
  unique()


# the below cups are one off matches so we don't need scores and fixtures for these:
exclusion_cups <- c("UEFA Super Cup", "FA Community Shield", "Supercopa de España", "Trophée des Champions", "DFL-Supercup", "Supercoppa Italiana")

# drop the one-off cups from the list
cups_to_get <- setdiff(cups_to_get, exclusion_cups)

# scrape and upload every remaining cup/competition in turn
for (each_cup in cups_to_get) {
  print(paste("Scraping", each_cup))
  backfill_historical_comp_results(each_cup)
}
77 |
--------------------------------------------------------------------------------
/R/fb_match_results_cups/update_fb_cups_match_results.R:
--------------------------------------------------------------------------------
1 | library(worldfootballR)
2 | library(tidyverse)
3 | library(lubridate)
4 | library(janitor)
5 | library(here)
6 |
7 | source("R/piggyback.R")
8 |
# Competition/season lookup table published in the worldfootballR_data repo.
seasons <- read.csv(
  "https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/master/raw-data/all_leages_and_cups/all_competitions.csv",
  stringsAsFactors = F
)

# the below cups are one off matches so we don't need scores and fixtures for these:
exclusion_cups <- c("UEFA Super Cup", "FA Community Shield", "Supercopa de España", "Trophée des Champions", "DFL-Supercup", "Supercoppa Italiana")

# Latest listed season of every competition that is not a league and not one
# of the single-match cups, plus a `completed_new` flag:
#   * National Team Qualification competitions whose season end year is the
#     current year or later count as not completed,
#   * a missing `is_completed` counts as not completed,
#   * otherwise `is_completed` is used as is.
latest_cup_seasons <- seasons %>%
  filter(
    !stringr::str_detect(competition_type, "Leagues"),
    !(competition_name %in% exclusion_cups)
  ) %>%
  group_by(competition_name) %>%
  slice_max(season_end_year) %>%
  distinct() %>%
  mutate(
    completed_new = case_when(
      competition_type == "National Team Qualification" & season_end_year >= lubridate::year(lubridate::today()) ~ FALSE,
      is.na(is_completed) ~ FALSE,
      TRUE ~ is_completed
    )
  )



# Competitions still in progress; only these are re-scraped.
cups_to_get <- latest_cup_seasons %>%
  dplyr::filter(!completed_new) %>%
  pull(competition_name) %>%
  unique()
37 |
38 |
39 | #=======================================================================================
40 | # Update Match Results ----------------------------------------------------
41 | #=======================================================================================
42 |
# Refresh the stored match results for one cup / non-league competition.
#
# Scrapes the fixtures pages of the competition's latest season(s) (taken from
# `latest_cup_seasons`), replaces the stored rows sharing the same Gender /
# Season_End_Year / Tier with the fresh scrape and uploads the combined data as
# `<clean competition name>_match_results.rds` under the "match_results_cups"
# tag.
#
# each_comp: competition name as it appears in
#            `latest_cup_seasons$competition_name`
#
# Returns the value of write_worldfootballr(), or NULL (invisibly) when
# nothing could be scraped.
update_fb_comp_match_results <- function(each_comp) {

  print(paste0("Getting Competition: ", each_comp))
  scrape_time_utc <- as.POSIXlt(Sys.time(), tz = "UTC")

  f_name <- janitor::make_clean_names(each_comp)

  existing_df <- read_worldfootballr_rds(name=paste0(f_name, "_match_results"), tag = "match_results_cups")

  # We could scrape every competition, but a finished season will not change,
  # so only the latest season(s) of this competition are scraped again.
  # latest_cup_seasons is still grouped by competition_name; drop the grouping
  # so it does not leak into the data that gets saved.
  comp_seasons <- latest_cup_seasons %>%
    dplyr::ungroup() %>%
    dplyr::filter(.data[["competition_name"]] == each_comp)

  fixture_urls <- comp_seasons %>% dplyr::pull("fixtures_url")

  # Best effort: a failed scrape must not stop the other competitions, but the
  # reason is reported instead of being swallowed silently.
  new_df <- tryCatch(
    fixture_urls %>% purrr::map_df(worldfootballR::.get_each_season_results),
    error = function(e) {
      message("Scraping match results failed for ", each_comp, ": ", conditionMessage(e))
      data.frame()
    }
  )

  if(nrow(new_df) == 0) {
    return(invisible(NULL))
  }

  # Attach the competition meta data to every scraped match. Columns are
  # selected by name with strings; `.data[[...]]` inside select() is deprecated
  # tidyselect usage.
  new_df_full <- comp_seasons %>%
    dplyr::select(
      Competition_Name = "competition_name",
      Gender = "gender",
      Country = "country",
      Season_End_Year = "season_end_year",
      Tier = "tier",
      "seasons_urls",
      "fixtures_url"
    ) %>%
    dplyr::right_join(new_df, by = c("fixtures_url" = "fixture_url")) %>%
    dplyr::select(-dplyr::all_of(c("seasons_urls", "fixtures_url"))) %>%
    dplyr::mutate(Date = lubridate::ymd(.data[["Date"]])) %>%
    dplyr::arrange(.data[["Country"]], .data[["Competition_Name"]], .data[["Gender"]], .data[["Season_End_Year"]], as.numeric(.data[["Wk"]]), .data[["Date"]], .data[["Time"]]) %>%
    dplyr::distinct(.keep_all = T)

  if(nrow(existing_df) != 0) {
    # drop the stored rows that the fresh scrape supersedes
    existing_df <- existing_df %>%
      anti_join(new_df_full, by = c("Gender", "Season_End_Year", "Tier"))

    new_df_full <- bind_rows(existing_df, new_df_full) %>%
      dplyr::arrange(.data[["Country"]], .data[["Competition_Name"]], .data[["Gender"]], .data[["Season_End_Year"]], .data[["Date"]], .data[["Time"]], as.numeric(.data[["Wk"]])) %>%
      dplyr::distinct(.keep_all = T)
  }

  attr(new_df_full, "scrape_timestamp") <- scrape_time_utc

  write_worldfootballr(x=new_df_full, name = paste0(f_name, "_match_results"), tag = "match_results_cups", ext = "rds")
}
93 |
94 |
# update data: the function is called purely for its side effect (uploading
# the refreshed file), so walk() is used rather than map(), which would build
# and auto-print a list of return values.
cups_to_get %>% purrr::walk(update_fb_comp_match_results)
97 |
98 | # scrape_time_utc <- as.POSIXlt(Sys.time(), tz = "UTC")
99 | # saveRDS(scrape_time_utc, here("data", "match_results", "scrape_time_match_results.rds"))
100 |
101 |
102 |
--------------------------------------------------------------------------------
/R/fb_match_shooting/adhoc_fb_match_shooting.R:
--------------------------------------------------------------------------------
1 | library(worldfootballR)
2 | library(tidyr)
3 | library(dplyr)
4 | library(readr)
5 | library(purrr)
6 | library(tibble)
7 | library(rlang)
8 |
9 | source(file.path('R', 'piggyback.R'))
10 | source(file.path('R', 'fb_match_shooting', 'shared_fb_match_shooting.R'))
11 |
## fix tier ----
fb_match_shooting_tag <- 'fb_match_shooting'

# Load one league's stored shooting data; when any Tier value is missing, fill
# the gaps with the league's tier and write the data back (rds and csv).
# Returns the stored data untouched when no Tier is missing, otherwise whatever
# write_worldfootballr_rds_and_csv() returns.
fill_missing_tier <- function(.country, .gender, .tier) {
  name <- sprintf('%s_%s_%s_match_shooting', .country, .gender, .tier)
  message(sprintf('Updating %s.', name))

  existing_match_shooting <- read_worldfootballr_rds(
    name = name,
    tag = fb_match_shooting_tag
  )

  # nothing to repair
  if (!anyNA(existing_match_shooting$Tier)) {
    return(existing_match_shooting)
  }

  repaired <- mutate(existing_match_shooting, Tier = coalesce(Tier, .tier))
  write_worldfootballr_rds_and_csv(
    repaired,
    name = name,
    tag = fb_match_shooting_tag
  )
}

params |>
  filter(group != 'non_domestic') |>
  mutate(
    data = pmap(list(country, gender, tier), fill_missing_tier)
  )
43 |
## fix some incomplete matches ----
# League whose stored file gets patched below.
country <- 'ENG'
gender <- 'M'
tier <- '1st'

fb_match_shooting_tag <- 'fb_match_shooting'
name <- sprintf('%s_%s_%s_match_shooting', country, gender, tier)

# data currently stored for that league
existing_fb_match_shooting <- read_worldfootballr_rds(name = name, tag = fb_match_shooting_tag)
54 |
# Announce the match being scraped, then return fb_match_shooting() for it.
scrape_fb_match_shooting <- function(match_url) {
  progress_msg <- sprintf('Scraping matches for %s.', match_url)
  message(progress_msg)
  fb_match_shooting(match_url)
}
59 |
## games not including all shots when originally scraped
# Re-scrape them; the match URL is kept in a `MatchURL` id column.
new_fb_match_shooting <- as_tibble(
  map_dfr(
    set_names(
      c(
        'https://fbref.com/en/matches/070bf86d/Burnley-Newcastle-United-May-4-2024-Premier-League',
        'https://fbref.com/en/matches/91a2da3b/Sheffield-United-Nottingham-Forest-May-4-2024-Premier-League'
      )
    ),
    scrape_fb_match_shooting,
    .id = 'MatchURL'
  )
)

# competition meta data already stored for the re-scraped matches
matching_matches <- inner_join(
  distinct(new_fb_match_shooting, MatchURL),
  distinct(existing_fb_match_shooting, MatchURL, Competition_Name, Gender, Country, Tier, Season_End_Year)
)

# swap the stored rows of those matches for the fresh scrape and write back
bind_rows(
  filter(existing_fb_match_shooting, !MatchURL %in% matching_matches$MatchURL),
  left_join(new_fb_match_shooting, matching_matches)
) |>
  mutate(Tier = '1st') |> ## temp fix
  write_worldfootballr_rds_and_csv(name = name, tag = fb_match_shooting_tag)
89 |
--------------------------------------------------------------------------------
/R/fb_match_shooting/backfill_fb_match_shooting.R:
--------------------------------------------------------------------------------
1 | library(worldfootballR)
2 | library(tidyr)
3 | library(dplyr)
4 | library(readr)
5 | library(purrr)
6 | library(tibble)
7 | library(rlang)
8 |
# Local cache layout: per-league files live in PARENT_DATA_DIR, per-season and
# per-match files below SUB_DATA_DIR.
PARENT_DATA_DIR <- file.path('data', 'fb_match_shooting')
SUB_DATA_DIR <- file.path(PARENT_DATA_DIR, 'match_shooting')

# create parent first, then the sub directory (no warning if they exist)
invisible(lapply(c(PARENT_DATA_DIR, SUB_DATA_DIR), dir.create, showWarnings = FALSE))
13 |
14 | source(file.path('R', 'fb_match_shooting', 'shared_fb_match_shooting.R'))
15 |
# Scrape the shooting data of one match, caching the result on disk.
#
# url:       match URL; its basename is used as the cache file name
# data_dir:  directory holding the per-match cache files (created if missing)
# overwrite: when FALSE an existing cache file is read instead of scraping
#
# Returns the cached or freshly scraped data.
scrape_fb_match_shooting <- function(url, data_dir, overwrite = FALSE) {
  cache_path <- file.path(data_dir, sprintf('%s.rds', basename(url)))
  cache_dir <- dirname(cache_path)
  if (!dir.exists(cache_dir)) {
    dir.create(cache_dir, showWarnings = FALSE, recursive = TRUE)
  }

  use_cache <- file.exists(cache_path) && !overwrite
  if (use_cache) {
    return(readr::read_rds(cache_path))
  }

  suffix <- sprintf('for `url = "%s"`.', url)
  message(sprintf('Scraping data %s', suffix))

  shots <- worldfootballR::fb_match_shooting(url)
  readr::write_rds(shots, cache_path)
  shots
}
29 |
# Error-tolerant scraper: a failure is reported (quiet = FALSE) and an empty
# tibble is returned in its place.
possibly_scrape_fb_match_shooting <- purrr::possibly(
  scrape_fb_match_shooting,
  otherwise = tibble::tibble(),
  quiet = FALSE
)

# Same scraper, rate limited with a delay of `pause = 5` between calls.
slowly_possibly_scrape_fb_match_shooting <- purrr::slowly(
  possibly_scrape_fb_match_shooting,
  rate = purrr::rate_delay(5),
  quiet = FALSE
)
41 |
# Build the complete match shooting data set of one league in the local cache.
#
# country, gender, tier: identify the league; also used for cache paths
# group:                 only consulted when `season_end_years` is NULL:
#                        'big5' starts at 2018, anything else at 2019
# season_end_years:      seasons to process; NULL means first season (see
#                        `group`) up to the current calendar year + 1
#
# A season whose `<season_end_year>.rds` file already exists is read from disk
# and not scraped again. The combined data is written to
# `<country>_<gender>_<tier>_match_shooting.rds` in PARENT_DATA_DIR and
# returned invisibly.
backfill_fb_match_shooting <- function(
    country = 'ENG',
    gender = 'M',
    tier = '1st',
    group = 'big5',
    season_end_years = 2025
) {

  rds_path <- file.path(
    PARENT_DATA_DIR,
    sprintf('%s_%s_%s_match_shooting.rds', country, gender, tier)
  )
  message(sprintf('Updating %s.', rds_path))

  if (is.null(season_end_years)) {
    first_season_end_year <- if (group == 'big5') 2018 else 2019
    last_season_end_year <- lubridate::year(Sys.Date()) + 1L
    season_end_years <- first_season_end_year:last_season_end_year
  }

  league_dir <- file.path(SUB_DATA_DIR, country, gender, tier)

  # Returns the shooting data of one season, from the season cache file when
  # present, otherwise by scraping every match of that season.
  backfill_one_season <- function(season_end_year) {

    season_path <- file.path(league_dir, paste0(season_end_year, '.rds'))
    if (file.exists(season_path)) {
      return(readRDS(season_path))
    }

    match_urls <- worldfootballR::fb_match_urls(
      country = country,
      tier = tier,
      gender = gender,
      season_end_year = season_end_year
    )

    if (length(match_urls) == 0) {
      warning(
        sprintf('No match URLs for `country = "%s"`, `gender = "%s"`, `tier = "%s"`, `season_end_year = %s`.', country, gender, tier, season_end_year)
      )
      return(tibble::tibble())
    }

    # per-match cache files of this season
    match_dir <- file.path(league_dir, season_end_year)

    scraped <- purrr::map_dfr(
      rlang::set_names(match_urls),
      \(.x) slowly_possibly_scrape_fb_match_shooting(
        url = .x,
        data_dir = match_dir
      ),
      .id = 'MatchURL'
    )
    scraped <- dplyr::relocate(scraped, MatchURL, .before = 1)

    ## for the URLs
    match_results <- worldfootballR::load_match_results(
      country = country,
      tier = tier,
      gender = gender,
      season_end_year = season_end_year
    )

    # competition meta data per match; Tier is taken from the function argument
    match_meta <- dplyr::transmute(
      match_results,
      Competition_Name,
      Gender,
      Country,
      Tier = .env$tier,
      Season_End_Year,
      MatchURL
    )

    season_data <- tibble::as_tibble(
      dplyr::inner_join(scraped, match_meta, by = dplyr::join_by(MatchURL))
    )
    saveRDS(season_data, season_path)
    season_data
  }

  res <- purrr::map_dfr(season_end_years, backfill_one_season)

  attr(res, 'scrape_timestamp') <- as.POSIXlt(Sys.time(), tz = 'UTC')
  readr::write_rds(res, rds_path)

  invisible(res)
}
138 |
# Run the backfill for every league in `params`; the per-league result is
# stored in the `data` list column. `season_end_years = NULL` lets the function
# derive the season range itself.
local_data <- params |>
  # dplyr::filter(
  #   (
  #     # country == 'ENG' &
  #     group != 'big5' &
  #       gender == 'M' &
  #       tier == '1st'
  #   )
  # ) |>
  dplyr::mutate(
    data = purrr::pmap(
      list(country, gender, tier, group),
      \(.country, .gender, .tier, .group) backfill_fb_match_shooting(
        country = .country,
        gender = .gender,
        tier = .tier,
        group = .group,
        season_end_years = NULL
      )
    )
  )
165 |
166 |
167 | ## could just put this in the function, but i want to check locally before i upload
168 | # source(file.path('R', 'piggyback.R'))
169 | # local_data |>
170 | # mutate(
171 | # name = sprintf('%s_%s_%s_match_shooting', country, gender, tier),
172 | # res = map2(
173 | # data,
174 | # name,
175 | # ~{
176 | # write_worldfootballr_rds_and_csv(
177 | # x = .x,
178 | # name = .y,
179 | # tag = 'fb_match_shooting'
180 | # )
181 | # }
182 | # )
183 | # )
184 | #
185 |
--------------------------------------------------------------------------------
/R/fb_match_shooting/shared_fb_match_shooting.R:
--------------------------------------------------------------------------------
1 | library(dplyr)
# Leagues covered by the match shooting scripts: one row per
# country / tier / gender combination, with the name of the list it came from
# stored in the `group` column.
params <- dplyr::bind_rows(
  big5 = list(
    country = c("ENG", "ESP", "FRA", "GER", "ITA"),
    tier = "1st",
    gender = "M"
  ),
  other_1st_M = list(
    country = c("POR", "NED", "BRA", "MEX", "USA"),
    tier = "1st",
    gender = "M"
  ),
  `1st_F` = list(
    country = c("ENG", "USA", "ESP"),
    tier = "1st",
    gender = "F"
  ),
  `2nd_M` = list(
    country = "ENG",
    tier = "2nd",
    gender = "M"
  ),
  .id = "group"
)
25 |
26 |
--------------------------------------------------------------------------------
/R/fb_match_shooting/update_fb_match_shooting.R:
--------------------------------------------------------------------------------
1 | library(worldfootballR)
2 | library(tidyr)
3 | library(dplyr)
4 | library(readr)
5 | library(purrr)
6 | library(tibble)
7 | library(rlang)
8 | library(lubridate)
9 |
10 | source(file.path('R', 'piggyback.R'))
11 | source(file.path('R', 'fb_match_shooting', 'shared_fb_match_shooting.R'))
12 |
# Competition/season lookup table published in the worldfootballR_data repo.
competitions_csv <- 'https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/master/raw-data/all_leages_and_cups/all_competitions.csv'
all_seasons <- readr::read_csv(competitions_csv)

# Distinct league / season combinations for the leagues listed in `params`,
# limited to season end years from 2017 onwards.
seasons <- dplyr::distinct(
  dplyr::filter(
    dplyr::semi_join(all_seasons, params, by = c('country', 'tier', 'gender')),
    season_end_year >= 2017L
  ),
  country,
  gender,
  tier,
  season_end_year
)
29 |
# Announce the match being scraped, then return its shooting data.
scrape_fb_match_shooting <- function(match_url) {
  progress_msg <- sprintf('Scraping matches for %s.', match_url)
  message(progress_msg)
  worldfootballR::fb_match_shooting(match_url)
}
34 |
# Error-tolerant scraper: a failure is reported (quiet = FALSE) and an empty
# tibble is returned in its place.
possibly_scrape_fb_match_shooting <- purrr::possibly(
  scrape_fb_match_shooting, otherwise = tibble::tibble(), quiet = FALSE
)
40 |
fb_match_shooting_tag <- 'fb_match_shooting'

# Add newly played matches of the latest season to a league's stored match
# shooting data and upload the result (rds and csv).
#
# country, gender, tier: identify the league (and the stored file name)
# date_threshold:        matches played this many days ago or less are treated
#                        as too recent: they are not scraped and any stored
#                        rows for them are dropped
#
# Returns the updated data; the stored data unchanged when there is nothing
# new to scrape; an empty tibble (with a warning) when the league has no
# season listed in `seasons`.
update_fb_match_shooting <- function(country, gender = 'M', tier = '1st', date_threshold = 3L) {
  name <- sprintf('%s_%s_%s_match_shooting', country, gender, tier)
  message(sprintf('Updating %s.', name))

  filtered_seasons <- seasons |>
    dplyr::filter(
      country == !!country,
      gender == !!gender,
      tier == !!tier
    ) |>
    dplyr::pull(season_end_year)

  # max() of an empty vector is -Inf (with a warning), which is not a usable
  # season end year.
  if (length(filtered_seasons) == 0) {
    warning(sprintf('No seasons listed for `country = "%s"`, `gender = "%s"`, `tier = "%s"`.', country, gender, tier))
    return(tibble::tibble())
  }

  latest_season <- max(filtered_seasons)

  match_urls <- worldfootballR::fb_match_urls(
    country = country,
    tier = tier,
    gender = gender,
    season_end_year = latest_season
  )

  # The match date is embedded in the last URL segment, e.g. "...-May-4-2024-...".
  date_rgx <- sprintf('(%s)-[0-9]{1,2}-20[0-9]{2}', paste0(month.name, collapse = '|'))
  match_names <- basename(match_urls)
  date_hits <- regexpr(date_rgx, match_names)
  has_date <- !is.na(date_hits) & date_hits > 0

  # regmatches() silently drops elements without a match; write its result
  # back into an NA-filled vector so the dates stay aligned with match_urls.
  match_date_strings <- rep(NA_character_, length(match_names))
  match_date_strings[has_date] <- regmatches(match_names, date_hits)
  match_dates <- lubridate::mdy(match_date_strings)

  days_since_match <- as.integer(difftime(Sys.Date(), match_dates, units = 'days'))

  # A match without a usable date cannot be classified as recent, so it is
  # retained; this also keeps NA out of the URL vectors.
  is_recent <- !is.na(days_since_match) & days_since_match <= date_threshold
  discarded_match_urls <- match_urls[is_recent]
  retained_match_urls <- match_urls[!is_recent]

  existing_match_shooting <- read_worldfootballr_rds(
    name = name,
    tag = fb_match_shooting_tag
  )
  existing_match_urls <- unique(existing_match_shooting$MatchURL)
  # retained matches that are not already stored
  new_match_urls <- setdiff(retained_match_urls, setdiff(existing_match_urls, discarded_match_urls))

  if (length(new_match_urls) == 0) {
    message(sprintf('Not updating data for `country = "%s"`, `gender = "%s"`, `tier = "%s"`.', country, gender, tier))
    return(existing_match_shooting)
  }

  scrape_time_utc <- as.POSIXlt(Sys.time(), tz = 'UTC')

  new_match_shooting <- new_match_urls |>
    rlang::set_names() |>
    purrr::map_dfr(
      possibly_scrape_fb_match_shooting,
      .id = 'MatchURL'
    ) |>
    dplyr::relocate(MatchURL, .before = 1)

  match_results <- worldfootballR::load_match_results(
    country = country,
    tier = tier,
    gender = gender,
    season_end_year = filtered_seasons
  )

  # competition meta data per match; Tier is taken from the function argument
  match_meta <- match_results |>
    dplyr::transmute(
      Competition_Name,
      Gender,
      Country,
      Tier = .env$tier,
      Season_End_Year,
      MatchURL
    )

  match_shooting <- dplyr::bind_rows(
    existing_match_shooting |>
      dplyr::filter(!(MatchURL %in% discarded_match_urls)),
    new_match_shooting |>
      # explicit key, matching the backfill script, instead of joining on
      # whatever columns happen to be shared
      dplyr::inner_join(match_meta, by = 'MatchURL')
  ) |>
    tibble::as_tibble()

  attr(match_shooting, 'scrape_timestamp') <- scrape_time_utc

  write_worldfootballr_rds_and_csv(
    x = match_shooting,
    name = name,
    tag = fb_match_shooting_tag
  )

  match_shooting
}
131 |
# Update every league listed in `params`; the per-league result is stored in
# the `data` list column.
params |>
  dplyr::mutate(
    data = purrr::pmap(
      list(country, gender, tier),
      \(.country, .gender, .tier) update_fb_match_shooting(
        country = .country,
        gender = .gender,
        tier = .tier
      )
    )
  )
147 |
148 |
--------------------------------------------------------------------------------
/R/fb_match_summary/backfill_fb_match_summary.R:
--------------------------------------------------------------------------------
1 | library(worldfootballR)
2 | library(tidyr)
3 | library(dplyr)
4 | library(readr)
5 | library(purrr)
6 | library(tibble)
7 | library(rlang)
8 |
# Local cache layout: per-league files live in PARENT_DATA_DIR, per-season and
# per-match files below SUB_DATA_DIR.
PARENT_DATA_DIR <- file.path('data', 'fb_match_summary')
SUB_DATA_DIR <- file.path(PARENT_DATA_DIR, 'matches')
# SUB_DATA_DIR is nested two levels below 'data'. With recursive = FALSE and
# warnings suppressed, the call silently did nothing when the parent directory
# was missing; recursive = TRUE creates the whole path.
dir.create(SUB_DATA_DIR, showWarnings = FALSE, recursive = TRUE)
12 |
13 | source(file.path('R', 'fb_match_summary', 'shared_fb_match_summary.R'))
14 |
# Scrape the match summary of one match, caching the result on disk.
#
# url:       match URL; its basename is used as the cache file name
# data_dir:  directory holding the per-match cache files (created if missing)
# overwrite: when FALSE an existing cache file is read instead of scraping
#
# Returns the cached or freshly scraped data.
scrape_fb_match_summary <- function(url, data_dir, overwrite = FALSE) {
  cache_path <- file.path(data_dir, sprintf('%s.rds', basename(url)))
  cache_dir <- dirname(cache_path)
  if (!dir.exists(cache_dir)) {
    dir.create(cache_dir, showWarnings = FALSE, recursive = TRUE)
  }

  use_cache <- file.exists(cache_path) && !overwrite
  if (use_cache) {
    return(readr::read_rds(cache_path))
  }

  suffix <- sprintf('for `url = "%s"`.', url)
  message(sprintf('Scraping data %s', suffix))

  summary_data <- worldfootballR::fb_match_summary(url)
  readr::write_rds(summary_data, cache_path)
  summary_data
}
28 |
# Error-tolerant scraper: a failure is reported (quiet = FALSE) and an empty
# tibble is returned in its place.
possibly_scrape_fb_match_summary <- purrr::possibly(
  scrape_fb_match_summary, otherwise = tibble::tibble(), quiet = FALSE
)
34 | # Backfill match summaries for one competition: reuse cached seasons, scrape the rest, write the combined data to PARENT_DATA_DIR and return it invisibly.
35 | backfill_fb_match_summary <- function(
36 | country = 'ENG',
37 | gender = 'M',
38 | tier = '1st',
39 | group = 'big5'
40 | ) {
41 |
42 | rds_path <- file.path(PARENT_DATA_DIR, sprintf('%s_%s_%s_match_summary.rds', country, gender, tier))
43 | message(sprintf('Updating %s.', rds_path))
44 | # The first season to backfill depends on the group: 2018 for 'big5', otherwise 2019.
45 | first_season_end_year <- ifelse(
46 | group == 'big5',
47 | 2018,
48 | 2019
49 | )
50 | # Seasons run through the year after the current calendar year.
51 | last_season_end_year <- lubridate::year(Sys.Date()) + 1L
52 | season_end_years <- first_season_end_year:last_season_end_year
53 |
54 | res <- purrr::map_dfr(
55 | season_end_years,
56 | function(season_end_year) {
57 | # Every season except the last one is read from its per-season cache file when that file exists.
58 | season_path <- file.path(SUB_DATA_DIR, country, gender, tier, paste0(season_end_year, '.rds'))
59 | if (season_end_year < last_season_end_year & file.exists(season_path)) {
60 | return(readRDS(season_path))
61 | }
62 |
63 | match_urls <- worldfootballR::fb_match_urls(
64 | country = country,
65 | tier = tier,
66 | gender = gender,
67 | season_end_year = season_end_year
68 | )
69 | # A season without match URLs contributes no rows (and no cache file is written for it).
70 | if (length(match_urls) == 0) {
71 | warning(
72 | sprintf('No match URLs for `country = "%s"`, `gender = "%s"`, `tier = "%s"`, `season_end_year = %s`.', country, gender, tier, season_end_year)
73 | )
74 | return(tibble::tibble())
75 | }
76 | # Scrape every match (per-match caching happens in the scraper); `.id` stores the source URL in `MatchURL`.
77 | new_data <- match_urls |>
78 | rlang::set_names() |>
79 | purrr::map_dfr(
80 | \(.x) possibly_scrape_fb_match_summary(
81 | url = .x,
82 | data_dir = file.path(SUB_DATA_DIR, country, gender, tier, season_end_year)
83 | ),
84 | .id = 'MatchURL'
85 | ) |>
86 | dplyr::relocate(MatchURL, .before = 1)
87 |
88 | ## match results supply the competition metadata, matched to the scraped rows via MatchURL
89 | match_results <- worldfootballR::load_match_results(
90 | country = country,
91 | tier = tier,
92 | gender = gender,
93 | season_end_year = season_end_year
94 | )
95 | # inner join: scraped rows whose MatchURL is absent from the match results are dropped
96 | res <- new_data |>
97 | dplyr::inner_join(
98 | match_results |>
99 | dplyr::transmute(
100 | Competition_Name,
101 | Gender,
102 | Country,
103 | Tier = .env$tier,
104 | Season_End_Year,
105 | MatchURL
106 | ),
107 | by = dplyr::join_by(MatchURL)
108 | ) |>
109 | tibble::as_tibble()
110 | saveRDS(res, season_path)
111 | res
112 | }
113 | )
114 | # Stamp the combined data with the UTC time of this run before writing it out.
115 | attr(res, 'scrape_timestamp') <- as.POSIXlt(Sys.time(), tz = 'UTC')
116 | readr::write_rds(
117 | res,
118 | rds_path
119 | )
120 |
121 | invisible(res)
122 | }
123 | # Backfill locally; note the filter limits this run to rows of `params` with country 'ENG' and gender 'F'.
124 | local_data <- params |>
125 | filter(country == 'ENG', gender == 'F') |>
126 | dplyr::mutate(
127 | data = purrr::pmap(
128 | list(
129 | country,
130 | gender,
131 | tier,
132 | group
133 | ),
134 | ~backfill_fb_match_summary(
135 | country = ..1,
136 | gender = ..2,
137 | tier = ..3,
138 | group = ..4
139 | )
140 | )
141 | )
142 | # Upload step: write each backfilled data set as RDS and CSV under the 'fb_match_summary' tag.
143 | ## Kept outside `backfill_fb_match_summary()` so the data can be checked locally before it is uploaded.
144 | source(file.path('R', 'piggyback.R'))
145 | local_data |>
146 | mutate(
147 | name = sprintf('%s_%s_%s_match_summary', country, gender, tier),
148 | res = map2(
149 | data,
150 | name,
151 | ~{
152 | write_worldfootballr_rds_and_csv(
153 | x = .x,
154 | name = .y,
155 | tag = 'fb_match_summary'
156 | )
157 | }
158 | )
159 | )
160 |
--------------------------------------------------------------------------------
/R/fb_match_summary/shared_fb_match_summary.R:
--------------------------------------------------------------------------------
1 | library(dplyr)
2 | params <- bind_rows( # competitions to process: one row per country, with scalar `tier`/`gender` recycled within each group
3 | 'big5' = list(
4 | country = c('ENG', 'ESP', 'FRA', 'GER', 'ITA'),
5 | tier = '1st',
6 | gender = 'M'
7 | ),
8 | 'other_1st_M' = list(
9 | country = c('POR', 'NED', 'BRA', 'MEX', 'USA'),
10 | tier = '1st',
11 | gender = 'M'
12 | ),
13 | '1st_F' = list(
14 | country = c('ENG', 'USA'),
15 | tier = '1st',
16 | gender = 'F'
17 | ),
18 | '2nd_M' = list(
19 | country = c('ENG'),
20 | tier = '2nd',
21 | gender = 'M'
22 | ),
23 | .id = 'group' # the list names above ('big5', 'other_1st_M', ...) become the `group` column
24 | )
25 |
--------------------------------------------------------------------------------
/R/fb_match_summary/update_fb_match_summary.R:
--------------------------------------------------------------------------------
1 | library(worldfootballR)
2 | library(tidyr)
3 | library(dplyr)
4 | library(readr)
5 | library(purrr)
6 | library(tibble)
7 | library(rlang)
8 |
9 | source(file.path('R', 'piggyback.R'))
10 | source(file.path('R', 'fb_match_summary', 'shared_fb_match_summary.R'))
11 | # Competition/season lookup table read from the worldfootballR_data repository.
12 | all_seasons <- read_csv(
13 | 'https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/master/raw-data/all_leages_and_cups/all_competitions.csv'
14 | )
15 | # Keep the distinct seasons (ending 2017 or later) of the competitions listed in `params`.
16 | seasons <- all_seasons |>
17 | semi_join(
18 | params,
19 | by = c('country', 'tier', 'gender')
20 | ) |>
21 | filter(season_end_year >= 2017L) |>
22 | distinct(
23 | country,
24 | gender,
25 | tier,
26 | season_end_year
27 | )
28 | # Scrape the summary for a single match URL, logging the URL first.
29 | scrape_fb_match_summary <- function(match_url) {
30 | message(sprintf('Scraping matches for %s.', match_url))
31 | fb_match_summary(match_url)
32 | }
33 | # Error-tolerant scraper: a failed scrape yields an empty tibble instead of aborting; `quiet = FALSE` keeps the error from being silenced.
34 | possibly_scrape_fb_match_summary <- possibly(
35 | scrape_fb_match_summary,
36 | otherwise = tibble(),
37 | quiet = FALSE
38 | )
39 | # Append summaries for not-yet-stored matches of the latest season to the released data set, re-upload it and return it.
40 | fb_match_summary_tag <- 'fb_match_summary'
41 | update_fb_match_summary <- function(country = 'ENG', gender = 'M', tier = '1st') {
42 | name <- sprintf('%s_%s_%s_match_summary', country, gender, tier)
43 | message(sprintf('Updating %s.', name))
44 | # Seasons known for this competition (`seasons` is built earlier in this script).
45 | filtered_seasons <- seasons |>
46 | filter(
47 | country == !!country,
48 | gender == !!gender,
49 | tier == !!tier
50 | ) |>
51 | pull(season_end_year)
52 |
53 | latest_season <- max(filtered_seasons)
54 | # Only the latest season is checked for new matches.
55 | match_urls <- fb_match_urls(
56 | country = country,
57 | tier = tier,
58 | gender = gender,
59 | season_end_year = latest_season
60 | )
61 | # Compare against the match URLs already present in the released data.
62 | existing_match_summary <- read_worldfootballr_rds(
63 | name = name,
64 | tag = fb_match_summary_tag
65 | )
66 | existing_match_urls <- unique(existing_match_summary$MatchURL)
67 | new_match_urls <- setdiff(match_urls, existing_match_urls)
68 | # Nothing new to scrape: return the stored data untouched, without uploading.
69 | if (length(new_match_urls) == 0) {
70 | message(sprintf('Not updating data for `country = "%s"`, `gender = "%s"`, `tier = "%s"`.', country, gender, tier))
71 | return(existing_match_summary)
72 | }
73 |
74 | scrape_time_utc <- as.POSIXlt(Sys.time(), tz = 'UTC')
75 | # `.id` stores each source URL in the `MatchURL` column.
76 | new_match_summary <- new_match_urls |>
77 | set_names() |>
78 | map_dfr(
79 | possibly_scrape_fb_match_summary,
80 | .id = 'MatchURL'
81 | ) |>
82 | relocate(MatchURL, .before = 1)
83 |
84 | match_results <- load_match_results(
85 | country = country,
86 | tier = tier,
87 | gender = gender,
88 | season_end_year = filtered_seasons
89 | )
90 | # Attach competition metadata to the new rows, then append them to the stored data.
91 | match_summary <- bind_rows(
92 | existing_match_summary,
93 | new_match_summary |>
94 | inner_join(
95 | match_results |>
96 | transmute(
97 | Competition_Name, Gender, Country,
98 | Tier = .env$tier,
99 | Season_End_Year,
100 | MatchURL
101 | ),
102 | # explicit key (as in the backfill script) instead of an implicit natural join on all shared columns
103 | by = 'MatchURL'
104 | )
105 | ) |>
106 | as_tibble()
107 |
108 | attr(match_summary, 'scrape_timestamp') <- scrape_time_utc
109 |
110 | write_worldfootballr_rds_and_csv(
111 | x = match_summary,
112 | name = name,
113 | tag = fb_match_summary_tag
114 | )
115 |
116 | match_summary
117 | }
118 | # Run the update once per row of `params`; results land in a `data` list-column that is not assigned.
119 | params |>
120 | mutate(
121 | data = pmap(
122 | list(
123 | country,
124 | gender,
125 | tier
126 | ),
127 | ~update_fb_match_summary(
128 | country = ..1,
129 | gender = ..2,
130 | tier = ..3
131 | )
132 | )
133 | )
134 |
135 |
--------------------------------------------------------------------------------
/R/piggyback.R:
--------------------------------------------------------------------------------
1 | library(purrr)
2 | library(readr)
3 | library(piggyback)
4 | # CSV writer with `na = ""` pre-filled. NOTE(review): the empty `... =` looks like purrr's marker for where later arguments go relative to `na` — confirm against the installed purrr version.
5 | write_csv2 <- purrr::partial(
6 | readr::write_csv,
7 | na = "",
8 | ... =
9 | )
10 | # Serialise `x` to `<name>.<ext>` in the session temp directory and upload that file to the `tag` release of `worldfootballr_repo`.
11 | worldfootballr_repo <- "JaseZiv/worldfootballR_data"
12 | write_worldfootballr <- function(x, name, tag, ext = c("rds", "csv")) {
13 | ext <- match.arg(ext)
14 | # pick the writer for the requested format, then build the temp-file destination
15 | writer <- switch(
16 | ext,
17 | "rds" = readr::write_rds,
18 | "csv" = write_csv2
19 | )
20 | out_path <- file.path(
21 | tempdir(check = TRUE),
22 | sprintf("%s.%s", name, ext)
23 | )
24 | writer(x, out_path)
25 | piggyback::pb_upload(
26 | out_path, repo = worldfootballr_repo, tag = tag
27 | )
28 | }
29 | # Upload `x` under `name` twice: once as an RDS file and once as a CSV file.
30 | write_worldfootballr_rds_and_csv <- function(x, name, tag) {
31 | upload_as <- function(file_ext) {
32 | write_worldfootballr(
33 | x = x, name = name,
34 | tag = tag, ext = file_ext
35 | )
36 | }
37 | # formats are handled in a fixed order: RDS first, then CSV;
38 | # walk() is used purely for the upload side effect
39 | purrr::walk(c("rds", "csv"), upload_as)
40 | }
41 | # Read `<name>.rds` from the `tag` release of `worldfootballr_repo`; errors if the file cannot be downloaded.
42 | read_worldfootballr_rds <- function(name, tag) {
43 | con <- url(sprintf("https://github.com/%s/releases/download/%s/%s.rds", worldfootballr_repo, tag, name))
44 | on.exit(close(con), add = TRUE) # always release the connection, including when the download fails
45 | readRDS(con)
46 | }
47 | read_worldfootballr_csv <- function(name, tag) { # read `<name>.csv` from the `tag` release with base read.csv()
48 | path <- sprintf("https://github.com/%s/releases/download/%s/%s.csv", worldfootballr_repo, tag, name)
49 | read.csv(path)
50 | }
51 | # Non-throwing RDS reader: its return value carries `result` and `error` elements (both are used below).
52 | safely_read_worldfootballr_rds <- purrr::safely(read_worldfootballr_rds)
53 | # Read a released data set, preferring the RDS file and falling back to the CSV whenever the RDS read errors.
54 | read_worldfootballr <- function(name, tag) {
55 | res <- safely_read_worldfootballr_rds(name, tag)
56 | if (is.null(res$error)) {
57 | return(res$result)
58 | }
59 | message(
60 | sprintf(
61 | 'Missing RDS file at `name = "%s"` (`tag: "%s"`).\nTrying to read from the CSV.',
62 | name,
63 | tag
64 | )
65 | )
66 | read_worldfootballr_csv(name, tag)
67 | }
68 |
--------------------------------------------------------------------------------
/R/tm_player_vals/backfill_big5_player_vals.R:
--------------------------------------------------------------------------------
1 | library(worldfootballR)
2 | library(tidyverse)
3 | library(here)
4 | # Accumulator for all seasons; without this, `df` resolves to the function stats::df and bind_rows() fails.
5 | df <- data.frame()
6 | for(each_season in c(2010:2022)) {
7 | print(paste0("scraping season: ", each_season))
8 |
9 | each_df <- tm_player_market_values(country_name = c("England", "Spain", "France", "Italy", "Germany"),
10 | start_year = each_season)
11 | df <- bind_rows(df, each_df)
12 | }
13 |
14 | # Save the accumulated data (the previous code saved `full`, which is never defined in this script).
15 | saveRDS(df, here("data", "tm_player_vals", "big5_player_vals.rds"))
16 |
17 |
18 |
19 |
20 |
--------------------------------------------------------------------------------
/R/tm_player_vals/update_big5_player_vals.R:
--------------------------------------------------------------------------------
1 | library(worldfootballR)
2 | library(tidyverse)
3 | library(here)
4 | library(rvest)
5 | library(xml2)
6 |
7 | # Refresh the stored big 5 player valuations: re-scrape the current season, or append a newly available one.
8 | existing <- readRDS(here("data", "tm_player_vals", "big5_player_vals.rds"))
9 |
10 | scrape_time_utc <- as.POSIXlt(Sys.time(), tz = "UTC")
11 |
12 | # need to get the latest season available for the big 5 on transfermarkt (using the EPL as the proxy here)
13 | epl_url <- "https://www.transfermarkt.com/premier-league/startseite/wettbewerb/GB1"
14 | pg <- read_html(epl_url)
15 | max_season <- pg %>% html_nodes(".chzn-select option") %>% html_attr("value") %>% purrr::pluck(1) %>% as.numeric()
16 |
17 |
18 | # if the latest season is the same as the last season we currently have, update just that season
19 | if(max(existing$season_start_year, na.rm = TRUE) == max_season) {
20 |
21 | print(paste0("Scraping data to update current season (", max_season, ")"))
22 |
23 | update_season <- tm_player_market_values(country_name = c("England", "Spain", "France", "Italy", "Germany"),
24 | start_year = max_season)
25 |
26 | existing_except_new <- existing %>%
27 | filter(season_start_year != max_season)
28 |
29 | new_df <- bind_rows(
30 | existing_except_new,
31 | update_season
32 | )
33 |
34 | # if the latest season on the site is ahead of the latest data we have stored, then append the new data
35 | } else if(max(existing$season_start_year, na.rm = TRUE) < max_season) {
36 |
37 | print(paste0("Scraping data to get new season (", max_season, ")"))
38 |
39 | update_season <- tm_player_market_values(country_name = c("England", "Spain", "France", "Italy", "Germany"),
40 | start_year = max_season)
41 |
42 | new_df <- bind_rows(
43 | existing,
44 | update_season
45 | )
46 | # otherwise, error, because the site reports an older season than the one already scraped and we don't want to overwrite it
47 | } else {
48 | stop(paste0("There is an error and this process might incorrectly overwrite existing data as the latest season available at ",
49 | epl_url, " is less than the last season data extracted for, which is the season starting ",
50 | max(existing$season_start_year)))
51 | }
52 |
53 | # then if a new df has been created, then write it to file
54 | if(nrow(new_df) > 0) {
55 | attr(new_df, "scrape_timestamp") <- scrape_time_utc
56 | saveRDS(new_df, here("data", "tm_player_vals", "big5_player_vals.rds"))
57 | }
58 |
59 |
60 |
61 |
--------------------------------------------------------------------------------
/R/tm_transfers/backfill_big5_transfers.R:
--------------------------------------------------------------------------------
1 | library(worldfootballR)
2 | library(dplyr)
3 | library(here)
4 |
5 |
6 | countries <- c("England", "Italy", "Germany", "Spain", "France")
7 | all_transfers <- data.frame()
8 | # scrape every team's transfers, country by country and season by season
9 | for(each_country in countries) {
10 |
11 | each_season_df <- data.frame()
12 |
13 | for(start_year in 2010:2022) {
14 | print(paste0("Scraping country: ", each_country, " for season: ", start_year))
15 | team_urls <- tm_league_team_urls(country_name = each_country, start_year = start_year)
16 | each_season_df <- rbind(each_season_df, tm_team_transfers(team_urls))
17 | }
18 |
19 | # add this country's seasons to the overall result
20 | all_transfers <- rbind(all_transfers, each_season_df)
21 |
22 | }
23 |
24 |
25 |
26 | # The initial scrape was run in Sep 2022, so teams were assigned the league they played in at that time,
27 | # whereas we want the league relevant to the season that was scraped. Two teams also no longer exist and
28 | # need their country mapped by hand. Both corrections are applied manually here:
29 |
30 | all_transfers <- dplyr::mutate(
31 | all_transfers,
32 | country = dplyr::case_when(
33 | team_name == "Athlétic Club Arlésien" ~ "France",
34 | team_name == "Chievo Verona" ~ "Italy",
35 | TRUE ~ country
36 | ),
37 | # `league` is derived from the corrected `country` computed just above
38 | league = dplyr::case_when(
39 | country == "England" ~ "Premier League",
40 | country == "France" ~ "Ligue 1",
41 | country == "Germany" ~ "Bundesliga",
42 | country == "Italy" ~ "Serie A",
43 | country == "Spain" ~ "LaLiga"
44 | )
45 | )
46 |
47 |
48 |
49 | saveRDS(all_transfers, here::here("data", "tm_transfers", "big_5_transfers.rds"))
--------------------------------------------------------------------------------
/R/understat_league_shots/backup_understat_local.R:
--------------------------------------------------------------------------------
1 | library(worldfootballR)
2 | library(dplyr)
3 | library(janitor)
4 | source(here::here("R", "piggyback.R")) # defines read_worldfootballr_rds(); sourced before the working directory changes
5 | setwd(paste0(here::here(), "/data/understat_shots"))
6 |
7 | # Back up each league's released shot data as a local RDS file in the working directory set above.
8 | leagues <- c("EPL", "La liga", "Bundesliga", "Serie A", "Ligue 1", "RFPL")
9 |
10 | for(each_league in leagues) {
11 | # release asset and local file are both named after the cleaned league name
12 | league_name_clean <- janitor::make_clean_names(each_league)
13 |
14 | f <- read_worldfootballr_rds(name=paste0(league_name_clean, "_shot_data"), tag = "understat_shots") %>%
15 | mutate(minute = as.numeric(minute))
16 |
17 | saveRDS(f, paste0(league_name_clean, "_shot_data.rds"))
18 | }
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
--------------------------------------------------------------------------------
/R/understat_league_shots/update_understat_shots.R:
--------------------------------------------------------------------------------
1 | library(worldfootballR)
2 | library(tidyverse)
3 | library(here)
4 |
5 | # set the working directory to make reading and writing easier
6 | # setwd(here("data", "understat_shots"))
7 |
8 | source("R/piggyback.R")
9 |
10 | # valid league names for scraping data
11 | leagues <- c("EPL", "La liga", "Bundesliga", "Serie A", "Ligue 1", "RFPL")
12 |
13 | # GET `page_url` with the cookie `beget=begetok` set and return the parsed response content, or NA if an error is raised.
14 | .get_understat_json <- function(page_url) {
15 | tryCatch(
16 | httr::GET(page_url, httr::set_cookies(.cookies = c("beget" = "begetok"))) %>% httr::content(),
17 | error = function(e) NA
18 | )
19 | }
20 |
21 | # For each league: find the latest season on Understat, scrape shots for finished matches not yet stored, and re-upload the data.
22 | for(each_league in leagues) {
23 | scrape_time_utc <- as.POSIXlt(Sys.time(), tz = "UTC")
24 | # league names containing a space use an underscore in the Understat URL
25 | if(each_league == "La liga") {
26 | each_league_clean <- "La_liga"
27 | } else if (each_league == "Serie A") {
28 | each_league_clean <- "Serie_A"
29 | } else if (each_league == "Ligue 1") {
30 | each_league_clean <- "Ligue_1"
31 | } else {
32 | each_league_clean <- each_league
33 | }
34 |
35 | # first we want to get the current season:
36 | main_url <- "https://understat.com/"
37 | page_url <- paste0(main_url, "league/", each_league_clean)
38 | page <- tryCatch( .get_understat_json(page_url), error = function(e) NA)
39 | # NOTE(review): if the request failed, `page` is NA here and the node lookup below presumably errors — confirm.
40 | season_element <- page %>% rvest::html_nodes(xpath = '//*[@name="season"]') %>%
41 | rvest::html_nodes("option")
42 | season <- season_element %>% rvest::html_attr("value") %>% as.numeric() %>% max(na.rm = T)
43 |
44 | # also need to read in the existing shot data file to see which games have not yet been collected:
45 | # to do this, we need to clean the valid league names to match the file structure
46 | league_name_clean <- janitor::make_clean_names(each_league)
47 | # then read in data
48 | f <- read_worldfootballr_rds(name=paste0(league_name_clean, "_shot_data"), tag = "understat_shots") %>%
49 | mutate(minute = as.numeric(minute))
50 |
51 | # need to manually coerce columns to numeric as of the start of 22/23 season to match old data
52 | f <- f %>%
53 | mutate(
54 | id = as.numeric(id),
55 | player_id = as.numeric(player_id),
56 | season = as.numeric(season),
57 | match_id = as.numeric(match_id)
58 | )
59 |
60 | # also need to read in the match data to get all match IDs, to then compare which matches have been played (and will then have shot data)
61 | match_data <- tryCatch(worldfootballR::understat_league_match_results(league = each_league, season_start_year = season), error = function(e) data.frame())
62 | # an empty `match_data` (e.g. after a failed request) means there is nothing to fetch
63 | if(nrow(match_data) != 0) {
64 | match_data <- match_data %>% filter(isResult == TRUE)
65 | # only want to keep those match IDs for which we don't have shot data for
66 | missing_ids <- match_data$match_id[!match_data$match_id %in% f$match_id]
67 | } else {
68 | missing_ids <-c()
69 | }
70 |
71 | # then, if there are any matches where we don't already have shot data, go and get them
72 | if(length(missing_ids) > 0) {
73 | match_urls <- paste0("https://understat.com/match/", missing_ids)
74 |
75 | shots <- match_urls %>% purrr::map_df(worldfootballR::understat_match_shots)
76 | # there must have been a change to the json data exposed by Understat at some point, so we manually set it now
77 | shots$league <- each_league
78 | # need to manually coerce columns to numeric as of the start of 22/23 season to match old data
79 | shots <- shots %>%
80 | mutate(
81 | id = as.numeric(id),
82 | player_id = as.numeric(player_id),
83 | season = as.numeric(season),
84 | match_id = as.numeric(match_id)
85 | )
86 |
87 | # column names were slightly different prior to the 2021/2022 season - we want to keep these consistent
88 | if(any(grepl("last_action", names(shots)))) {
89 | shots <- shots %>%
90 | rename(X=x, Y=y, xG=x_g, shotType=shot_type, lastAction=last_action)
91 | }
92 | # join them all together
93 | f <- bind_rows(f, shots)
94 | }
95 |
96 | # now write the file again, regardless of whether there was new data. Will also freshly timestamp the rds
97 | attr(f, "scrape_timestamp") <- scrape_time_utc
98 | # only the RDS format is uploaded here
99 | write_worldfootballr(x=f, name=paste0(league_name_clean, "_shot_data"), tag = "understat_shots", ext = "rds")
100 |
101 | }
102 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # worldfootballR_data
2 |
3 |
4 | 
5 |
6 |
7 | # worldfootballR_data
8 | Repository to hold various data sets scraped from the sites supported in the [`worldfootballR`](https://github.com/JaseZiv/worldfootballR) package. Current sites include:
9 |
10 | * fbref.com
11 | * transfermarkt.com
12 | * understat.com
13 | * fotmob.com
14 |
15 | ***
16 |
17 | ## Show your support
18 | Follow me on Twitter ([jaseziv](https://twitter.com/jaseziv)) for updates
19 |
20 | If this data helps you, all I ask is that you star this repo. If you would also like to show your support and contribute towards server time and data storage costs, feel free to send a small donation through the link below.
21 |
22 |
23 |
24 | ***
25 |
26 | ## The Data
27 |
28 | The data can be split up into two main categories:
29 |
30 | ### 1. Supporting data to help with the functions in `worldfootballR`:
31 |
32 | * [Fbref Comps and Leagues](https://github.com/JaseZiv/worldfootballR_data/raw/master/raw-data/all_leages_and_cups/all_competitions.csv)
33 | * [Transfermarkt Leagues](https://github.com/JaseZiv/worldfootballR_data/raw/master/raw-data/transfermarkt_leagues/main_comp_seasons.csv)
34 | * [Mapping between FBref and Transfermarkt Players](https://github.com/JaseZiv/worldfootballR_data/blob/master/raw-data/fbref-tm-player-mapping/output/fbref_to_tm_mapping.csv)
35 |
36 |
37 | ### 2. Data sets used in the `load_` functions in `worldfootballR`:
38 |
39 | * [FBref Big 5 League Advanced season stats](https://github.com/JaseZiv/worldfootballR_data/tree/master/data/fb_big5_advanced_season_stats)
40 |
41 | For players and teams, all advanced statistic data available on the site
42 |
43 | * [FBref match results - Domestic Leagues](https://github.com/JaseZiv/worldfootballR_data/tree/master/data/match_results)
44 |
45 | Includes results of matches played in all domestic leagues available on the site, for every year in which match results are listed under the fixtures section of the league.
46 |
47 | * [FBref match results - International matches and domestic cups](https://github.com/JaseZiv/worldfootballR_data/tree/master/data/match_results_cups)
48 |
49 | Includes results of matches played in all domestic cups and international competitions available on the site, for every year in which match results are listed under the fixtures section of the cup/competition.
50 |
51 | * [Understat shot locations for the Big 5 leagues and RFPL](https://github.com/JaseZiv/worldfootballR_data/tree/master/data/understat_shots)
52 |
53 | Shooting data and locations for the big 5 leagues and the RFPL since the 2014/15 season.
54 |
55 | Shout out to [Mark Wilkins](https://twitter.com/biscuitchaser) for supplying the original data dump of the seasons for all big 5 leagues from 2014/15 to 2021/22. The data was originally [here](https://github.com/Markjwilkins/Understat)
56 |
--------------------------------------------------------------------------------
/data/fb_advanced_match_stats/.gitignore:
--------------------------------------------------------------------------------
1 | *
2 | !.gitignore
3 |
--------------------------------------------------------------------------------
/data/fb_big5_advanced_season_stats/big5_player_defense.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/fb_big5_advanced_season_stats/big5_player_defense.rds
--------------------------------------------------------------------------------
/data/fb_big5_advanced_season_stats/big5_player_gca.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/fb_big5_advanced_season_stats/big5_player_gca.rds
--------------------------------------------------------------------------------
/data/fb_big5_advanced_season_stats/big5_player_keepers.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/fb_big5_advanced_season_stats/big5_player_keepers.rds
--------------------------------------------------------------------------------
/data/fb_big5_advanced_season_stats/big5_player_keepers_adv.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/fb_big5_advanced_season_stats/big5_player_keepers_adv.rds
--------------------------------------------------------------------------------
/data/fb_big5_advanced_season_stats/big5_player_misc.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/fb_big5_advanced_season_stats/big5_player_misc.rds
--------------------------------------------------------------------------------
/data/fb_big5_advanced_season_stats/big5_player_passing.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/fb_big5_advanced_season_stats/big5_player_passing.rds
--------------------------------------------------------------------------------
/data/fb_big5_advanced_season_stats/big5_player_passing_types.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/fb_big5_advanced_season_stats/big5_player_passing_types.rds
--------------------------------------------------------------------------------
/data/fb_big5_advanced_season_stats/big5_player_playing_time.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/fb_big5_advanced_season_stats/big5_player_playing_time.rds
--------------------------------------------------------------------------------
/data/fb_big5_advanced_season_stats/big5_player_possession.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/fb_big5_advanced_season_stats/big5_player_possession.rds
--------------------------------------------------------------------------------
/data/fb_big5_advanced_season_stats/big5_player_shooting.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/fb_big5_advanced_season_stats/big5_player_shooting.rds
--------------------------------------------------------------------------------
/data/fb_big5_advanced_season_stats/big5_player_standard.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/fb_big5_advanced_season_stats/big5_player_standard.rds
--------------------------------------------------------------------------------
/data/fb_big5_advanced_season_stats/big5_team_defense.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/fb_big5_advanced_season_stats/big5_team_defense.rds
--------------------------------------------------------------------------------
/data/fb_big5_advanced_season_stats/big5_team_gca.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/fb_big5_advanced_season_stats/big5_team_gca.rds
--------------------------------------------------------------------------------
/data/fb_big5_advanced_season_stats/big5_team_keepers.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/fb_big5_advanced_season_stats/big5_team_keepers.rds
--------------------------------------------------------------------------------
/data/fb_big5_advanced_season_stats/big5_team_keepers_adv.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/fb_big5_advanced_season_stats/big5_team_keepers_adv.rds
--------------------------------------------------------------------------------
/data/fb_big5_advanced_season_stats/big5_team_misc.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/fb_big5_advanced_season_stats/big5_team_misc.rds
--------------------------------------------------------------------------------
/data/fb_big5_advanced_season_stats/big5_team_passing.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/fb_big5_advanced_season_stats/big5_team_passing.rds
--------------------------------------------------------------------------------
/data/fb_big5_advanced_season_stats/big5_team_passing_types.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/fb_big5_advanced_season_stats/big5_team_passing_types.rds
--------------------------------------------------------------------------------
/data/fb_big5_advanced_season_stats/big5_team_playing_time.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/fb_big5_advanced_season_stats/big5_team_playing_time.rds
--------------------------------------------------------------------------------
/data/fb_big5_advanced_season_stats/big5_team_possession.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/fb_big5_advanced_season_stats/big5_team_possession.rds
--------------------------------------------------------------------------------
/data/fb_big5_advanced_season_stats/big5_team_shooting.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/fb_big5_advanced_season_stats/big5_team_shooting.rds
--------------------------------------------------------------------------------
/data/fb_big5_advanced_season_stats/big5_team_standard.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/fb_big5_advanced_season_stats/big5_team_standard.rds
--------------------------------------------------------------------------------
/data/fb_big5_advanced_statsbomb/README.md:
--------------------------------------------------------------------------------
1 | # StatsBomb via FBRef
2 |
3 | FBRef changed data providers from StatsBomb to Opta in late October 2022. This meant that all previously displayed data changed overnight to reflect Opta's counting/estimating of statistics.
4 |
5 | For any analysts looking to maintain previous analysis, or to be able to compare StatsBomb and Opta, use the data files in this directory for StatsBomb data, which was last updated 2022-08-22.
6 |
7 | For the equivalent Opta data sets, see the `data/fb_big5_advanced_season_stats` directory in this repository.
--------------------------------------------------------------------------------
/data/fb_big5_advanced_statsbomb/big5_player_defense.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/fb_big5_advanced_statsbomb/big5_player_defense.rds
--------------------------------------------------------------------------------
/data/fb_big5_advanced_statsbomb/big5_player_gca.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/fb_big5_advanced_statsbomb/big5_player_gca.rds
--------------------------------------------------------------------------------
/data/fb_big5_advanced_statsbomb/big5_player_keepers.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/fb_big5_advanced_statsbomb/big5_player_keepers.rds
--------------------------------------------------------------------------------
/data/fb_big5_advanced_statsbomb/big5_player_keepers_adv.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/fb_big5_advanced_statsbomb/big5_player_keepers_adv.rds
--------------------------------------------------------------------------------
/data/fb_big5_advanced_statsbomb/big5_player_misc.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/fb_big5_advanced_statsbomb/big5_player_misc.rds
--------------------------------------------------------------------------------
/data/fb_big5_advanced_statsbomb/big5_player_passing.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/fb_big5_advanced_statsbomb/big5_player_passing.rds
--------------------------------------------------------------------------------
/data/fb_big5_advanced_statsbomb/big5_player_passing_types.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/fb_big5_advanced_statsbomb/big5_player_passing_types.rds
--------------------------------------------------------------------------------
/data/fb_big5_advanced_statsbomb/big5_player_playing_time.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/fb_big5_advanced_statsbomb/big5_player_playing_time.rds
--------------------------------------------------------------------------------
/data/fb_big5_advanced_statsbomb/big5_player_possession.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/fb_big5_advanced_statsbomb/big5_player_possession.rds
--------------------------------------------------------------------------------
/data/fb_big5_advanced_statsbomb/big5_player_shooting.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/fb_big5_advanced_statsbomb/big5_player_shooting.rds
--------------------------------------------------------------------------------
/data/fb_big5_advanced_statsbomb/big5_player_standard.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/fb_big5_advanced_statsbomb/big5_player_standard.rds
--------------------------------------------------------------------------------
/data/fb_big5_advanced_statsbomb/big5_team_defense.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/fb_big5_advanced_statsbomb/big5_team_defense.rds
--------------------------------------------------------------------------------
/data/fb_big5_advanced_statsbomb/big5_team_gca.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/fb_big5_advanced_statsbomb/big5_team_gca.rds
--------------------------------------------------------------------------------
/data/fb_big5_advanced_statsbomb/big5_team_keepers.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/fb_big5_advanced_statsbomb/big5_team_keepers.rds
--------------------------------------------------------------------------------
/data/fb_big5_advanced_statsbomb/big5_team_keepers_adv.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/fb_big5_advanced_statsbomb/big5_team_keepers_adv.rds
--------------------------------------------------------------------------------
/data/fb_big5_advanced_statsbomb/big5_team_misc.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/fb_big5_advanced_statsbomb/big5_team_misc.rds
--------------------------------------------------------------------------------
/data/fb_big5_advanced_statsbomb/big5_team_passing.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/fb_big5_advanced_statsbomb/big5_team_passing.rds
--------------------------------------------------------------------------------
/data/fb_big5_advanced_statsbomb/big5_team_passing_types.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/fb_big5_advanced_statsbomb/big5_team_passing_types.rds
--------------------------------------------------------------------------------
/data/fb_big5_advanced_statsbomb/big5_team_playing_time.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/fb_big5_advanced_statsbomb/big5_team_playing_time.rds
--------------------------------------------------------------------------------
/data/fb_big5_advanced_statsbomb/big5_team_possession.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/fb_big5_advanced_statsbomb/big5_team_possession.rds
--------------------------------------------------------------------------------
/data/fb_big5_advanced_statsbomb/big5_team_shooting.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/fb_big5_advanced_statsbomb/big5_team_shooting.rds
--------------------------------------------------------------------------------
/data/fb_big5_advanced_statsbomb/big5_team_standard.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/fb_big5_advanced_statsbomb/big5_team_standard.rds
--------------------------------------------------------------------------------
/data/fb_match_shooting/.gitignore:
--------------------------------------------------------------------------------
1 | *
2 | !.gitignore
--------------------------------------------------------------------------------
/data/fb_match_summary/.gitignore:
--------------------------------------------------------------------------------
1 | *
2 | !.gitignore
3 |
--------------------------------------------------------------------------------
/data/fotmob_match_details/.gitignore:
--------------------------------------------------------------------------------
1 | *
2 | !.gitignore
--------------------------------------------------------------------------------
/data/match_results/ARG_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/ARG_match_results.rds
--------------------------------------------------------------------------------
/data/match_results/AUS_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/AUS_match_results.rds
--------------------------------------------------------------------------------
/data/match_results/AUT_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/AUT_match_results.rds
--------------------------------------------------------------------------------
/data/match_results/BEL_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/BEL_match_results.rds
--------------------------------------------------------------------------------
/data/match_results/BOL_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/BOL_match_results.rds
--------------------------------------------------------------------------------
/data/match_results/BRA_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/BRA_match_results.rds
--------------------------------------------------------------------------------
/data/match_results/BUL_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/BUL_match_results.rds
--------------------------------------------------------------------------------
/data/match_results/CAN_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/CAN_match_results.rds
--------------------------------------------------------------------------------
/data/match_results/CHI_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/CHI_match_results.rds
--------------------------------------------------------------------------------
/data/match_results/CHN_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/CHN_match_results.rds
--------------------------------------------------------------------------------
/data/match_results/COL_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/COL_match_results.rds
--------------------------------------------------------------------------------
/data/match_results/CRO_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/CRO_match_results.rds
--------------------------------------------------------------------------------
/data/match_results/CZE_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/CZE_match_results.rds
--------------------------------------------------------------------------------
/data/match_results/DEN_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/DEN_match_results.rds
--------------------------------------------------------------------------------
/data/match_results/ECU_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/ECU_match_results.rds
--------------------------------------------------------------------------------
/data/match_results/ENG_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/ENG_match_results.rds
--------------------------------------------------------------------------------
/data/match_results/ESP_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/ESP_match_results.rds
--------------------------------------------------------------------------------
/data/match_results/FIN_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/FIN_match_results.rds
--------------------------------------------------------------------------------
/data/match_results/FRA_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/FRA_match_results.rds
--------------------------------------------------------------------------------
/data/match_results/GER_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/GER_match_results.rds
--------------------------------------------------------------------------------
/data/match_results/GRE_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/GRE_match_results.rds
--------------------------------------------------------------------------------
/data/match_results/HUN_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/HUN_match_results.rds
--------------------------------------------------------------------------------
/data/match_results/IND_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/IND_match_results.rds
--------------------------------------------------------------------------------
/data/match_results/IRN_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/IRN_match_results.rds
--------------------------------------------------------------------------------
/data/match_results/ITA_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/ITA_match_results.rds
--------------------------------------------------------------------------------
/data/match_results/JPN_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/JPN_match_results.rds
--------------------------------------------------------------------------------
/data/match_results/KOR_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/KOR_match_results.rds
--------------------------------------------------------------------------------
/data/match_results/KSA_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/KSA_match_results.rds
--------------------------------------------------------------------------------
/data/match_results/MEX_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/MEX_match_results.rds
--------------------------------------------------------------------------------
/data/match_results/NED_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/NED_match_results.rds
--------------------------------------------------------------------------------
/data/match_results/NOR_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/NOR_match_results.rds
--------------------------------------------------------------------------------
/data/match_results/PAR_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/PAR_match_results.rds
--------------------------------------------------------------------------------
/data/match_results/PER_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/PER_match_results.rds
--------------------------------------------------------------------------------
/data/match_results/POL_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/POL_match_results.rds
--------------------------------------------------------------------------------
/data/match_results/POR_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/POR_match_results.rds
--------------------------------------------------------------------------------
/data/match_results/ROU_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/ROU_match_results.rds
--------------------------------------------------------------------------------
/data/match_results/RSA_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/RSA_match_results.rds
--------------------------------------------------------------------------------
/data/match_results/RUS_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/RUS_match_results.rds
--------------------------------------------------------------------------------
/data/match_results/SCO_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/SCO_match_results.rds
--------------------------------------------------------------------------------
/data/match_results/SRB_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/SRB_match_results.rds
--------------------------------------------------------------------------------
/data/match_results/SUI_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/SUI_match_results.rds
--------------------------------------------------------------------------------
/data/match_results/SWE_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/SWE_match_results.rds
--------------------------------------------------------------------------------
/data/match_results/TUR_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/TUR_match_results.rds
--------------------------------------------------------------------------------
/data/match_results/UKR_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/UKR_match_results.rds
--------------------------------------------------------------------------------
/data/match_results/URU_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/URU_match_results.rds
--------------------------------------------------------------------------------
/data/match_results/USA_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/USA_match_results.rds
--------------------------------------------------------------------------------
/data/match_results/VEN_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/VEN_match_results.rds
--------------------------------------------------------------------------------
/data/match_results_cups/README.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | output: github_document
3 | ---
4 |
5 | ## Competition Names for `worldfootballR::load_match_comp_results()`
6 |
7 |
8 |
9 | The below is a list of all the available competition names to pass to the `comp_name` value in the `worldfootballR::load_match_comp_results()` function:
10 |
11 | ```{r, echo=FALSE, warning=FALSE, message=FALSE}
12 | library(dplyr)
13 | seasons <- read.csv("https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/master/raw-data/all_leages_and_cups/all_competitions.csv", stringsAsFactors = F)
14 |
15 | # The cups below are one-off matches, so scores and fixtures are not needed for them:
16 | exclusion_cups <- c("UEFA Super Cup", "FA Community Shield", "Supercopa de España", "Trophée des Champions", "DFL-Supercup", "Supercoppa Italiana")
17 |
18 | latest_cup_seasons <- seasons %>%
19 | # keep only competitions whose competition_type does not contain "Leagues" (i.e. drop the league competitions):
20 | filter(!stringr::str_detect(.data$competition_type, "Leagues"),
21 | # and also drop the single-match cups listed in exclusion_cups:
22 | !.data$competition_name %in% exclusion_cups) %>%
23 | group_by(competition_name) %>% slice_max(season_end_year) %>% # per competition, keep the row(s) with the largest season_end_year
24 | distinct() %>%
25 | select(competition_type,competition_name,country,gender,governing_body,first_season,last_season,tier)
26 |
27 | latest_cup_seasons %>% pull(competition_name) # print the competition names as a character vector
28 | ```
29 |
30 |
--------------------------------------------------------------------------------
/data/match_results_cups/README.md:
--------------------------------------------------------------------------------
1 |
2 | ## Competition Names for `worldfootballR::load_match_comp_results()`
3 |
4 |
5 |
6 | The below is a list of all the available competition names to pass to
7 | the `comp_name` value in the `worldfootballR::load_match_comp_results()`
8 | function:
9 |
10 | ## [1] "AFC Asian Cup"
11 | ## [2] "AFC Asian Cup qualification"
12 | ## [3] "AFC Women's Asian Cup"
13 | ## [4] "AFC Women's Asian Cup Qualification"
14 | ## [5] "Africa Cup of Nations"
15 | ## [6] "Africa Cup of Nations qualification"
16 | ## [7] "Africa Women Cup of Nations"
17 | ## [8] "Algarve Cup"
18 | ## [9] "CONCACAF Gold Cup"
19 | ## [10] "CONCACAF W Championship"
20 | ## [11] "Copa America"
21 | ## [12] "Copa América Femenina"
22 | ## [13] "Copa del Rey"
23 | ## [14] "Copa Libertadores de América"
24 | ## [15] "Copa Sudamericana"
25 | ## [16] "Coppa Italia"
26 | ## [17] "Coupe de France"
27 | ## [18] "Coupe de la Ligue"
28 | ## [19] "DFB-Pokal"
29 | ## [20] "DFB-Pokal Frauen"
30 | ## [21] "English Football League Cup"
31 | ## [22] "European Championship"
32 | ## [23] "FA Cup"
33 | ## [24] "FIFA Confederations Cup"
34 | ## [25] "FIFA Women's World Cup"
35 | ## [26] "FIFA Women's World Cup Qualification (UEFA)"
36 | ## [27] "FIFA World Cup"
37 | ## [28] "FIFA World Cup Qualification — AFC"
38 | ## [29] "FIFA World Cup Qualification — CAF"
39 | ## [30] "FIFA World Cup Qualification — CONCACAF"
40 | ## [31] "FIFA World Cup Qualification — CONMEBOL"
41 | ## [32] "FIFA World Cup Qualification — OFC"
42 | ## [33] "FIFA World Cup Qualification — UEFA"
43 | ## [34] "International Friendlies (M)"
44 | ## [35] "International Friendlies (W)"
45 | ## [36] "NWSL Challenge Cup"
46 | ## [37] "NWSL Fall Series"
47 | ## [38] "OFC Nations Cup"
48 | ## [39] "OFC Women's Nations Cup"
49 | ## [40] "Olympics – Women's Tournament"
50 | ## [41] "SheBelieves Cup"
51 | ## [42] "UEFA Champions League"
52 | ## [43] "UEFA Euro Qualification"
53 | ## [44] "UEFA Europa Conference League"
54 | ## [45] "UEFA Europa League"
55 | ## [46] "UEFA Nations League"
56 | ## [47] "UEFA Women's Champions League"
57 | ## [48] "UEFA Women's Championship"
58 | ## [49] "UEFA Women's Euro Qualification"
59 |
--------------------------------------------------------------------------------
/data/match_results_cups/afc_asian_cup_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/afc_asian_cup_match_results.rds
--------------------------------------------------------------------------------
/data/match_results_cups/afc_asian_cup_qualification_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/afc_asian_cup_qualification_match_results.rds
--------------------------------------------------------------------------------
/data/match_results_cups/afc_womens_asian_cup_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/afc_womens_asian_cup_match_results.rds
--------------------------------------------------------------------------------
/data/match_results_cups/afc_womens_asian_cup_qualification_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/afc_womens_asian_cup_qualification_match_results.rds
--------------------------------------------------------------------------------
/data/match_results_cups/africa_cup_of_nations_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/africa_cup_of_nations_match_results.rds
--------------------------------------------------------------------------------
/data/match_results_cups/africa_cup_of_nations_qualification_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/africa_cup_of_nations_qualification_match_results.rds
--------------------------------------------------------------------------------
/data/match_results_cups/africa_women_cup_of_nations_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/africa_women_cup_of_nations_match_results.rds
--------------------------------------------------------------------------------
/data/match_results_cups/algarve_cup_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/algarve_cup_match_results.rds
--------------------------------------------------------------------------------
/data/match_results_cups/concacaf_gold_cup_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/concacaf_gold_cup_match_results.rds
--------------------------------------------------------------------------------
/data/match_results_cups/concacaf_w_championship_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/concacaf_w_championship_match_results.rds
--------------------------------------------------------------------------------
/data/match_results_cups/copa_america_femenina_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/copa_america_femenina_match_results.rds
--------------------------------------------------------------------------------
/data/match_results_cups/copa_america_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/copa_america_match_results.rds
--------------------------------------------------------------------------------
/data/match_results_cups/copa_del_rey_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/copa_del_rey_match_results.rds
--------------------------------------------------------------------------------
/data/match_results_cups/copa_libertadores_de_america_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/copa_libertadores_de_america_match_results.rds
--------------------------------------------------------------------------------
/data/match_results_cups/copa_sudamericana_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/copa_sudamericana_match_results.rds
--------------------------------------------------------------------------------
/data/match_results_cups/coppa_italia_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/coppa_italia_match_results.rds
--------------------------------------------------------------------------------
/data/match_results_cups/coupe_de_france_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/coupe_de_france_match_results.rds
--------------------------------------------------------------------------------
/data/match_results_cups/coupe_de_la_ligue_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/coupe_de_la_ligue_match_results.rds
--------------------------------------------------------------------------------
/data/match_results_cups/dfb_pokal_frauen_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/dfb_pokal_frauen_match_results.rds
--------------------------------------------------------------------------------
/data/match_results_cups/dfb_pokal_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/dfb_pokal_match_results.rds
--------------------------------------------------------------------------------
/data/match_results_cups/efl_cup_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/efl_cup_match_results.rds
--------------------------------------------------------------------------------
/data/match_results_cups/english_football_league_cup_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/english_football_league_cup_match_results.rds
--------------------------------------------------------------------------------
/data/match_results_cups/european_championship_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/european_championship_match_results.rds
--------------------------------------------------------------------------------
/data/match_results_cups/fa_cup_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/fa_cup_match_results.rds
--------------------------------------------------------------------------------
/data/match_results_cups/fifa_confederations_cup_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/fifa_confederations_cup_match_results.rds
--------------------------------------------------------------------------------
/data/match_results_cups/fifa_womens_world_cup_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/fifa_womens_world_cup_match_results.rds
--------------------------------------------------------------------------------
/data/match_results_cups/fifa_womens_world_cup_qualification_uefa_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/fifa_womens_world_cup_qualification_uefa_match_results.rds
--------------------------------------------------------------------------------
/data/match_results_cups/fifa_world_cup_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/fifa_world_cup_match_results.rds
--------------------------------------------------------------------------------
/data/match_results_cups/fifa_world_cup_qualification_afc_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/fifa_world_cup_qualification_afc_match_results.rds
--------------------------------------------------------------------------------
/data/match_results_cups/fifa_world_cup_qualification_caf_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/fifa_world_cup_qualification_caf_match_results.rds
--------------------------------------------------------------------------------
/data/match_results_cups/fifa_world_cup_qualification_concacaf_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/fifa_world_cup_qualification_concacaf_match_results.rds
--------------------------------------------------------------------------------
/data/match_results_cups/fifa_world_cup_qualification_conmebol_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/fifa_world_cup_qualification_conmebol_match_results.rds
--------------------------------------------------------------------------------
/data/match_results_cups/fifa_world_cup_qualification_inter_confederation_play_offs_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/fifa_world_cup_qualification_inter_confederation_play_offs_match_results.rds
--------------------------------------------------------------------------------
/data/match_results_cups/fifa_world_cup_qualification_ofc_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/fifa_world_cup_qualification_ofc_match_results.rds
--------------------------------------------------------------------------------
/data/match_results_cups/fifa_world_cup_qualification_uefa_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/fifa_world_cup_qualification_uefa_match_results.rds
--------------------------------------------------------------------------------
/data/match_results_cups/international_friendlies_m_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/international_friendlies_m_match_results.rds
--------------------------------------------------------------------------------
/data/match_results_cups/international_friendlies_w_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/international_friendlies_w_match_results.rds
--------------------------------------------------------------------------------
/data/match_results_cups/nwsl_challenge_cup_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/nwsl_challenge_cup_match_results.rds
--------------------------------------------------------------------------------
/data/match_results_cups/nwsl_fall_series_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/nwsl_fall_series_match_results.rds
--------------------------------------------------------------------------------
/data/match_results_cups/ofc_nations_cup_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/ofc_nations_cup_match_results.rds
--------------------------------------------------------------------------------
/data/match_results_cups/ofc_womens_nations_cup_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/ofc_womens_nations_cup_match_results.rds
--------------------------------------------------------------------------------
/data/match_results_cups/olympics_womens_tournament_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/olympics_womens_tournament_match_results.rds
--------------------------------------------------------------------------------
/data/match_results_cups/she_believes_cup_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/she_believes_cup_match_results.rds
--------------------------------------------------------------------------------
/data/match_results_cups/uefa_champions_league_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/uefa_champions_league_match_results.rds
--------------------------------------------------------------------------------
/data/match_results_cups/uefa_euro_qualification_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/uefa_euro_qualification_match_results.rds
--------------------------------------------------------------------------------
/data/match_results_cups/uefa_europa_conference_league_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/uefa_europa_conference_league_match_results.rds
--------------------------------------------------------------------------------
/data/match_results_cups/uefa_europa_league_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/uefa_europa_league_match_results.rds
--------------------------------------------------------------------------------
/data/match_results_cups/uefa_european_football_championship_qualifying_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/uefa_european_football_championship_qualifying_match_results.rds
--------------------------------------------------------------------------------
/data/match_results_cups/uefa_nations_league_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/uefa_nations_league_match_results.rds
--------------------------------------------------------------------------------
/data/match_results_cups/uefa_womens_champions_league_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/uefa_womens_champions_league_match_results.rds
--------------------------------------------------------------------------------
/data/match_results_cups/uefa_womens_championship_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/uefa_womens_championship_match_results.rds
--------------------------------------------------------------------------------
/data/match_results_cups/uefa_womens_euro_qualification_match_results.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/uefa_womens_euro_qualification_match_results.rds
--------------------------------------------------------------------------------
/data/tm_player_vals/big5_player_vals.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/tm_player_vals/big5_player_vals.rds
--------------------------------------------------------------------------------
/data/tm_transfers/big_5_transfers.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/tm_transfers/big_5_transfers.rds
--------------------------------------------------------------------------------
/data/understat_shots/bundesliga_shot_data.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/understat_shots/bundesliga_shot_data.rds
--------------------------------------------------------------------------------
/data/understat_shots/epl_shot_data.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/understat_shots/epl_shot_data.rds
--------------------------------------------------------------------------------
/data/understat_shots/la_liga_shot_data.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/understat_shots/la_liga_shot_data.rds
--------------------------------------------------------------------------------
/data/understat_shots/ligue_1_shot_data.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/understat_shots/ligue_1_shot_data.rds
--------------------------------------------------------------------------------
/data/understat_shots/rfpl_shot_data.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/understat_shots/rfpl_shot_data.rds
--------------------------------------------------------------------------------
/data/understat_shots/serie_a_shot_data.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/understat_shots/serie_a_shot_data.rds
--------------------------------------------------------------------------------
/man/figures/hex_sticker.R:
--------------------------------------------------------------------------------
1 |
2 | # install.packages("ggsoccer")
3 | library(hexSticker)
4 | library(ggplot2)
5 | library(ggsoccer)
6 | # Build the hex logos. Work from man/ so the relative `filename` paths below resolve to man/figures/.
7 | setwd(here::here("man"))
8 | sysfonts::font_add_google(name = "Chivo", family = "chivo")
9 | sysfonts::font_add_google(name = "Play", family = "play")
10 | # base image: a green pitch drawn with ggsoccer
11 | pitch <- ggplot() +
12 |   annotate_pitch(fill = "#538032", colour = "grey30") +
13 |   theme_pitch() +
14 |   theme(panel.background = element_rect(fill = "#538032"))
15 |
16 | # apply the void and transparent themes before the plot is placed inside the hexagon
17 | pitch <- pitch + theme_void() + theme_transparent()
18 |
19 | # full size hex logo:
20 | sticker(pitch,
21 |         package="worldfootballR data",
22 |         p_family = "play", p_size=11, p_color = "grey30",
23 |         s_x=1, s_y=.8, s_width=1.3, s_height=0.85,
24 |         h_fill = "#538032", h_color = "grey30",
25 |         url = "https://github.com/JaseZiv/worldfootballR_data", u_y = 0.07, u_x = 1.0, u_size = 3, u_color = "white", u_family = "play",
26 |         filename="figures/logo.png")
27 |
28 | # smaller size hex logo (the url previously read "hhttps://", which was printed verbatim on the sticker):
29 | sticker(pitch,
30 |         package="worldfootballR_data",
31 |         p_family = "play", p_size=11, p_color = "grey30",
32 |         s_x=1, s_y=.8, s_width=1.3, s_height=0.85,
33 |         h_fill = "#538032",
34 |         url = "https://github.com/JaseZiv/worldfootballR_data", u_y = 0.07, u_x = 1.0, u_size = 3, u_color = "white", u_family = "play",
35 |         filename="figures/logo_small_size.png") # modify size in viewer to dimensions 181x209 as a png
36 |
37 |
38 | ###########################################################################
39 | # Different Options: ------------------------------------------------------
40 |
41 | # sticker(pitch,
42 | # package="worldfootballR",
43 | # p_size=6, p_color = "white",
44 | # s_x=1, s_y=.8, s_width=1.3, s_height=0.85,
45 | # h_fill = "#538032",
46 | # url = "https://jaseziv.github.io/worldfootballR/", u_y = 0.09, u_x = 1.05, u_size = 1.2, u_color = "white",
47 | # filename="man/figures/logo_standard.png")
48 | #
49 | #
50 | # sticker(pitch,
51 | # package="worldfootballR",
52 | # p_family = "chivo",
53 | # p_size=6, p_color = "white",
54 | # s_x=1, s_y=.8, s_width=1.3, s_height=0.85,
55 | # h_fill = "#538032",
56 | # url = "https://jaseziv.github.io/worldfootballR/", u_y = 0.07, u_x = 1.0, u_size = 1.2, u_color = "white", u_family = "chivo",
57 | # filename="man/figures/logo_chivo.png")
58 | #
59 | #
60 | # sticker(pitch,
61 | # package="worldfootballR",
62 | # p_family = "play",
63 | # p_size=6, p_color = "white",
64 | # s_x=1, s_y=.8, s_width=1.3, s_height=0.85,
65 | # h_fill = "#538032",
66 | # h_color = "black",
67 | # spotlight = T, l_y = 0.83,
68 | # url = "https://jaseziv.github.io/worldfootballR/", u_y = 0.07, u_x = 1.0, u_size = 1.2, u_color = "white", u_family = "play",
69 | # filename="man/figures/logo_play_black_border.png")
70 |
71 |
72 |
--------------------------------------------------------------------------------
/man/figures/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/man/figures/logo.png
--------------------------------------------------------------------------------
/man/figures/logo_small_size.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/man/figures/logo_small_size.png
--------------------------------------------------------------------------------
/raw-data/countries_list/get_countries_list.R:
--------------------------------------------------------------------------------
1 | library(tidyverse)
2 | library(rvest)
3 |
4 | # Scrape the FBref countries table into a data frame with one row per table row.
5 | # Returns columns country, country_url, league_name, league_url and has_leage_page;
6 | # several links in one cell are joined with ",\n", a cell without links becomes NA.
7 | fb_country_leagues <- function() {
8 |   main_url <- "https://fbref.com"
9 |   countries_page <- xml2::read_html("https://fbref.com/en/countries/")
10 |   country_holder <- countries_page %>% rvest::html_nodes("#countries") %>% rvest::html_nodes("tbody") %>% rvest::html_nodes("tr")
11 |   # collapse scraped values to a single string; NA when the selector matched nothing
12 |   # (a zero-length value cannot be assigned into a data frame cell, and tryCatch does not cover the assignment)
13 |   collapse_or_na <- function(x) {
14 |     if(length(x) == 0) NA_character_ else paste(x, collapse = ",\n")
15 |   }
16 |   # prefix hrefs with the site root, keeping "no href" empty
17 |   # (paste0() alone would turn a zero-length input into the bare site root)
18 |   as_full_url <- function(hrefs) {
19 |     if(length(hrefs) == 0) character(0) else paste0(main_url, hrefs)
20 |   }
21 |
22 |   idx <- 0
23 |   countries_df <- data.frame()
24 |   for(each_row in country_holder) {
25 |     idx <- idx + 1
26 |     countries_df[idx, "country"] <- tryCatch(each_row %>% rvest::html_nodes(".left:nth-child(1) a") %>% rvest::html_text() %>% collapse_or_na(), error = function(e) NA_character_)
27 |     countries_df[idx, "country_url"] <- tryCatch(each_row %>% rvest::html_nodes(".left:nth-child(1) a") %>% rvest::html_attr("href") %>% as_full_url() %>% collapse_or_na(), error = function(e) NA_character_)
28 |     countries_df[idx, "league_name"] <- tryCatch(each_row %>% rvest::html_nodes(".right~ .right+ .left") %>% rvest::html_nodes("a") %>% rvest::html_text() %>% collapse_or_na(), error = function(e) NA_character_)
29 |     countries_df[idx, "league_url"] <- tryCatch(each_row %>% rvest::html_nodes(".right~ .right+ .left") %>% rvest::html_nodes("a") %>% rvest::html_attr("href") %>% as_full_url() %>% collapse_or_na(), error = function(e) NA_character_)
30 |   }
31 |
32 |   # NOTE: the column name `has_leage_page` (sic) is kept as-is because it is part of the written output
33 |   countries_df <- countries_df %>% dplyr::mutate(has_leage_page = !is.na(league_name))
34 |
35 |   return(countries_df)
36 | }
37 |
38 | # run the scraper defined above
39 | countries_df <- fb_country_leagues()
40 |
41 | # save the scraped table as csv (row names omitted)
42 | write.csv(countries_df, file = here::here("raw-data", "countries_list", "countries_df.csv"), row.names = FALSE)
43 |
--------------------------------------------------------------------------------
/raw-data/fbref-tm-player-mapping/README.md:
--------------------------------------------------------------------------------
1 | # Mapping FBref and Transfermarkt Players
2 |
3 | This section creates a map of player URLs from FBref players to the relevant player's data on Transfermarkt.
4 |
5 | Currently, the mappings are for players who have played in the following men's leagues since the start of the 2017-18 season:
6 |
7 | * Top 5 European leagues
8 | * MLS
9 | * Eredivisie
10 | * Portuguese Primeira Liga
11 | * Campeonato Brasileiro Serie A
12 | * Liga MX
13 | * English Championship
14 |
15 | I aim to update this fairly frequently, so that players who subsequently appear on FBref in these leagues will continue to be mapped.
16 |
17 | ***
18 |
19 | ## Show your support
20 | Follow me on Twitter ([jaseziv](https://twitter.com/jaseziv)) for updates
21 |
22 | If this data helps you, all I ask is that you star this repo. If you did want to show your support and contribute to server time and data storage costs, feel free to send a small donation through the link below.
23 |
24 |
25 |
26 | ***
27 |
28 | ## Usage
29 |
30 | To update the data, first run `prepare_working_files.R`. This will generate a list of csv outputs. There are two that will potentially need to be actioned:
31 |
32 | * `joined_missing.csv` contains the players who could not be matched by the automated script. These need to be investigated manually, and the corrected rows then saved over the `joined_missing_manual_fix.csv` file
33 | * `duplicate_players_df.csv` contains a list of players who have been joined using the automated script but for whom duplicates have arisen. Manually fix these duplicates by removing the spurious matches, then save the result to a file called `duplicate_players_df_manual_fix.csv`.
34 |
35 | Once these files have been manually fixed, run `create_final_data.R` and the final output file will be written to [`output/fbref_to_tm_mapping.csv`](https://github.com/JaseZiv/worldfootballR_data/blob/master/raw-data/fbref-tm-player-mapping/output/fbref_to_tm_mapping.csv).
36 |
37 | ### Update (2021-10-29): Write to Googlesheets
38 |
39 | The project also writes the mapped data to a Google Sheet, found [here](https://docs.google.com/spreadsheets/d/1GjjS9IRp6FVzVX5QyfmttMk8eYBtIzuZ_YIM0VWg8OY/edit#gid=61874932).
40 |
41 |
42 | ### Update (2022-08-11) Update Player Positions
43 |
44 | For players that were mapped originally, some may have changed positions since the initial mapping. To get current TM positions, run the file named `update_player_positions.R`.
45 |
46 | ***
47 |
48 | ## Contributing
49 |
50 | If anyone wants to contribute mapped players for different leagues, feel free to get in touch with me on Twitter [here](https://twitter.com/jaseziv), create an issue in [`worldfootballR`](https://github.com/JaseZiv/worldfootballR) or email me on `jaseziv83@gmail.com`.
51 |
--------------------------------------------------------------------------------
/raw-data/fbref-tm-player-mapping/create_final_data.R:
--------------------------------------------------------------------------------
1 | library(worldfootballR)
2 | library(tidyverse)
3 | library(googlesheets4)
4 | library(here)
5 | # Combine the working files with the existing mapping, then write the result to csv and Google Sheets
6 | existing_df <- read.csv("https://github.com/JaseZiv/worldfootballR_data/raw/master/raw-data/fbref-tm-player-mapping/output/fbref_to_tm_mapping.csv", stringsAsFactors = FALSE)
7 |
8 | # read in the working files (if reading or trimming the duplicates file errors, an empty data frame is used instead)
9 | joined_finished <- read.csv(here("raw-data", "fbref-tm-player-mapping", "output", "working-files", "joined_finished.csv"), stringsAsFactors = F)
10 | joined_missing <- read.csv(here("raw-data", "fbref-tm-player-mapping", "output", "working-files", "joined_missing_manual_fix.csv"), stringsAsFactors = F)
11 | duplicate_players <- tryCatch(read.csv(here("raw-data", "fbref-tm-player-mapping", "output", "working-files", "duplicate_players_df_manual_fix.csv"), stringsAsFactors = F) %>%
12 | select(-fbref_surname, -player_name), error = function(e) data.frame())
13 |
14 | # stack the three sources, sort by player and recode empty-string TM URLs to NA
15 | matched_data <- bind_rows(joined_finished, joined_missing, duplicate_players) %>%
16 | arrange(Player) %>%
17 | mutate(player_url = ifelse(player_url == "", NA_character_, player_url))
18 |
19 | # keep only the four output columns, renamed to the names used in the final mapping
20 | matched_data <- matched_data %>%
21 | select(PlayerFBref=Player, UrlFBref=Url, UrlTmarkt=player_url, TmPos=player_position)
22 |
23 | # some players have a TM URL but no position listed (because they haven't been matched automatically)
24 | missing_pos <- matched_data %>% filter(!is.na(UrlTmarkt) & is.na(TmPos)) %>% pull(UrlTmarkt)
25 |
26 | # for these URLs, we can get their positions using the `tm_player_bio` function
27 | missing_pos_bios <- tm_player_bio(player_urls = missing_pos)
28 |
29 | # need to clean these up from the bio data - for some reason some of them come with the position group (say "midfield") then the true position "Left Midfielder"
30 | # we only want "Left Midfielder", so everything up to and including the last "- " is removed
31 | missing_pos_bios <- missing_pos_bios %>%
32 | mutate(TmPos = case_when(
33 | grepl(" - ", position) ~ gsub(".*- ", "", position),
34 | TRUE ~ position
35 | ))
36 |
37 | # join the present and missing player data (rows without a TM URL are dropped in both parts)
38 | matched_data <- matched_data %>%
39 | filter(!is.na(UrlTmarkt)) %>%
40 | filter(!is.na(TmPos)) %>%
41 | bind_rows(
42 | matched_data %>%
43 | filter(!is.na(UrlTmarkt)) %>%
44 | filter(is.na(TmPos)) %>%
45 | select(-TmPos) %>%
46 | left_join(missing_pos_bios %>% select(URL, TmPos), by = c("UrlTmarkt" = "URL"))
47 | ) %>%
48 | arrange(PlayerFBref)
49 |
50 |
51 | # create final output df: existing plus newly matched rows, sorted by player, one row kept per UrlFBref
52 | final_output <- bind_rows(existing_df, matched_data) %>%
53 | arrange(PlayerFBref) %>%
54 | distinct(UrlFBref, .keep_all=T)
55 |
56 | #=============
57 | # Write Files
58 | #=============
59 |
60 | # write file for commit to GitHub:
61 | write.csv(final_output, here("raw-data", "fbref-tm-player-mapping", "output", "fbref_to_tm_mapping.csv"), row.names = FALSE)
62 |
63 | # Write file to Googlesheets:
64 | # get the sheet id (as a plain character string) from the spreadsheet URL
65 | ss <- as_sheets_id("https://docs.google.com/spreadsheets/d/1GjjS9IRp6FVzVX5QyfmttMk8eYBtIzuZ_YIM0VWg8OY/edit#gid=61874932") %>%
66 | as.character()
67 |
68 | # write the data to the "fbref_to_tm_mapping" sheet
69 | sheet_write(final_output,
70 | ss,
71 | sheet = "fbref_to_tm_mapping")
72 |
--------------------------------------------------------------------------------
/raw-data/fbref-tm-player-mapping/data/tm_data.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/raw-data/fbref-tm-player-mapping/data/tm_data.rds
--------------------------------------------------------------------------------
/raw-data/fbref-tm-player-mapping/extra-leagues/create_final_data.R:
--------------------------------------------------------------------------------
1 | library(worldfootballR)
2 | library(tidyverse)
3 | library(googlesheets4)
4 | library(here)
5 |
6 | # published mapping that the newly matched players get appended to
7 | existing_df <- read.csv("https://github.com/JaseZiv/worldfootballR_data/raw/master/raw-data/fbref-tm-player-mapping/output/fbref_to_tm_mapping.csv", stringsAsFactors = FALSE)
8 | # read in files: the automatic matches plus the two manually fixed working files
9 | # NOTE(review): only joined_finished is read from the "extra-leagues" folder - confirm the other two paths are intended
10 | joined_finished <- read.csv(here("raw-data", "fbref-tm-player-mapping", "extra-leagues", "output", "working-files", "joined_finished.csv"), stringsAsFactors = FALSE)
11 | joined_missing <- read.csv(here("raw-data", "fbref-tm-player-mapping", "output", "working-files", "joined_missing_manual_fix.csv"), stringsAsFactors = FALSE)
12 | # the duplicates file is optional: an empty data frame stands in when reading or trimming it fails
13 | duplicate_players <- tryCatch(
14 |   select(read.csv(here("raw-data", "fbref-tm-player-mapping", "output", "working-files", "duplicate_players_df_manual_fix.csv"), stringsAsFactors = FALSE), -fbref_surname, -player_name),
15 |   error = function(e) data.frame())
16 |
17 | # stack the three sources, turn empty Transfermarkt URLs into NA, then keep and rename the mapping columns
18 | matched_data <- bind_rows(joined_finished, joined_missing, duplicate_players) %>%
19 |   arrange(Player) %>%
20 |   mutate(player_url = ifelse(player_url == "", NA_character_, player_url)) %>%
21 |   select(PlayerFBref = Player, UrlFBref = Url, UrlTmarkt = player_url, TmPos = player_position)
22 |
23 | # some players won't have a position listed (because they haven't been matched automatically)
24 | missing_pos <- matched_data %>% filter(!is.na(UrlTmarkt) & is.na(TmPos)) %>% pull(UrlTmarkt)
25 |
26 | # for these URLs, we can get their positions using the `tm_player_bio` function.
27 | # When nothing is missing the scrape is skipped and an empty frame with the columns used below stands in
28 | missing_pos_bios <- if (length(missing_pos) > 0) {
29 |   tm_player_bio(player_urls = missing_pos)
30 | } else {
31 |   data.frame(URL = character(), position = character(), stringsAsFactors = FALSE)
32 | }
33 |
34 | # need to clean these up from the bio data - some of them come with the position group (say "midfield")
35 | # followed by " - " and the true position (say "Left Midfielder"); we only want the true position
36 | missing_pos_bios <- missing_pos_bios %>%
37 |   mutate(TmPos = case_when(grepl(" - ", position) ~ gsub(".*- ", "", position), TRUE ~ position))
38 |
39 | # join the present and missing player data (records without a Transfermarkt URL are left out)
40 | matched_data <- matched_data %>%
41 |   filter(!is.na(UrlTmarkt), !is.na(TmPos)) %>%
42 |   bind_rows(
43 |     matched_data %>%
44 |       filter(!is.na(UrlTmarkt), is.na(TmPos)) %>%
45 |       select(-TmPos) %>%
46 |       left_join(missing_pos_bios %>% select(URL, TmPos), by = c("UrlTmarkt" = "URL"))
47 |   ) %>%
48 |   arrange(PlayerFBref)
49 |
50 |
51 | # create final output df: published mapping plus the new matches, sorted by player name,
52 | # keeping the first row found for each FBref player URL
53 | final_output <- existing_df %>%
54 |   bind_rows(matched_data) %>%
55 |   arrange(PlayerFBref) %>%
56 |   distinct(UrlFBref, .keep_all = TRUE)
57 |
58 | #=============
59 | # Write Files
60 | #=============
61 |
62 | # write file for commit to GitHub:
63 | write.csv(final_output,
64 |           file = here("raw-data", "fbref-tm-player-mapping", "output", "fbref_to_tm_mapping.csv"),
65 |           row.names = FALSE)
66 |
67 | # Write file to Googlesheets: turn the spreadsheet URL into its sheet id, then write the data
68 | # into the "fbref_to_tm_mapping" sheet
69 | ss <- as.character(as_sheets_id("https://docs.google.com/spreadsheets/d/1GjjS9IRp6FVzVX5QyfmttMk8eYBtIzuZ_YIM0VWg8OY/edit#gid=61874932"))
70 |
71 | sheet_write(data = final_output, ss = ss, sheet = "fbref_to_tm_mapping")
72 |
--------------------------------------------------------------------------------
/raw-data/fbref-tm-player-mapping/extra-leagues/initial-match/build_mapping_dictionary.R:
--------------------------------------------------------------------------------
1 | library(worldfootballR)
2 | library(tidyverse)
3 | library(here)
4 |
5 | # inputs: the FBref player list and the two Transfermarkt valuation files saved in this folder
6 | fbref <- readRDS(here("raw-data", "fbref-tm-player-mapping", "extra-leagues", "initial-match", "fbref_extra_leagues.rds"))
7 | tm1 <- readRDS(here("raw-data", "fbref-tm-player-mapping", "extra-leagues", "initial-match", "tm_players_extra_tier1.rds"))
8 | tm2 <- readRDS(here("raw-data", "fbref-tm-player-mapping", "extra-leagues", "initial-match", "tm_players_championship.rds"))
9 | tm <- bind_rows(tm1, tm2)
10 |
11 | # players already present in the published mapping do not need to be matched again
12 | matched_data <- read.csv("https://github.com/JaseZiv/worldfootballR_data/raw/master/raw-data/fbref-tm-player-mapping/output/fbref_to_tm_mapping.csv", stringsAsFactors = FALSE)
13 |
14 | # one row per FBref player URL (the latest season sorts first and is the row kept), with the
15 | # surname - the text after the last whitespace in the name - added as `fbref_surname`
16 | fbref <- fbref %>%
17 |   filter(!Url %in% matched_data$UrlFBref) %>%
18 |   select(season_end_year, Squad, competition_name, Player, Nation, Born, Url) %>%
19 |   mutate(fbref_surname = str_squish(gsub(".*\\s", "", Player))) %>%
20 |   arrange(Player, Url, desc(season_end_year)) %>%
21 |   distinct(Url, .keep_all = TRUE)
22 |
23 | tm <- tm %>%
24 |   select(comp_name, region, country, season_start_year, squad, player_name, player_position, player_dob, player_nationality, player_market_value_euro, player_url) %>%
25 |   arrange(player_name)
26 |
27 | # want a df to help with inspection of names with special characters:
28 | # one row per name / date of birth / URL combination, with surname and year of birth as join keys
29 | tm_unique <- tm %>%
30 |   arrange(player_name, player_url, desc(season_start_year)) %>%
31 |   distinct(player_name, player_dob, player_url, .keep_all = TRUE) %>%
32 |   mutate(tm_surname = str_squish(gsub(".*\\s", "", player_name)),
33 |          tm_yob = as.character(lubridate::year(player_dob)))
34 |
35 | #----- primary join type: -----#
36 | # here I will join the two datasets on the player name and the year of birth
37 | # joined_primary <- fbref %>% select(Player, Born, Url) %>% distinct(Url, .keep_all = T) %>%
38 | # left_join(tm %>% select(player_name, player_dob, player_url) %>% distinct(player_url, .keep_all = T), by = c("Player" = "player_name"))
39 |
40 | joined_primary <- fbref %>%
41 |   select(Player, Born, Url, Squad, Nation) %>%
42 |   distinct(Url, .keep_all = TRUE) %>%
43 |   left_join(tm_unique %>%
44 |               select(player_name, player_dob, player_url, tm_yob, tm_squad = squad, tm_nationality = player_nationality, player_position) %>%
45 |               distinct(player_url, .keep_all = TRUE),
46 |             by = c("Player" = "player_name", "Born" = "tm_yob")) %>%
47 |   arrange(Player)
48 |
49 | # these players have multiple records in each data set - think "Adama Traoré" or "Rafael" or "Raúl García" for example
50 | # will need to manually go through each of these to map the correct player (all rows of every FBref URL that appears more than once)
51 | duplicate_players <- joined_primary %>% filter(Url %in% (joined_primary %>% count(Player, Url, sort = TRUE) %>% filter(n > 1) %>% pull(Url)))
52 |
53 | # # inspecting these records, I might be able to get some more hits when comparing the player's YOB
54 | # no_longer_dups <- duplicate_players %>%
55 | # mutate(tm_yob = lubridate::year(player_dob)) %>%
56 | # filter(Born == tm_yob)
57 | #
58 | # still_dups <- no_longer_dups %>%
59 | # count(Player, Url, Born) %>% filter(n>1) %>% pull(Url) %>% unique()
60 | #
61 | # still_dups <- duplicate_players %>%
62 | # filter(Url %in% still_dups)
63 | #
64 | # no_longer_dups <- no_longer_dups %>%
65 | # filter(!Url %in% still_dups$Url)
66 |
67 | # now remove these records from the raw joined data
68 | # IMPORTANT: remember to add `duplicate_players_df` that has been cleaned manually back to the main df
69 | joined_primary <- filter(joined_primary, !(Url %in% duplicate_players$Url))
70 |
71 | # get a full list of joins on full player name that I'm happy with:
72 | # every remaining record that picked up a Transfermarkt URL
73 | joined_complete <- filter(joined_primary, !is.na(player_url))
74 |
75 | # get a list of records where there were no matches on full player name:
76 | # every remaining record without a Transfermarkt URL
77 | joined_missing <- filter(joined_primary, is.na(player_url))
78 |
79 |
80 | #----- secondary join type: -----#
81 | # here I'll try to join on surname and year of birth - would be nice to use DOB instead but I don't have it for FBref players
82 | # (player_position, player_dob and player_url are dropped first so that this join can supply them again)
83 | joined_secondary <- joined_missing %>%
84 |   select(-player_position, -player_dob, -player_url) %>%
85 |   mutate(fbref_surname = gsub(".*\\s", "", Player)) %>%
86 |   left_join(tm_unique, by = c("fbref_surname" = "tm_surname", "Born" = "tm_yob"))
87 |
88 | # now there are some more duplicates as a result of this secondary join method:
89 | # first collect the FBref URLs that picked up more than one Transfermarkt record ...
90 | additional_duplicated_players <- joined_secondary %>%
91 |   filter(!is.na(player_url)) %>%
92 |   count(Player, Url, sort = TRUE) %>%
93 |   filter(n > 1) %>%
94 |   pull(Url) %>%
95 |   unique()
96 |
97 | # ... then keep every joined row that belongs to one of those URLs
98 | additional_duplicated_players <- joined_secondary %>%
99 |   filter(Url %in% additional_duplicated_players)
100 |
101 | # combine all duplicated joins for manual rework:
102 | duplicate_players <- duplicate_players %>%
103 |   bind_rows(additional_duplicated_players) %>%
104 |   select(-fbref_surname, -player_name)
105 |
106 | # secondary matches that are not duplicated, cut down to the six columns selected below
107 | joined_secondary <- joined_secondary %>%
108 |   filter(!is.na(player_url), !(Url %in% additional_duplicated_players$Url)) %>%
109 |   select(Player, Born, Url, player_dob, player_url, player_position)
110 |
111 | # every automatic match: the primary joins followed by the secondary joins
112 | joined_finished <- joined_complete %>%
113 |   filter(!is.na(player_url)) %>%
114 |   bind_rows(joined_secondary) %>%
115 |   select(-Squad, -Nation, -tm_squad, -tm_nationality)
116 |
117 |
118 | # create a file for manual rework by removing any of the records that have been matched since the creation of `joined_missing`:
119 | joined_missing <- joined_missing %>%
120 |   filter(
121 |     !(Url %in% joined_finished$Url),
122 |     !(Url %in% duplicate_players$Url))
123 |
124 | # write files to work on manually
125 | # (all four go under extra-leagues/initial-match; create_final_data_initial.R in that folder reads "joined_finished.csv" and "working-files/joined_missing.csv")
126 | write.csv(joined_finished, here("raw-data", "fbref-tm-player-mapping", "extra-leagues", "initial-match", "joined_finished.csv"), row.names = F)
127 | write.csv(joined_missing, here("raw-data", "fbref-tm-player-mapping", "extra-leagues", "initial-match", "working-files", "joined_missing.csv"), row.names = F)
128 | write.csv(tm_unique, here("raw-data", "fbref-tm-player-mapping", "extra-leagues", "initial-match", "working-files", "tm_unique.csv"), row.names = F)
129 | write.csv(duplicate_players, here("raw-data", "fbref-tm-player-mapping", "extra-leagues", "initial-match", "working-files", "duplicate_players_df.csv"), row.names = F)
130 |
--------------------------------------------------------------------------------
/raw-data/fbref-tm-player-mapping/extra-leagues/initial-match/create_final_data_initial.R:
--------------------------------------------------------------------------------
1 | library(worldfootballR)
2 | library(tidyverse)
3 | library(googlesheets4)
4 | library(here)
5 |
6 | # published mapping that the newly matched players get appended to
7 | existing_df <- read.csv("https://github.com/JaseZiv/worldfootballR_data/raw/master/raw-data/fbref-tm-player-mapping/output/fbref_to_tm_mapping.csv", stringsAsFactors = FALSE)
8 | # read in files
9 | # NOTE(review): these paths are relative to the working directory - presumably this script's `initial-match` folder; confirm
10 | joined_finished <- read.csv(file.path("joined_finished.csv"), stringsAsFactors = F)
11 | joined_missing <- read.csv(file.path("working-files", "joined_missing.csv"), stringsAsFactors = F)
12 |
13 | # the manually fixed duplicates file is optional. The column selection sits inside tryCatch() so the empty
14 | # fallback is used as is - selecting named columns from an empty data frame would stop the script
15 | duplicate_players <- tryCatch(
16 |   read.csv(file.path("working-files", "duplicate_players_df_manual_fix.csv"), stringsAsFactors = F) %>%
17 |     select(Player, Born, Url, player_dob, player_url, player_position),
18 |   error = function(e) data.frame())
19 |
20 | matched_data <- bind_rows(joined_finished, joined_missing, duplicate_players) %>%
21 |   arrange(Player) %>%
22 |   mutate(player_url = ifelse(player_url == "", NA_character_, player_url)) %>%
23 |   select(PlayerFBref=Player, UrlFBref=Url, UrlTmarkt=player_url, TmPos=player_position)
24 |
25 | # some players won't have a position listed (because they haven't been matched automatically)
26 | missing_pos <- matched_data %>% filter(!is.na(UrlTmarkt) & is.na(TmPos)) %>% pull(UrlTmarkt)
27 |
28 | # for these URLs, we can get their positions using the `tm_player_bio` function
29 | # (one URL per call; a call that errors contributes no rows instead of stopping the run)
30 | missing_pos_bios <- data.frame()
31 |
32 | # seq_along() rather than 1:length(): with nothing missing, 1:0 would still run the loop for i = 1 and i = 0
33 | for (i in seq_along(missing_pos)) {
34 |   print(paste0("scraping ", i, "of", length(missing_pos)))
35 |   df <- tryCatch(tm_player_bio(player_urls = missing_pos[i]), error = function(e) data.frame())
36 |   missing_pos_bios <- bind_rows(missing_pos_bios, df)
37 | }
38 |
39 | # nothing scraped (no missing positions, or every call failed): provide the two columns used below
40 | if (nrow(missing_pos_bios) == 0) {
41 |   missing_pos_bios <- data.frame(URL = character(), position = character(), stringsAsFactors = FALSE)
42 | }
43 |
44 | # need to clean these up from the bio data - some of them come with the position group (say "midfield")
45 | # followed by " - " and the true position (say "Left Midfielder"); we only want the true position
46 | missing_pos_bios <- missing_pos_bios %>%
47 |   mutate(TmPos = case_when(grepl(" - ", position) ~ gsub(".*- ", "", position), TRUE ~ position))
48 |
49 | # join the present and missing player data (records without a Transfermarkt URL are left out)
50 | matched_data <- matched_data %>%
51 |   filter(!is.na(UrlTmarkt), !is.na(TmPos)) %>%
52 |   bind_rows(
53 |     matched_data %>%
54 |       filter(!is.na(UrlTmarkt), is.na(TmPos)) %>%
55 |       select(-TmPos) %>%
56 |       left_join(missing_pos_bios %>% select(URL, TmPos), by = c("UrlTmarkt" = "URL"))
57 |   ) %>%
58 |   arrange(PlayerFBref)
59 |
60 |
61 | # create final output df: the published mapping with the new matches appended, ordered by
62 | # player name and reduced to the first row per FBref player URL
63 | final_output <- existing_df %>%
64 |   bind_rows(matched_data) %>%
65 |   arrange(PlayerFBref) %>%
66 |   distinct(UrlFBref, .keep_all = TRUE)
67 |
68 | #=============
69 | # Write Files
70 | #=============
71 |
72 | # write file for commit to GitHub:
73 | write.csv(final_output,
74 |           file = here("raw-data", "fbref-tm-player-mapping", "output", "fbref_to_tm_mapping.csv"),
75 |           row.names = FALSE)
76 |
77 | # Write file to Googlesheets: get the sheet id from the spreadsheet URL, then write the
78 | # data into the "fbref_to_tm_mapping" sheet
79 | ss <- as.character(as_sheets_id("https://docs.google.com/spreadsheets/d/1GjjS9IRp6FVzVX5QyfmttMk8eYBtIzuZ_YIM0VWg8OY/edit#gid=61874932"))
80 |
81 | sheet_write(data = final_output, ss = ss, sheet = "fbref_to_tm_mapping")
82 |
--------------------------------------------------------------------------------
/raw-data/fbref-tm-player-mapping/extra-leagues/initial-match/fbref_extra_leagues.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/raw-data/fbref-tm-player-mapping/extra-leagues/initial-match/fbref_extra_leagues.rds
--------------------------------------------------------------------------------
/raw-data/fbref-tm-player-mapping/extra-leagues/initial-match/fbref_mls.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/raw-data/fbref-tm-player-mapping/extra-leagues/initial-match/fbref_mls.rds
--------------------------------------------------------------------------------
/raw-data/fbref-tm-player-mapping/extra-leagues/initial-match/fbref_selenium.R:
--------------------------------------------------------------------------------
1 |
2 | library(RSelenium)
3 | library(xml2)
4 | library(rvest)
5 | library(tidyverse)
6 |
7 |
8 | # Set Up Selenium ---------------------------------------------------------
9 | # start selenium with a Firefox browser on port 4445 and keep the client object;
10 | # `remDr` is the default driver used by read_html_selenium()
11 | rD <- rsDriver(browser = "firefox", port = 4445L, verbose = TRUE)
12 | remDr <- rD$client
13 | # remDr$navigate("https://fbref.com/en/comps/23/2020-2021/playingtime/2020-2021-Eredivisie-Stats")
14 |
15 |
16 | # Open `page_url` in the selenium browser and return the page source parsed with read_html().
17 | #   page_url: URL to navigate to
18 | #   driver:   selenium client used for every browser call; defaults to the global `remDr`
19 | #   sleep:    extra seconds added to the final pause before the source is read; defaults to 0
20 | read_html_selenium <- function (page_url, driver, sleep) {
21 |
22 |   if (missing(driver)) {
23 |     driver <- remDr
24 |   }
25 |
26 |   if (missing(sleep)) {
27 |     sleep <- 0
28 |   }
29 |
30 |   driver$navigate(page_url)
31 |   Sys.sleep(1)
32 |   # send the End key to the page body to jump to the bottom of the page before reading it
33 |   # (presumably so that content rendered on scroll is part of the source - confirm)
34 |   webElem <- driver$findElement("css", "body")
35 |   Sys.sleep(1)
36 |   webElem$sendKeysToElement(list(key = "end"))
37 |   Sys.sleep(2 + sleep)
38 |
39 |   # the page source comes back as a list; its first element holds the HTML text
40 |   driver$getPageSource()[[1]][1] %>%
41 |     read_html()
42 | }
43 |
44 |
45 |
46 | # Variables ---------------------------------------------------------------
47 |
48 | # site root, pasted in front of the player links found in the scraped tables
49 | main_url <- "https://fbref.com"
50 |
51 | # filters for the first batch of leagues
52 | country_abbr <- c("NED", "BRA", "MEX", "POR")
53 | gender_M_F <- "M"
54 | season_end_year_num <- 2019:2023
55 | comp_tier <- "1st"
56 |
57 |
58 | # Get Seasons URLs ---------------------------------------------------------
59 |
60 | # competitions lookup; its `seasons_urls` column holds the season page URLs
61 | seasons <- read.csv("https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/master/raw-data/all_leages_and_cups/all_competitions.csv", stringsAsFactors = FALSE)
62 |
63 | # unique season URLs of the leagues matching the filters above, ordered by season end year
64 | seasons_urls <- seasons %>%
65 |   dplyr::filter(stringr::str_detect(.data[["competition_type"]], "Leagues"),
66 |                 country %in% country_abbr,
67 |                 gender %in% gender_M_F,
68 |                 season_end_year %in% season_end_year_num,
69 |                 tier %in% comp_tier) %>%
70 |   dplyr::arrange(season_end_year) %>%
71 |   dplyr::pull(seasons_urls) %>%
72 |   unique()
73 |
74 | # the same lookup for the English men's second tier
75 | championship_seasons_urls <- seasons %>%
76 |   dplyr::filter(stringr::str_detect(.data[["competition_type"]], "Leagues"), country == "ENG", gender == "M",
77 |                 season_end_year %in% 2019:2023, tier == "2nd") %>%
78 |   dplyr::arrange(season_end_year) %>%
79 |   dplyr::pull(seasons_urls) %>%
80 |   unique()
81 |
82 | seasons_urls <- c(seasons_urls, championship_seasons_urls)
83 |
84 |
85 |
86 | # Scrape FBREF ------------------------------------------------------------
87 | fbref <- data.frame()
88 | # `fbref` accumulates the cleaned playing-time table of every season URL
89 | for(season_url in seasons_urls) {
90 |
91 | print(paste0("scraping season: ", season_url))
92 | # split the season URL at its last "/" so that "playingtime" can be inserted before the final segment
93 | start_part <- sub('/[^/]*$', '', season_url)
94 | end_part <- gsub(".*/", "", season_url)
95 |
96 | stat_urls <- paste0(start_part, "/", "playingtime", "/", end_part)
97 | # wait 5 seconds before each page load
98 | Sys.sleep(5)
99 | pg <- read_html_selenium(stat_urls)
100 | # the element with id "div_stats_playing_time" (presumably the wrapper of the player table - confirm)
101 | tab_elem <- pg %>% html_elements("#div_stats_playing_time")
102 | # for every table-body row: the href of the first link inside a cell, prefixed with the site root
103 | urls <- tab_elem %>%
104 | rvest::html_nodes("table") %>%
105 | rvest::html_nodes("tbody") %>%
106 | rvest::html_nodes("tr") %>% rvest::html_node("td a") %>% rvest::html_attr("href") %>% paste0(main_url, .)
107 | # keep columns 2, 3, 4, 5 and 7, then promote the first data row to column names
108 | stat_df <- tab_elem %>% html_table() %>% data.frame()
109 | stat_df <- stat_df[,c(2,3,4,5,7)]
110 | stat_df_names <- stat_df[1,] %>% as.character()
111 | stat_df <- stat_df[-1,]
112 | colnames(stat_df) <- stat_df_names
113 | # NOTE(review): assumes `urls` has exactly one entry per remaining row of stat_df - confirm
114 | stat_df$Url <- urls
115 | stat_df$season_url <- season_url
116 | # drop rows whose Nation value is the literal "Nation" (presumably header rows repeated inside the table)
117 | stat_df <- stat_df %>%
118 | filter(Nation != "Nation")
119 | # add season_end_year and competition_name from the competitions lookup, matched on the season URL
120 | stat_df <- stat_df %>%
121 | left_join(seasons %>% select(season_end_year, competition_name, seasons_urls), by = c("season_url" = "seasons_urls"))
122 |
123 | fbref <- bind_rows(fbref, stat_df)
124 | }
125 |
126 | # NOTE: setwd() moves the working directory up one level first, so the .rds file below is
127 | # written into the parent of the directory the script was started in
128 | setwd("../")
129 | saveRDS(fbref, "fbref_extra_leagues.rds")
130 |
131 |
132 |
133 |
--------------------------------------------------------------------------------
/raw-data/fbref-tm-player-mapping/extra-leagues/initial-match/get_tm_extra_leagues.R:
--------------------------------------------------------------------------------
1 | library(worldfootballR)
2 | library(tidyverse)
3 | library(here)
4 |
5 |
6 | # Transfermarkt player market values for four countries, start years 2018 to 2022;
7 | # the result is saved into the current working directory
8 | valuations <- tm_player_market_values(
9 |   country_name = c("Netherlands", "Portugal", "Brazil", "Mexico"),
10 |   start_year = 2018:2022)
11 | saveRDS(valuations, "tm_players_extra_tier1.rds")
12 |
13 |
14 | # same pull for the league given by `league_url`, with an empty country name
15 | champ_valuations <- tm_player_market_values(
16 |   country_name = "", start_year = 2018:2022,
17 |   league_url = "https://www.transfermarkt.com/championship/startseite/wettbewerb/GB2")
18 | saveRDS(champ_valuations, "tm_players_championship.rds")
19 |
--------------------------------------------------------------------------------
/raw-data/fbref-tm-player-mapping/extra-leagues/initial-match/mls/build_mapping_dictionary.R:
--------------------------------------------------------------------------------
1 | library(worldfootballR)
2 | library(tidyverse)
3 | library(here)
4 |
5 | # inputs: the FBref player list and the Transfermarkt valuation file for MLS
6 | fbref <- readRDS(here("raw-data", "fbref-tm-player-mapping", "extra-leagues", "initial-match", "fbref_mls.rds"))
7 | tm <- readRDS(here("raw-data", "fbref-tm-player-mapping", "extra-leagues", "initial-match", "tm_players_mls.rds"))
8 |
9 | # players already present in the published mapping do not need to be matched again
10 | matched_data <- read.csv("https://github.com/JaseZiv/worldfootballR_data/raw/master/raw-data/fbref-tm-player-mapping/output/fbref_to_tm_mapping.csv", stringsAsFactors = FALSE)
11 |
12 | # one row per FBref player URL (the latest season sorts first and is the row kept), with the
13 | # surname - the text after the last whitespace in the name - added as `fbref_surname`
14 | fbref <- fbref %>%
15 |   filter(!Url %in% matched_data$UrlFBref) %>%
16 |   select(season_end_year, Squad, competition_name, Player, Nation, Born, Url) %>%
17 |   mutate(fbref_surname = str_squish(gsub(".*\\s", "", Player))) %>%
18 |   arrange(Player, Url, desc(season_end_year)) %>%
19 |   distinct(Url, .keep_all = TRUE)
20 |
21 | tm <- tm %>%
22 |   select(comp_name, region, country, season_start_year, squad, player_name, player_position, player_dob, player_nationality, player_market_value_euro, player_url) %>%
23 |   arrange(player_name)
24 |
25 | # want a df to help with inspection of names with special characters (one row per name / DOB / URL)
26 | tm_unique <- tm %>%
27 |   arrange(player_name, player_url, desc(season_start_year)) %>%
28 |   distinct(player_name, player_dob, player_url, .keep_all = TRUE) %>%
29 |   mutate(tm_surname = str_squish(gsub(".*\\s", "", player_name)),
30 |          tm_yob = as.character(lubridate::year(player_dob)))
31 |
32 | #----- primary join type: -----#
33 | # here I will join the two datasets on the player name and the year of birth
34 | # joined_primary <- fbref %>% select(Player, Born, Url) %>% distinct(Url, .keep_all = T) %>%
35 | # left_join(tm %>% select(player_name, player_dob, player_url) %>% distinct(player_url, .keep_all = T), by = c("Player" = "player_name"))
36 |
37 | joined_primary <- fbref %>%
38 |   select(Player, Born, Url, Squad, Nation) %>%
39 |   distinct(Url, .keep_all = TRUE) %>%
40 |   left_join(tm_unique %>%
41 |               select(player_name, player_dob, player_url, tm_yob, tm_squad = squad, tm_nationality = player_nationality, player_position) %>%
42 |               distinct(player_url, .keep_all = TRUE),
43 |             by = c("Player" = "player_name", "Born" = "tm_yob")) %>%
44 |   arrange(Player)
45 |
46 | # these players have multiple records in each data set - think "Adama Traoré" or "Rafael" or "Raúl García" for example
47 | # will need to manually go through each of these to map the correct player (all rows of every FBref URL that appears more than once)
48 | duplicate_players <- joined_primary %>% filter(Url %in% (joined_primary %>% count(Player, Url, sort = TRUE) %>% filter(n > 1) %>% pull(Url)))
49 |
50 | # # inspecting these records, I might be able to get some more hits when comparing the player's YOB
51 | # no_longer_dups <- duplicate_players %>%
52 | # mutate(tm_yob = lubridate::year(player_dob)) %>%
53 | # filter(Born == tm_yob)
54 | #
55 | # still_dups <- no_longer_dups %>%
56 | # count(Player, Url, Born) %>% filter(n>1) %>% pull(Url) %>% unique()
57 | #
58 | # still_dups <- duplicate_players %>%
59 | # filter(Url %in% still_dups)
60 | #
61 | # no_longer_dups <- no_longer_dups %>%
62 | # filter(!Url %in% still_dups$Url)
63 |
64 | # now remove these records from the raw joined data
65 | # IMPORTANT: remember to add `duplicate_players_df` that has been cleaned manually back to the main df
66 | joined_primary <- filter(joined_primary, !(Url %in% duplicate_players$Url))
67 |
68 | # get a full list of joins on full player name that I'm happy with:
69 | # every remaining record that picked up a Transfermarkt URL
70 | joined_complete <- filter(joined_primary, !is.na(player_url))
71 |
72 | # get a list of records where there were no matches on full player name:
73 | # every remaining record without a Transfermarkt URL
74 | joined_missing <- filter(joined_primary, is.na(player_url))
75 |
76 |
77 | #----- secondary join type: -----#
78 | # here I'll try to join on surname and year of birth - would be nice to use DOB instead but I don't have it for FBref players
79 | # (player_position, player_dob and player_url are dropped first so that this join can supply them again)
80 | joined_secondary <- joined_missing %>%
81 |   select(-player_position, -player_dob, -player_url) %>%
82 |   mutate(fbref_surname = gsub(".*\\s", "", Player)) %>%
83 |   left_join(tm_unique, by = c("fbref_surname" = "tm_surname", "Born" = "tm_yob"))
84 |
85 | # now there are some more duplicates as a result of this secondary join method:
86 | # first collect the FBref URLs that picked up more than one Transfermarkt record ...
87 | additional_duplicated_players <- joined_secondary %>%
88 |   filter(!is.na(player_url)) %>%
89 |   count(Player, Url, sort = TRUE) %>%
90 |   filter(n > 1) %>%
91 |   pull(Url) %>%
92 |   unique()
93 |
94 | # ... then keep every joined row that belongs to one of those URLs
95 | additional_duplicated_players <- joined_secondary %>%
96 |   filter(Url %in% additional_duplicated_players)
97 |
98 | # combine all duplicated joins for manual rework:
99 | duplicate_players <- duplicate_players %>%
100 |   bind_rows(additional_duplicated_players) %>%
101 |   select(-fbref_surname, -player_name)
102 |
103 | # secondary matches that are not duplicated, cut down to the six columns selected below
104 | joined_secondary <- joined_secondary %>%
105 |   filter(!is.na(player_url), !(Url %in% additional_duplicated_players$Url)) %>%
106 |   select(Player, Born, Url, player_dob, player_url, player_position)
107 |
108 | # every automatic match: the primary joins followed by the secondary joins
109 | joined_finished <- joined_complete %>%
110 |   filter(!is.na(player_url)) %>%
111 |   bind_rows(joined_secondary) %>%
112 |   select(-Squad, -Nation, -tm_squad, -tm_nationality)
113 |
114 |
115 | # create a file for manual rework by removing any of the records that have been matched since the creation of `joined_missing`:
116 | joined_missing <- joined_missing %>%
117 |   filter(
118 |     !(Url %in% joined_finished$Url),
119 |     !(Url %in% duplicate_players$Url))
120 |
121 |
122 | # write files to work on manually (all four go into the `mls` folder, without row names)
123 | write.csv(joined_finished, file = here("raw-data", "fbref-tm-player-mapping", "extra-leagues", "initial-match", "mls", "joined_finished.csv"), row.names = FALSE)
124 | write.csv(joined_missing, file = here("raw-data", "fbref-tm-player-mapping", "extra-leagues", "initial-match", "mls", "joined_missing.csv"), row.names = FALSE)
125 | write.csv(tm_unique, file = here("raw-data", "fbref-tm-player-mapping", "extra-leagues", "initial-match", "mls", "tm_unique.csv"), row.names = FALSE)
126 | write.csv(duplicate_players, file = here("raw-data", "fbref-tm-player-mapping", "extra-leagues", "initial-match", "mls", "duplicate_players_df.csv"), row.names = FALSE)
127 |
--------------------------------------------------------------------------------
/raw-data/fbref-tm-player-mapping/extra-leagues/initial-match/mls/create_final_data_initial.R:
--------------------------------------------------------------------------------
1 | library(worldfootballR)
2 | library(tidyverse)
3 | library(googlesheets4)
4 | library(here)
5 |
6 | # published mapping that the newly matched players get appended to
7 | existing_df <- read.csv("https://github.com/JaseZiv/worldfootballR_data/raw/master/raw-data/fbref-tm-player-mapping/output/fbref_to_tm_mapping.csv", stringsAsFactors = FALSE)
8 |
9 | # read in files
10 | joined_finished <- read.csv(here("raw-data", "fbref-tm-player-mapping", "extra-leagues", "initial-match", "mls", "joined_finished.csv"), stringsAsFactors = F)
11 | joined_missing <- read.csv(here("raw-data", "fbref-tm-player-mapping", "extra-leagues", "initial-match", "mls", "joined_missing_manual_fix.csv"), stringsAsFactors = F)
12 |
13 | # the manually fixed duplicates file is optional. The column selection sits inside tryCatch() so the empty
14 | # fallback is used as is - selecting named columns from an empty data frame would stop the script
15 | duplicate_players <- tryCatch(
16 |   read.csv(here("raw-data", "fbref-tm-player-mapping", "extra-leagues", "initial-match", "mls", "duplicate_players_df_manual_fix.csv"), stringsAsFactors = F) %>%
17 |     select(Player, Born, Url, player_dob, player_url, player_position),
18 |   error = function(e) data.frame())
19 |
20 | # stack the three sources, turn empty Transfermarkt URLs into NA, then keep and rename the mapping columns
21 | matched_data <- bind_rows(joined_finished, joined_missing, duplicate_players) %>%
22 |   arrange(Player) %>%
23 |   mutate(player_url = ifelse(player_url == "", NA_character_, player_url)) %>%
24 |   select(PlayerFBref=Player, UrlFBref=Url, UrlTmarkt=player_url, TmPos=player_position)
25 |
26 | # some players won't have a position listed (because they haven't been matched automatically)
27 | missing_pos <- matched_data %>% filter(!is.na(UrlTmarkt) & is.na(TmPos)) %>% pull(UrlTmarkt)
28 |
29 | # for these URLs, we can get their positions using the `tm_player_bio` function
30 | # (one URL per call; a call that errors contributes no rows instead of stopping the run)
31 | missing_pos_bios <- data.frame()
32 |
33 | # seq_along() rather than 1:length(): with nothing missing, 1:0 would still run the loop for i = 1 and i = 0
34 | for (i in seq_along(missing_pos)) {
35 |   print(paste0("scraping ", i, "of", length(missing_pos)))
36 |   df <- tryCatch(tm_player_bio(player_urls = missing_pos[i]), error = function(e) data.frame())
37 |   missing_pos_bios <- bind_rows(missing_pos_bios, df)
38 | }
39 |
40 | # nothing scraped (no missing positions, or every call failed): provide the two columns used below
41 | if (nrow(missing_pos_bios) == 0) {
42 |   missing_pos_bios <- data.frame(URL = character(), position = character(), stringsAsFactors = FALSE)
43 | }
44 |
45 | # need to clean these up from the bio data - some of them come with the position group (say "midfield")
46 | # followed by " - " and the true position (say "Left Midfielder"); we only want the true position
47 | missing_pos_bios <- missing_pos_bios %>%
48 |   mutate(TmPos = case_when(grepl(" - ", position) ~ gsub(".*- ", "", position), TRUE ~ position))
49 |
50 | # join the present and missing player data (records without a Transfermarkt URL are left out)
51 | matched_data <- matched_data %>%
52 |   filter(!is.na(UrlTmarkt), !is.na(TmPos)) %>%
53 |   bind_rows(
54 |     matched_data %>%
55 |       filter(!is.na(UrlTmarkt), is.na(TmPos)) %>%
56 |       select(-TmPos) %>%
57 |       left_join(missing_pos_bios %>% select(URL, TmPos), by = c("UrlTmarkt" = "URL"))
58 |   ) %>%
59 |   arrange(PlayerFBref)
60 |
61 |
62 | # create final output df: the published mapping with the new matches appended, ordered by
63 | # player name and reduced to the first row per FBref player URL
64 | final_output <- existing_df %>%
65 |   bind_rows(matched_data) %>%
66 |   arrange(PlayerFBref) %>%
67 |   distinct(UrlFBref, .keep_all = TRUE)
68 |
69 | #=============
70 | # Write Files
71 | #=============
72 |
73 | # write file for commit to GitHub:
74 | write.csv(final_output,
75 |           file = here("raw-data", "fbref-tm-player-mapping", "output", "fbref_to_tm_mapping.csv"),
76 |           row.names = FALSE)
77 |
78 | # Write file to Googlesheets: get the sheet id from the spreadsheet URL, then write the
79 | # data into the "fbref_to_tm_mapping" sheet
80 | ss <- as.character(as_sheets_id("https://docs.google.com/spreadsheets/d/1GjjS9IRp6FVzVX5QyfmttMk8eYBtIzuZ_YIM0VWg8OY/edit#gid=61874932"))
81 |
82 | sheet_write(data = final_output, ss = ss, sheet = "fbref_to_tm_mapping")
83 |
--------------------------------------------------------------------------------
/raw-data/fbref-tm-player-mapping/extra-leagues/initial-match/mls/duplicate_players_df.csv:
--------------------------------------------------------------------------------
1 | "Player","Born","Url","Squad","Nation","player_dob","player_url","tm_squad","tm_nationality","player_position","comp_name","region","country","season_start_year","squad","player_nationality","player_market_value_euro"
2 | "Angelo Rodríguez","1989","https://fbref.com/en/players/0e9ea6cf/Angelo-Rodriguez","Minnesota Utd","co COL",1989-04-04,"https://www.transfermarkt.com/angelo-rodriguez/profil/spieler/178116",NA,NA,"Centre-Forward","MLS","Americas","United States",2018,"Minnesota United FC","Colombia",1250000
3 | "Angelo Rodríguez","1989","https://fbref.com/en/players/0e9ea6cf/Angelo-Rodriguez","Minnesota Utd","co COL",1989-07-23,"https://www.transfermarkt.com/victor-rodriguez/profil/spieler/129753",NA,NA,"Attacking Midfield","MLS","Americas","United States",2018,"Seattle Sounders FC","Spain",1500000
4 | "Cristian Martínez","1997","https://fbref.com/en/players/fdb15495/Cristian-Martinez","Chicago Fire","pa PAN",1997-02-06,"https://www.transfermarkt.com/christian-martinez/profil/spieler/419247",NA,NA,"Midfield","MLS","Americas","United States",2018,"Chicago Fire FC","Panama",450000
5 | "Cristian Martínez","1997","https://fbref.com/en/players/fdb15495/Cristian-Martinez","Chicago Fire","pa PAN",1997-06-05,"https://www.transfermarkt.com/douglas-martinez/profil/spieler/443384",NA,NA,"Centre-Forward","MLS","Americas","United States",2020,"Real Salt Lake City","Honduras",6e+05
6 | "Cristian Martínez","1997","https://fbref.com/en/players/fdb15495/Cristian-Martinez","Chicago Fire","pa PAN",1997-03-15,"https://www.transfermarkt.com/isidro-martinez/profil/spieler/585732",NA,NA,"Defensive Midfield","MLS","Americas","United States",2018,"Houston Dynamo FC","United States",NA
7 | "Gonzalo Nicolás Martínez","1993","https://fbref.com/en/players/cd00ceea/Gonzalo-Nicolas-Martinez","Atlanta Utd","ar ARG",1993-02-12,"https://www.transfermarkt.com/jose-antonio-martinez/profil/spieler/311287",NA,NA,"Centre-Back","MLS","Americas","United States",2021,"FC Dallas","Spain",1500000
8 | "Gonzalo Nicolás Martínez","1993","https://fbref.com/en/players/cd00ceea/Gonzalo-Nicolas-Martinez","Atlanta Utd","ar ARG",1993-05-19,"https://www.transfermarkt.com/josef-martinez/profil/spieler/162569",NA,NA,"Centre-Forward","MLS","Americas","United States",2021,"Atlanta United FC","Venezuela",12500000
9 | "Gonzalo Nicolás Martínez","1993","https://fbref.com/en/players/cd00ceea/Gonzalo-Nicolas-Martinez","Atlanta Utd","ar ARG",1993-06-13,"https://www.transfermarkt.com/pity-martinez/profil/spieler/281405",NA,NA,"Attacking Midfield","MLS","Americas","United States",2019,"Atlanta United FC","Argentina",1.2e+07
10 | "José Martínez","1993","https://fbref.com/en/players/8e049cbd/Jose-Martinez","FC Dallas","es ESP",1993-02-12,"https://www.transfermarkt.com/jose-antonio-martinez/profil/spieler/311287",NA,NA,"Centre-Back","MLS","Americas","United States",2021,"FC Dallas","Spain",1500000
11 | "José Martínez","1993","https://fbref.com/en/players/8e049cbd/Jose-Martinez","FC Dallas","es ESP",1993-05-19,"https://www.transfermarkt.com/josef-martinez/profil/spieler/162569",NA,NA,"Centre-Forward","MLS","Americas","United States",2021,"Atlanta United FC","Venezuela",12500000
12 | "José Martínez","1993","https://fbref.com/en/players/8e049cbd/Jose-Martinez","FC Dallas","es ESP",1993-06-13,"https://www.transfermarkt.com/pity-martinez/profil/spieler/281405",NA,NA,"Attacking Midfield","MLS","Americas","United States",2019,"Atlanta United FC","Argentina",1.2e+07
13 | "Valentín Castellanos","1998","https://fbref.com/en/players/da76bab4/Valentin-Castellanos","NYCFC","ar ARG",1998-05-11,"https://www.transfermarkt.com/robert-castellanos/profil/spieler/488127",NA,NA,"Centre-Back","MLS","Americas","United States",2020,"Nashville SC","United States",2e+05
14 | "Valentín Castellanos","1998","https://fbref.com/en/players/da76bab4/Valentin-Castellanos","NYCFC","ar ARG",1998-10-03,"https://www.transfermarkt.com/taty-castellanos/profil/spieler/522784",NA,NA,"Centre-Forward","MLS","Americas","United States",2021,"New York City FC","Argentina",1.2e+07
15 | "William Sands","2000","https://fbref.com/en/players/960a4473/William-Sands","Columbus Crew","us USA",2000-07-06,"https://www.transfermarkt.com/james-sands/profil/spieler/393321",NA,NA,"Centre-Back","MLS","Americas","United States",2020,"New York City FC","United States",2500000
16 | "William Sands","2000","https://fbref.com/en/players/960a4473/William-Sands","Columbus Crew","us USA",2000-07-06,"https://www.transfermarkt.com/will-sands/profil/spieler/393327",NA,NA,"Left-Back","MLS","Americas","United States",2021,"Columbus Crew","United States",50000
17 |
--------------------------------------------------------------------------------
/raw-data/fbref-tm-player-mapping/extra-leagues/initial-match/mls/duplicate_players_df_manual_fix.csv:
--------------------------------------------------------------------------------
1 | Player,Born,Url,Squad,Nation,player_dob,player_url,tm_squad,tm_nationality,player_position,comp_name,region,country,season_start_year,squad,player_nationality,player_market_value_euro
2 | Angelo Rodríguez,1989,https://fbref.com/en/players/0e9ea6cf/Angelo-Rodriguez,Minnesota Utd,co COL,4/4/1989,https://www.transfermarkt.com/angelo-rodriguez/profil/spieler/178116,NA,NA,Centre-Forward,MLS,Americas,United States,2018,Minnesota United FC,Colombia,1250000
3 | Cristian Martínez,1997,https://fbref.com/en/players/fdb15495/Cristian-Martinez,Chicago Fire,pa PAN,6/2/1997,https://www.transfermarkt.com/christian-martinez/profil/spieler/419247,NA,NA,Midfield,MLS,Americas,United States,2018,Chicago Fire FC,Panama,450000
4 | Gonzalo Nicolás Martínez,1993,https://fbref.com/en/players/cd00ceea/Gonzalo-Nicolas-Martinez,Atlanta Utd,ar ARG,13/6/1993,https://www.transfermarkt.com/pity-martinez/profil/spieler/281405,NA,NA,Attacking Midfield,MLS,Americas,United States,2019,Atlanta United FC,Argentina,1.20E+07
5 | José Martínez,1993,https://fbref.com/en/players/8e049cbd/Jose-Martinez,FC Dallas,es ESP,12/2/1993,https://www.transfermarkt.com/jose-antonio-martinez/profil/spieler/311287,NA,NA,Centre-Back,MLS,Americas,United States,2021,FC Dallas,Spain,1500000
6 | Valentín Castellanos,1998,https://fbref.com/en/players/da76bab4/Valentin-Castellanos,NYCFC,ar ARG,3/10/1998,https://www.transfermarkt.com/taty-castellanos/profil/spieler/522784,NA,NA,Centre-Forward,MLS,Americas,United States,2021,New York City FC,Argentina,1.20E+07
7 | William Sands,2000,https://fbref.com/en/players/960a4473/William-Sands,Columbus Crew,us USA,6/7/2000,https://www.transfermarkt.com/will-sands/profil/spieler/393327,NA,NA,Left-Back,MLS,Americas,United States,2021,Columbus Crew,United States,50000
--------------------------------------------------------------------------------
/raw-data/fbref-tm-player-mapping/extra-leagues/initial-match/mls/get_data.R:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | library(RSelenium)
5 | library(xml2)
6 | library(rvest)
7 | library(tidyverse)
8 |
9 |
10 | # Set Up Selenium ---------------------------------------------------------
11 | # Start Selenium with Firefox on port 4445; `remDr` is the client the script drives.
12 | rD <- rsDriver(browser="firefox", port=4445L, verbose=TRUE)
13 | remDr <- rD[["client"]]
14 |
15 | # remDr$navigate("https://fbref.com/en/comps/23/2020-2021/playingtime/2020-2021-Eredivisie-Stats")
16 |
17 |
18 | # Open `page_url` in the Selenium browser, scroll to the bottom of the page and
19 | # return the page source parsed with read_html().
20 | #   page_url: URL to load.
21 | #   driver:   Selenium client to drive; defaults to the global `remDr`.
22 | #   sleep:    extra seconds to wait after scrolling; defaults to 0.
23 | read_html_selenium <- function (page_url, driver, sleep) {
24 |
25 |   if (missing(driver)) {
26 |     driver <- remDr
27 |   }
28 |   if (missing(sleep)) {
29 |     sleep <- 0
30 |   }
31 |
32 |   # use the resolved `driver` throughout rather than the global client
33 |   driver$navigate(page_url)
34 |   Sys.sleep(1)
35 |   # send the End key to the page body to scroll to the bottom of the page
36 |   webElem <- driver$findElement("css", "body")
37 |   Sys.sleep(1)
38 |   webElem$sendKeysToElement(list(key = "end"))
39 |   Sys.sleep(2 + sleep)
40 |
41 |   # getPageSource() returns a list; its first element is the HTML string
42 |   driver$getPageSource() %>%
43 |     .[[1]] %>% .[1] %>% read_html(.)
44 | }
45 |
46 |
47 |
48 | # Variables ---------------------------------------------------------------
49 |
50 | # site root, prepended to the relative player links scraped later on
51 | main_url <- "https://fbref.com"
52 |
53 | # which competition to pull: country code, gender, season end years, tier
54 | country_abbr <- "USA"
55 | gender_M_F <- "M"
56 | season_end_year_num <- 2019:2023
57 | comp_tier <- "1st"
58 |
59 | # Get Seasons URLs ---------------------------------------------------------
60 |
61 | seasons <- read.csv("https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/master/raw-data/all_leages_and_cups/all_competitions.csv", stringsAsFactors = FALSE)
62 |
63 | # season URLs of the league competitions selected above, earliest season first
64 | seasons_urls <- seasons %>%
65 |   dplyr::filter(
66 |     stringr::str_detect(competition_type, "Leagues"),
67 |     country %in% country_abbr, gender %in% gender_M_F,
68 |     season_end_year %in% season_end_year_num, tier %in% comp_tier
69 |   ) %>%
70 |   dplyr::arrange(season_end_year) %>%
71 |   dplyr::pull(seasons_urls) %>% unique()
72 |
73 |
74 | # championship_seasons_urls <- seasons %>%
75 | # dplyr::filter(stringr::str_detect(.data[["competition_type"]], "Leagues")) %>%
76 | # dplyr::filter(country == "ENG",
77 | # gender == "M",
78 | # season_end_year %in% c(2019:2023),
79 | # tier == "2nd") %>%
80 | # dplyr::arrange(season_end_year) %>%
81 | # dplyr::pull(seasons_urls) %>% unique()
82 | #
83 | #
84 | # seasons_urls <- c(seasons_urls, championship_seasons_urls)
85 |
86 |
87 |
88 | # Scrape FBREF ------------------------------------------------------------
89 | # For each season URL, read its "playingtime" page and stack the player rows in `fbref`.
90 | fbref <- data.frame()
91 |
92 | # built once, outside the loop; distinct() keeps a season URL repeated in `seasons` from multiplying joined rows
93 | season_lookup <- seasons %>%
94 |   select(season_end_year, competition_name, seasons_urls) %>%
95 |   distinct()
96 |
97 | for(season_url in seasons_urls) {
98 |   print(paste0("scraping season: ", season_url))
99 |   # insert "playingtime" ahead of the last path segment of the season URL
100 |   start_part <- sub('/[^/]*$', '', season_url)
101 |   end_part <- gsub(".*/", "", season_url)
102 |   stat_urls <- paste0(start_part, "/", "playingtime", "/", end_part)
103 |
104 |   Sys.sleep(5)
105 |   pg <- read_html_selenium(stat_urls)
106 |   tab_elem <- pg %>% html_elements("#div_stats_playing_time")
107 |
108 |   # first link of every table body row, prefixed with the site root
109 |   urls <- tab_elem %>%
110 |     rvest::html_nodes("table") %>% rvest::html_nodes("tbody") %>%
111 |     rvest::html_nodes("tr") %>% rvest::html_node("td a") %>% rvest::html_attr("href") %>% paste0(main_url, .)
112 |
113 |   # keep columns 2-5 and 7; the first parsed row holds the real column names
114 |   stat_df <- tab_elem %>% html_table() %>% data.frame()
115 |   stat_df <- stat_df[,c(2,3,4,5,7)]
116 |   stat_df_names <- stat_df[1,] %>% as.character()
117 |   stat_df <- stat_df[-1,]
118 |   colnames(stat_df) <- stat_df_names
119 |   stat_df$Url <- urls
120 |   stat_df$season_url <- season_url
121 |   # drop rows that merely repeat the header (Nation == "Nation"), then attach the season details
122 |   stat_df <- stat_df %>%
123 |     filter(Nation != "Nation") %>%
124 |     left_join(season_lookup, by = c("season_url" = "seasons_urls"))
125 |   fbref <- bind_rows(fbref, stat_df)
126 | }
127 |
128 |
129 | # step the working directory up one level, then save the scraped FBref players
130 | setwd("../")
131 | saveRDS(fbref, here::here("raw-data", "fbref-tm-player-mapping", "extra-leagues", "initial-match", "fbref_mls.rds"))
132 |
133 | # worldfootballR supplies tm_player_market_values() used below
134 | library(worldfootballR)
135 |
136 | # collect player market values for the United States, one start year (2018-2021) at a time
137 | valuations <- data.frame()
138 |
139 | for(i in c(2018:2021)) {
140 |
141 | print(paste("scraping year:", i))
142 | vals <- tm_player_market_values(country_name = c("United States"),
143 | start_year = i)
144 |
145 | valuations <- bind_rows(valuations, vals)
146 | }
147 |
148 |
149 | # save the stacked valuations in the same folder as the FBref file
150 | saveRDS(valuations, here::here("raw-data", "fbref-tm-player-mapping", "extra-leagues", "initial-match", "tm_players_mls.rds"))
151 |
--------------------------------------------------------------------------------
/raw-data/fbref-tm-player-mapping/extra-leagues/initial-match/tm_players_championship.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/raw-data/fbref-tm-player-mapping/extra-leagues/initial-match/tm_players_championship.rds
--------------------------------------------------------------------------------
/raw-data/fbref-tm-player-mapping/extra-leagues/initial-match/tm_players_extra_tier1.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/raw-data/fbref-tm-player-mapping/extra-leagues/initial-match/tm_players_extra_tier1.rds
--------------------------------------------------------------------------------
/raw-data/fbref-tm-player-mapping/extra-leagues/initial-match/tm_players_mls.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/raw-data/fbref-tm-player-mapping/extra-leagues/initial-match/tm_players_mls.rds
--------------------------------------------------------------------------------
/raw-data/fbref-tm-player-mapping/output/fbref_to_tm_mapping.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/raw-data/fbref-tm-player-mapping/output/fbref_to_tm_mapping.csv
--------------------------------------------------------------------------------
/raw-data/fbref-tm-player-mapping/output/initial-match/build_mapping_dictionary.R:
--------------------------------------------------------------------------------
1 | library(worldfootballR)
2 | library(tidyverse)
3 | library(here)
4 |
5 | # Pull the raw inputs and cache them as .rds so the matching below can re-read them.
6 |
7 | playing_time <- fb_big5_advanced_season_stats(season_end_year = c(2018:2022),
8 |                                               stat_type = "playing_time",
9 |                                               team_or_player = "player")
10 | saveRDS(playing_time, "fbref_players.rds")
11 | # tm_player_market_values() replaces the deprecated get_player_market_values()
12 | # and matches the other scripts in this project.
13 | valuations <- tm_player_market_values(country_name = c("England", "Spain", "France", "Italy", "Germany"),
14 |                                       start_year = c(2017:2021))
15 | saveRDS(valuations, "tm_players.rds")
16 |
17 | ###############################################################################################################
18 |
19 | # FBref side: identifying columns plus the last whitespace-separated token of the name
20 | fbref <- readRDS("fbref_players.rds") %>%
21 |   select(Season_End_Year, Squad, Comp, Player, Nation, Born, Url) %>%
22 |   mutate(fbref_surname = gsub(".*\\s", "", Player) %>% str_squish()) %>%
23 |   arrange(Player)
24 |
25 | # Transfermarkt side: competition, squad, player and valuation columns, ordered by name
26 | tm <- readRDS("tm_players.rds") %>%
27 |   select(comp_name, region, country, season_start_year, squad, player_name, player_position,
28 |          player_dob, player_nationality, player_market_value_euro, player_url) %>%
29 |   arrange(player_name)
30 |
31 | # distinct (name, DOB, URL) combinations with surname and birth year added -
32 | # a df to help with inspection of names with special characters
33 | tm_unique <- tm %>%
34 |   distinct(player_name, player_dob, player_url) %>%
35 |   mutate(
36 |     tm_surname = str_squish(gsub(".*\\s", "", player_name)),
37 |     tm_yob = lubridate::year(player_dob))
38 |
39 | #----- primary join type: -----#
40 | # exact match on player name plus year of birth (the name-only attempt is kept commented out)
41 | # joined_primary <- fbref %>% select(Player, Born, Url) %>% distinct(Url, .keep_all = T) %>%
42 | # left_join(tm %>% select(player_name, player_dob, player_url) %>% distinct(player_url, .keep_all = T), by = c("Player" = "player_name"))
43 |
44 | joined_primary <- fbref %>%
45 |   select(Player, Born, Url) %>%
46 |   distinct(Url, .keep_all = TRUE) %>%
47 |   left_join(
48 |     tm_unique %>% select(player_name, player_dob, player_url, tm_yob) %>% distinct(player_url, .keep_all = TRUE),
49 |     by = c("Player" = "player_name", "Born" = "tm_yob")) %>%
50 |   arrange(Player)
51 |
52 | # FBref URLs joined to several Transfermarkt rows - think "Adama Traoré", "Rafael" or
53 | # "Raúl García" - have to be mapped by hand, so collect all of their rows here
54 | dup_urls <- joined_primary %>% count(Player, Url, sort = TRUE) %>% filter(n > 1) %>% pull(Url)
55 | duplicate_players <- filter(joined_primary, Url %in% dup_urls)
56 |
57 | # # inspecting these records, I might be able to get some more hits when comparing the player's YOB
58 | # no_longer_dups <- duplicate_players %>%
59 | # mutate(tm_yob = lubridate::year(player_dob)) %>%
60 | # filter(Born == tm_yob)
61 | #
62 | # still_dups <- no_longer_dups %>%
63 | # count(Player, Url, Born) %>% filter(n>1) %>% pull(Url) %>% unique()
64 | #
65 | # still_dups <- duplicate_players %>%
66 | # filter(Url %in% still_dups)
67 | #
68 | # no_longer_dups <- no_longer_dups %>%
69 | # filter(!Url %in% still_dups$Url)
70 |
71 | # take the ambiguous players out of the joined data
72 | # IMPORTANT: the manually cleaned `duplicate_players_df` must be added back to the main df later
73 | joined_primary <- joined_primary %>% filter(!(Url %in% duplicate_players$Url))
74 |
75 |
76 | # matched on full player name - joins I'm happy with
77 | joined_complete <- filter(joined_primary, !is.na(player_url))
78 |
79 |
80 | # no match on full player name - these go on to the secondary join
81 | joined_missing <- filter(joined_primary, is.na(player_url))
82 |
83 |
84 | #----- secondary join type: -----#
85 | # for the players still unmatched, try surname + year of birth - would be nice to use DOB instead but I don't have it for FBref players
86 | joined_secondary <- joined_missing %>%
87 | mutate(fbref_surname = gsub(".*\\s", "", Player)) %>%
88 | select(-player_dob, -player_url) %>%
89 | left_join(tm_unique, by = c("fbref_surname" = "tm_surname", "Born" = "tm_yob"))
90 |
91 | # this looser key can match one FBref player to several Transfermarkt rows: collect those FBref URLs
92 | additional_duplicated_players <- joined_secondary %>%
93 | filter(!is.na(player_url)) %>%
94 | count(Player, Url, sort = T) %>%
95 | filter(n > 1) %>% pull(Url) %>% unique()
96 | # ...then replace the URL vector with every joined row belonging to those URLs
97 | additional_duplicated_players <- joined_secondary %>%
98 | filter(Url %in% additional_duplicated_players)
99 |
100 |
101 | # combine all duplicated joins (primary and secondary) for manual rework:
102 | duplicate_players <- duplicate_players %>%
103 | bind_rows(additional_duplicated_players)
104 | # drop the two columns that only the secondary-join rows carry
105 | duplicate_players <- duplicate_players %>%
106 | select(-fbref_surname, -player_name)
107 |
108 | # keep the unambiguous secondary matches, in the same column layout as `joined_complete`
109 | joined_secondary <- joined_secondary %>%
110 | filter(!is.na(player_url),
111 | !Url %in% additional_duplicated_players$Url) %>%
112 | select(Player, Born, Url, player_dob, player_url)
113 |
114 | # all accepted matches: full-name matches plus unambiguous surname/year-of-birth matches
115 | joined_finished <- joined_complete %>%
116 | filter(!is.na(player_url)) %>%
117 | bind_rows(joined_secondary)
118 |
119 |
120 | # leave only the still-unmatched players in `joined_missing`: drop anyone matched or
121 | # flagged as a duplicate since it was created
122 | resolved_urls <- c(joined_finished$Url, duplicate_players$Url)
123 | joined_missing <- joined_missing %>% filter(!(Url %in% resolved_urls))
124 |
125 |
126 | # write files to work on manually
127 | write.csv(joined_finished, file = here("output", "initial-match", "joined_finished.csv"), row.names = FALSE)
128 | write.csv(joined_missing, file = here("output", "initial-match", "working-files", "joined_missing.csv"), row.names = FALSE)
129 | write.csv(tm_unique, file = here("output", "initial-match", "working-files", "tm_unique.csv"), row.names = FALSE)
130 | write.csv(duplicate_players, file = here("output", "initial-match", "working-files", "duplicate_players_df.csv"), row.names = FALSE)
131 |
--------------------------------------------------------------------------------
/raw-data/fbref-tm-player-mapping/output/initial-match/working-files/duplicate_players_df.csv:
--------------------------------------------------------------------------------
1 | Player,Born,Url,player_dob,player_url,fbref_surname,player_name
2 | Adama Traoré,1995,https://fbref.com/en/players/1a6f2a66/Adama-Traore,28/6/95,https://www.transfermarkt.com/adama-traore/profil/spieler/262608,NA,NA
3 | Adama Traoré,1995,https://fbref.com/en/players/f9edc384/Adama-Traore,5/6/95,https://www.transfermarkt.com/adama-traore/profil/spieler/364405,NA,NA
4 | Guilherme,1991,https://fbref.com/en/players/9e61c019/Guilherme,21/5/91,https://www.transfermarkt.com/guilherme/profil/spieler/139607,NA,NA
5 | Guilherme,1991,https://fbref.com/en/players/8754c7ca/Guilherme,5/4/91,https://www.transfermarkt.com/guilherme/profil/spieler/115382,NA,NA
6 | Rafael,1990,https://fbref.com/en/players/9a1f2e1c/Rafael,9/7/90,https://www.transfermarkt.com/rafael/profil/spieler/61892,NA,NA
7 | Adrián López,1999,https://fbref.com/en/players/c58ebf64/Adrian-Lopez,9/1/99,https://www.transfermarkt.com/adri-lopez/profil/spieler/412042,López,Adri López
8 | Alejandro López,1997,https://fbref.com/en/players/f1887f53/Alejandro-Lopez,2/6/97,https://www.transfermarkt.com/alex-lopez/profil/spieler/313113,López,Álex López
9 | Amadou Dia Ndiaye,2000,https://fbref.com/en/players/1f0ea0a6/Amadou-Dia-Ndiaye,2/1/00,https://www.transfermarkt.com/amadou-ndiaye/profil/spieler/568695,Ndiaye,Amadou Ndiaye
10 | Basit Abdallah,1999,https://fbref.com/en/players/13a0ff99/Basit-Abdallah,1/7/99,https://www.transfermarkt.com/abdallah-basit/profil/spieler/457773,Abdallah,Benrandy Abdallah
11 | Cal Roberts,1997,https://fbref.com/en/players/8e9caf48/Cal-Roberts,14/4/97,https://www.transfermarkt.com/callum-roberts/profil/spieler/288952,Roberts,Callum Roberts
12 | Cristo González,1997,https://fbref.com/en/players/896b5df2/Cristo-Gonzalez,1/4/97,https://www.transfermarkt.com/cristo/profil/spieler/339707,González,Edgar González
13 | Daniel Martín,1998,https://fbref.com/en/players/4df0dff8/Daniel-Martin,8/7/98,https://www.transfermarkt.com/dani-martin/profil/spieler/335221,Martín,Dani Martín
14 | Daniel Torres,1989,https://fbref.com/en/players/87882adc/Daniel-Torres,15/11/89,https://www.transfermarkt.com/dani-torres/profil/spieler/93142,Torres,Dani Torres
15 | David Pereira da Costa,2001,https://fbref.com/en/players/59948ef7/David-Pereira-da-Costa,5/1/01,https://www.transfermarkt.com/david-costa/profil/spieler/719442,Costa,David Costa
16 | Diego Matías Rodríguez,1989,https://fbref.com/en/players/19f4d2c0/Diego-Matias-Rodriguez,25/6/89,https://www.transfermarkt.com/diego-rodriguez/profil/spieler/90800,Rodríguez,Diego Rodríguez
17 | Dion-Curtis Henry,1997,https://fbref.com/en/players/156bb589/Dion-Curtis-Henry,12/9/97,https://www.transfermarkt.com/dion-henry/profil/spieler/345899,Henry,Dion Henry
18 | Édgar González,1997,https://fbref.com/en/players/49d028db/Edgar-Gonzalez,1/4/97,https://www.transfermarkt.com/edgar-gonzalez/profil/spieler/401624,González,Edgar González
19 | Eduardo Bubacar Baldé,1999,https://fbref.com/en/players/3caf4f73/Eduardo-Bubacar-Balde,10/3/99,https://www.transfermarkt.com/eduardo-balde/profil/spieler/529356,Baldé,Eduardo Baldé
20 | Flavio Junior Bianchi,2000,https://fbref.com/en/players/3ef965c1/Flavio-Junior-Bianchi,24/1/00,https://www.transfermarkt.com/flavio-bianchi/profil/spieler/364132,Bianchi,Flavio Bianchi
21 | Florent da Silva,2003,https://fbref.com/en/players/8db95f95/Florent-da-Silva,2/4/03,https://www.transfermarkt.com/florent-da-silva/profil/spieler/607225,Silva,Florent Da Silva
22 | Hianga Mananga Mbock,1999,https://fbref.com/en/players/0f86995c/Hianga-Mananga-Mbock,28/12/99,https://www.transfermarkt.com/hiangaa-mbock/profil/spieler/684062,Mbock,Hianga'a Mbock
23 | Javier Jiménez García,1997,https://fbref.com/en/players/f30d7505/Javier-Jimenez-Garcia,28/6/97,https://www.transfermarkt.com/javi-jimenez/profil/spieler/251860,García,Aleix García
24 | Javier Martín,1998,https://fbref.com/en/players/789773d9/Javier-Martin,25/1/98,https://www.transfermarkt.com/javi-martin/profil/spieler/534372,Martín,Álex Martín
25 | Joan García,2001,https://fbref.com/en/players/87b498b0/Joan-Garcia,12/2/01,https://www.transfermarkt.com/joan-garcia/profil/spieler/561613,García,Carlo García
26 | João Paulo Santos Costa,1996,https://fbref.com/en/players/00225aae/Joao-Paulo-Santos-Costa,2/2/96,https://www.transfermarkt.com/joao-costa/profil/spieler/198638,Costa,João Costa
27 | Joel Castro Pereira,1996,https://fbref.com/en/players/881e5db7/Joel-Castro-Pereira,28/6/96,https://www.transfermarkt.com/joel-pereira/profil/spieler/192611,Pereira,Joel Pereira
28 | José Mena Rodríguez,1998,https://fbref.com/en/players/0af4b238/Jose-Mena-Rodriguez,23/3/98,https://www.transfermarkt.com/pepe-mena/profil/spieler/396145,Rodríguez,Genaro Rodríguez
29 | Leonardo Suárez,1996,https://fbref.com/en/players/25c72b36/Leonardo-Suarez,30/3/96,https://www.transfermarkt.com/leo-suarez/profil/spieler/294894,Suárez,Leo Suárez
30 | Lluis López,1997,https://fbref.com/en/players/a685f013/Lluis-Lopez,5/3/97,https://www.transfermarkt.com/lluis-lopez/profil/spieler/262391,López,Lluís López
31 | Mama Samba Baldé,1995,https://fbref.com/en/players/fb14aa28/Mama-Samba-Balde,6/11/95,https://www.transfermarkt.com/mama-balde/profil/spieler/325223,Baldé,Mama Baldé
32 | Manuel Sánchez,2000,https://fbref.com/en/players/ffacd3d5/Manuel-Sanchez,24/8/00,https://www.transfermarkt.com/manu-sanchez/profil/spieler/618809,Sánchez,Manu Sánchez
33 | Martín,1999,https://fbref.com/en/players/cbec0059/Martin,11/7/99,https://www.transfermarkt.com/martin-calderon/profil/spieler/278404,Martín,Andrés Martín
34 | Nelson Sissoko,1997,https://fbref.com/en/players/12bd0579/Nelson-Sissoko,7/3/97,https://www.transfermarkt.com/alpha-sissoko/profil/spieler/594992,Sissoko,Alpha Sissoko
35 | Pio Francesco Russo,1999,https://fbref.com/en/players/5e913bf9/Pio-Francesco-Russo,1/3/99,https://www.transfermarkt.com/francesco-pio-russo/profil/spieler/315866,Russo,Francesco Pio Russo
36 | Raúl García,1989,https://fbref.com/en/players/1a317a1b/Raul-Garcia,25/11/89,https://www.transfermarkt.com/raul-carnero/profil/spieler/139434,García,Kike García
37 | Samu Pérez,1997,https://fbref.com/en/players/aae17c81/Samu-Perez,26/4/97,https://www.transfermarkt.com/samuel-perez/profil/spieler/363541,Pérez,Samuel Pérez
--------------------------------------------------------------------------------
/raw-data/fbref-tm-player-mapping/output/initial-match/working-files/duplicate_players_df_manual_fix.csv:
--------------------------------------------------------------------------------
1 | Player,Born,Url,player_dob,player_url,fbref_surname,player_name
2 | Adama Traoré,1995,https://fbref.com/en/players/1a6f2a66/Adama-Traore,28/6/95,https://www.transfermarkt.com/adama-traore/profil/spieler/262608,NA,NA
3 | Adama Traoré,1995,https://fbref.com/en/players/f9edc384/Adama-Traore,5/6/95,https://www.transfermarkt.com/adama-traore/profil/spieler/364405,NA,NA
4 | Guilherme,1991,https://fbref.com/en/players/9e61c019/Guilherme,21/5/91,https://www.transfermarkt.com/guilherme/profil/spieler/139607,NA,NA
5 | Guilherme,1991,https://fbref.com/en/players/8754c7ca/Guilherme,5/4/91,https://www.transfermarkt.com/guilherme/profil/spieler/115382,NA,NA
6 | Rafael,1990,https://fbref.com/en/players/9a1f2e1c/Rafael,9/7/90,https://www.transfermarkt.com/rafael/profil/spieler/61892,NA,NA
7 | Adrián López,1999,https://fbref.com/en/players/c58ebf64/Adrian-Lopez,9/1/99,https://www.transfermarkt.com/adri-lopez/profil/spieler/412042,López,Adri López
8 | Alejandro López,1997,https://fbref.com/en/players/f1887f53/Alejandro-Lopez,2/6/97,https://www.transfermarkt.com/alex-lopez/profil/spieler/313113,López,Álex López
9 | Amadou Dia Ndiaye,2000,https://fbref.com/en/players/1f0ea0a6/Amadou-Dia-Ndiaye,2/1/00,https://www.transfermarkt.com/amadou-ndiaye/profil/spieler/568695,Ndiaye,Amadou Ndiaye
10 | Basit Abdallah,1999,https://fbref.com/en/players/13a0ff99/Basit-Abdallah,1/7/99,https://www.transfermarkt.com/abdallah-basit/profil/spieler/457773,Abdallah,Benrandy Abdallah
11 | Cal Roberts,1997,https://fbref.com/en/players/8e9caf48/Cal-Roberts,14/4/97,https://www.transfermarkt.com/callum-roberts/profil/spieler/288952,Roberts,Callum Roberts
12 | Cristo González,1997,https://fbref.com/en/players/896b5df2/Cristo-Gonzalez,1/4/97,https://www.transfermarkt.com/cristo/profil/spieler/339707,González,Edgar González
13 | Daniel Martín,1998,https://fbref.com/en/players/4df0dff8/Daniel-Martin,8/7/98,https://www.transfermarkt.com/dani-martin/profil/spieler/335221,Martín,Dani Martín
14 | Daniel Torres,1989,https://fbref.com/en/players/87882adc/Daniel-Torres,15/11/89,https://www.transfermarkt.com/dani-torres/profil/spieler/93142,Torres,Dani Torres
15 | David Pereira da Costa,2001,https://fbref.com/en/players/59948ef7/David-Pereira-da-Costa,5/1/01,https://www.transfermarkt.com/david-costa/profil/spieler/719442,Costa,David Costa
16 | Diego Matías Rodríguez,1989,https://fbref.com/en/players/19f4d2c0/Diego-Matias-Rodriguez,25/6/89,https://www.transfermarkt.com/diego-rodriguez/profil/spieler/90800,Rodríguez,Diego Rodríguez
17 | Dion-Curtis Henry,1997,https://fbref.com/en/players/156bb589/Dion-Curtis-Henry,12/9/97,https://www.transfermarkt.com/dion-henry/profil/spieler/345899,Henry,Dion Henry
18 | Édgar González,1997,https://fbref.com/en/players/49d028db/Edgar-Gonzalez,1/4/97,https://www.transfermarkt.com/edgar-gonzalez/profil/spieler/401624,González,Edgar González
19 | Eduardo Bubacar Baldé,1999,https://fbref.com/en/players/3caf4f73/Eduardo-Bubacar-Balde,10/3/99,https://www.transfermarkt.com/eduardo-balde/profil/spieler/529356,Baldé,Eduardo Baldé
20 | Flavio Junior Bianchi,2000,https://fbref.com/en/players/3ef965c1/Flavio-Junior-Bianchi,24/1/00,https://www.transfermarkt.com/flavio-bianchi/profil/spieler/364132,Bianchi,Flavio Bianchi
21 | Florent da Silva,2003,https://fbref.com/en/players/8db95f95/Florent-da-Silva,2/4/03,https://www.transfermarkt.com/florent-da-silva/profil/spieler/607225,Silva,Florent Da Silva
22 | Hianga Mananga Mbock,1999,https://fbref.com/en/players/0f86995c/Hianga-Mananga-Mbock,28/12/99,https://www.transfermarkt.com/hiangaa-mbock/profil/spieler/684062,Mbock,Hianga'a Mbock
23 | Javier Jiménez García,1997,https://fbref.com/en/players/f30d7505/Javier-Jimenez-Garcia,28/6/97,https://www.transfermarkt.com/javi-jimenez/profil/spieler/251860,García,Aleix García
24 | Javier Martín,1998,https://fbref.com/en/players/789773d9/Javier-Martin,25/1/98,https://www.transfermarkt.com/javi-martin/profil/spieler/534372,Martín,Álex Martín
25 | Joan García,2001,https://fbref.com/en/players/87b498b0/Joan-Garcia,12/2/01,https://www.transfermarkt.com/joan-garcia/profil/spieler/561613,García,Carlo García
26 | João Paulo Santos Costa,1996,https://fbref.com/en/players/00225aae/Joao-Paulo-Santos-Costa,2/2/96,https://www.transfermarkt.com/joao-costa/profil/spieler/198638,Costa,João Costa
27 | Joel Castro Pereira,1996,https://fbref.com/en/players/881e5db7/Joel-Castro-Pereira,28/6/96,https://www.transfermarkt.com/joel-pereira/profil/spieler/192611,Pereira,Joel Pereira
28 | José Mena Rodríguez,1998,https://fbref.com/en/players/0af4b238/Jose-Mena-Rodriguez,23/3/98,https://www.transfermarkt.com/pepe-mena/profil/spieler/396145,Rodríguez,Genaro Rodríguez
29 | Leonardo Suárez,1996,https://fbref.com/en/players/25c72b36/Leonardo-Suarez,30/3/96,https://www.transfermarkt.com/leo-suarez/profil/spieler/294894,Suárez,Leo Suárez
30 | Lluis López,1997,https://fbref.com/en/players/a685f013/Lluis-Lopez,5/3/97,https://www.transfermarkt.com/lluis-lopez/profil/spieler/262391,López,Lluís López
31 | Mama Samba Baldé,1995,https://fbref.com/en/players/fb14aa28/Mama-Samba-Balde,6/11/95,https://www.transfermarkt.com/mama-balde/profil/spieler/325223,Baldé,Mama Baldé
32 | Manuel Sánchez,2000,https://fbref.com/en/players/ffacd3d5/Manuel-Sanchez,24/8/00,https://www.transfermarkt.com/manu-sanchez/profil/spieler/618809,Sánchez,Manu Sánchez
33 | Martín,1999,https://fbref.com/en/players/cbec0059/Martin,11/7/99,https://www.transfermarkt.com/martin-calderon/profil/spieler/278404,Martín,Andrés Martín
34 | Nelson Sissoko,1997,https://fbref.com/en/players/12bd0579/Nelson-Sissoko,7/3/97,https://www.transfermarkt.com/alpha-sissoko/profil/spieler/594992,Sissoko,Alpha Sissoko
35 | Pio Francesco Russo,1999,https://fbref.com/en/players/5e913bf9/Pio-Francesco-Russo,1/3/99,https://www.transfermarkt.com/francesco-pio-russo/profil/spieler/315866,Russo,Francesco Pio Russo
36 | Raúl García,1989,https://fbref.com/en/players/1a317a1b/Raul-Garcia,25/11/89,https://www.transfermarkt.com/raul-carnero/profil/spieler/139434,García,Kike García
37 | Samu Pérez,1997,https://fbref.com/en/players/aae17c81/Samu-Perez,26/4/97,https://www.transfermarkt.com/samuel-perez/profil/spieler/363541,Pérez,Samuel Pérez
--------------------------------------------------------------------------------
/raw-data/fbref-tm-player-mapping/output/working-files/duplicate_players_df.csv:
--------------------------------------------------------------------------------
1 | "Player","Born","Url","player_dob","player_url","player_position"
2 |
--------------------------------------------------------------------------------
/raw-data/fbref-tm-player-mapping/prepare_working_files.R:
--------------------------------------------------------------------------------
1 | library(worldfootballR)
2 | library(tidyverse)
3 | library(here)
4 |
5 | playing_time <- fb_big5_advanced_season_stats(season_end_year = 2024,
6 | stat_type = "playing_time",
7 | team_or_player = "player")
8 |
9 | tm <- tm_player_market_values(country_name = c("England", "Spain", "France", "Italy", "Germany"),
10 | start_year = 2023)
11 |
12 |
13 |
14 | matched_data <- read.csv("https://github.com/JaseZiv/worldfootballR_data/raw/master/raw-data/fbref-tm-player-mapping/output/fbref_to_tm_mapping.csv", stringsAsFactors = FALSE)
15 |
16 |
17 | fbref <- playing_time %>% filter(!Url %in% matched_data$UrlFBref)
18 |
19 | fbref <- fbref %>%
20 | # have made the decision to get rid of players that were listed on team sheets but
21 | # haven't yet played as there's too many manual matches of youth players
22 | filter(!is.na(Min_Playing.Time)) %>%
23 | select(Season_End_Year, Squad, Comp, Player, Nation, Born, Url) %>%
24 | mutate(fbref_surname = str_squish(gsub(".*\\s", "", Player))) %>%
25 | arrange(Player)
26 |
27 | tm <- tm %>%
28 | select(comp_name, region, country, season_start_year, squad, player_name, player_position,
29 | player_dob, player_nationality, player_market_value_euro, player_url) %>%
30 | arrange(player_name)
31 |
32 | # want a df to help with inspection of names with special characters
33 | tm_unique <- tm %>%
34 | arrange(player_url, desc(season_start_year)) %>%
35 | distinct(player_name, player_dob, player_url, .keep_all = TRUE) %>%
36 | select(player_name, player_dob, player_url, player_position) %>%
37 | mutate(tm_surname = str_squish(gsub(".*\\s", "", player_name)),
38 | tm_yob = lubridate::year(player_dob))
39 |
40 | #----- primary join type: -----#
41 | # here I will join the two datasets on the player names
42 | # joined_primary <- fbref %>% select(Player, Born, Url) %>% distinct(Url, .keep_all = T) %>%
43 | # left_join(tm %>% select(player_name, player_dob, player_url) %>% distinct(player_url, .keep_all = T), by = c("Player" = "player_name"))
44 |
45 | # stringi::stri_trans_general("Audric Estimé", "latin-ascii")
46 |
47 | joined_primary <- fbref %>%
48 | select(Player, Born, Url) %>%
49 | mutate(Player = stringi::stri_trans_general(Player, "latin-ascii")) |>
50 | distinct(Url, .keep_all = T) %>%
51 | left_join(
52 | tm_unique %>%
53 | select(player_name, player_dob, player_url, player_position, tm_yob) %>%
54 | mutate(player_name = stringi::stri_trans_general(player_name, "latin-ascii")) |>
55 | distinct(player_url, .keep_all = T),
56 | by = c("Player" = "player_name", "Born" = "tm_yob")
57 | )
58 |
59 |
60 | # joined_primary <- fbref %>% select(Player, Born, Url) %>% distinct(Url, .keep_all = T) %>%
61 | # left_join(tm_unique %>% select(player_name, player_dob, player_url, player_position, tm_yob) %>% distinct(player_url, .keep_all = T),
62 | # by = c("Player" = "player_name", "Born" = "tm_yob"))
63 |
64 | # arrange by player name
65 | joined_primary <- joined_primary %>% arrange(Player)
66 |
67 |
68 | # these players have multiple records in each data set - think "Adama Traoré" or "Rafael" or "Raúl García" for example
69 | # will need to manually go through each of these to map the correct player
70 | duplicate_players <- joined_primary %>% count(Player, Url, sort = T) %>% filter(n > 1) %>% pull(Url)
71 | duplicate_players <- joined_primary %>% filter(Url %in% duplicate_players)
72 |
73 |
74 | # now remove these records from the raw joined data
75 | # IMPORTANT: remember to add `duplicate_players_df` that has been cleaned manually back to the main df
76 | joined_primary <- joined_primary %>%
77 | filter(!Url %in% duplicate_players$Url)
78 |
79 | # get a full list of joins on full player name that I'm happy with
80 | joined_complete <- joined_primary %>%
81 | filter(!is.na(player_url))
82 |
83 | # get a list of records where there were no matches on full player name
84 | joined_missing <- joined_primary %>%
85 | filter(is.na(player_url))
86 |
87 | #----- secondary join type: -----#
88 | # join on surname and year of birth (FBref has no DOB); the all-NA TM columns left by the failed primary join are dropped first, because an NA `player_position` used as a join key would only ever match TM rows whose position is also NA
89 | joined_secondary <- joined_missing %>%
90 | mutate(fbref_surname = gsub(".*\\s", "", Player)) %>%
91 | select(-player_dob, -player_url, -player_position) %>%
92 | left_join(tm_unique, by = c("fbref_surname" = "tm_surname", "Born" = "tm_yob"))
93 |
94 | # now there are some more duplicates as a result of this secondary join method
95 | additional_duplicated_players <- joined_secondary %>%
96 | filter(!is.na(player_url)) %>%
97 | count(Player, Url, sort = T) %>%
98 | filter(n > 1) %>% pull(Url) %>% unique()
99 |
100 | additional_duplicated_players <- joined_secondary %>%
101 | filter(Url %in% additional_duplicated_players)
102 |
103 |
104 | # combine all duplicated joins for manual rework:
105 | duplicate_players <- duplicate_players %>%
106 | bind_rows(additional_duplicated_players)
107 |
108 | duplicate_players <- duplicate_players %>%
109 | select(-fbref_surname, -player_name)
110 |
111 |
112 | joined_secondary <- joined_secondary %>%
113 | filter(!is.na(player_url),
114 | !Url %in% additional_duplicated_players$Url) %>%
115 | select(Player, Born, Url, player_dob, player_url)
116 |
117 |
118 | joined_finished <- joined_complete %>%
119 | filter(!is.na(player_url)) %>%
120 | bind_rows(joined_secondary)
121 |
122 |
123 | # create a file for manual rework by removing any of the records that have been matched since the creation of `joined_missing`:
124 | joined_missing <- joined_missing %>%
125 | filter(!Url %in% joined_finished$Url,
126 | !Url %in% duplicate_players$Url)
127 |
128 |
129 | # write files to work on manually
130 | write.csv(joined_finished, here("raw-data", "fbref-tm-player-mapping", "output", "working-files", "joined_finished.csv"), row.names = F)
131 | write.csv(joined_missing, here("raw-data", "fbref-tm-player-mapping", "output", "working-files", "joined_missing.csv"), row.names = F)
132 | write.csv(tm_unique, here("raw-data", "fbref-tm-player-mapping", "output", "working-files", "tm_unique.csv"), row.names = F)
133 | write.csv(duplicate_players, here("raw-data", "fbref-tm-player-mapping", "output", "working-files", "duplicate_players_df.csv"), row.names = F)
134 |
--------------------------------------------------------------------------------
/raw-data/fbref-tm-player-mapping/update_player_positions.R:
--------------------------------------------------------------------------------
1 | library(worldfootballR)
2 | library(tidyverse)
3 | library(googlesheets4)
4 | library(here)
5 | library(gt)
6 |
7 | # first, we get updated data to ensure all new players are being captured
8 | playing_time <- fb_big5_advanced_season_stats(season_end_year = 2023,
9 | stat_type = "playing_time",
10 | team_or_player = "player")
11 | # Transfermarkt market values for the big-5 leagues; uses `tm_player_market_values()`, the same function the sibling prepare_working_files.R script calls
12 | tm <- tm_player_market_values(country_name = c("England", "Spain", "France", "Italy", "Germany"),
13 | start_year = 2022)
14 |
15 | # saveRDS(tm, here("raw-data", "fbref-tm-player-mapping", "data", "tm_data.rds"))
16 |
17 | # read in matched data
18 | matched_data <- read.csv("https://github.com/JaseZiv/worldfootballR_data/raw/master/raw-data/fbref-tm-player-mapping/output/fbref_to_tm_mapping.csv", stringsAsFactors = FALSE)
19 |
20 | # create a separate df to allow for analysis of players who have changed positions
21 | this_season <- matched_data %>%
22 | left_join(tm %>% select(squad, UrlTmarkt=player_url, TmPos_22_23=player_position, player_market_value_euro), by = "UrlTmarkt")
23 |
24 | # display players that have changed data
25 | this_season %>%
26 | filter(!is.na(TmPos_22_23)) %>%
27 | filter(TmPos_22_23 != TmPos) %>%
28 | select(PlayerFBref, squad, TmPos, TmPos_22_23, player_market_value_euro) %>%
29 | arrange(desc(player_market_value_euro)) %>%
30 | mutate(player_market_value_euro = scales::dollar(player_market_value_euro, prefix = "€")) %>%
31 | gt() %>%
32 | tab_options(column_labels.font.size = 20,
33 | column_labels.font.weight = "bold")
34 |
35 |
36 |
37 | # now overwrite older positions with the new ones; `tm` is reduced to one row per player URL (first occurrence kept) so the join cannot duplicate rows of the mapping
38 | matched_data <- matched_data %>%
39 | left_join(tm %>% select(UrlTmarkt=player_url, TmPos_22_23=player_position) %>% distinct(UrlTmarkt, .keep_all = TRUE), by = "UrlTmarkt") %>%
40 | mutate(
41 | TmPos = case_when(
42 | is.na(TmPos_22_23) ~ TmPos,
43 | TRUE ~ TmPos_22_23
44 | )
45 | ) %>%
46 | select(-TmPos_22_23)
47 |
48 |
49 | #=============
50 | # Write Files
51 | #=============
52 |
53 | # write file for commit to GitHub:
54 | write.csv(matched_data, here("raw-data", "fbref-tm-player-mapping", "output", "fbref_to_tm_mapping.csv"), row.names = FALSE)
55 |
56 | # Write file to Googlesheets:
57 | # get the sheet id
58 | ss <- as_sheets_id("https://docs.google.com/spreadsheets/d/1GjjS9IRp6FVzVX5QyfmttMk8eYBtIzuZ_YIM0VWg8OY/edit#gid=61874932") %>%
59 | as.character()
60 |
61 | # write the sheet
62 | sheet_write(matched_data,
63 | ss,
64 | sheet = "fbref_to_tm_mapping")
65 |
66 |
67 |
--------------------------------------------------------------------------------
/raw-data/fotmob-leagues/all_leagues.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/raw-data/fotmob-leagues/all_leagues.csv
--------------------------------------------------------------------------------
/raw-data/job_controller.R:
--------------------------------------------------------------------------------
1 | # SCHEDULED SCRAPERS
2 |
3 | # load libraries
4 | library(here)
5 |
6 | # Scrape League Seasons Data ------------------------------------------------------
7 | # source(here::here("raw-data", "league_seasons", "get_league_seasons.R"))
8 |
9 | # Scrape Countries Data ---------------------------------------------------
10 | # source(here::here("raw-data", "countries_list", "get_countries_list.R"))
11 |
12 |
13 |
14 | # Scrape All Competition Season’s Data ------------------------------------
15 | source(here::here("raw-data", "all_leages_and_cups", "get_all_comp_seasons.R"))
16 |
17 |
18 |
19 | # Scrape Transfermarkt Data -----------------------------------------------
20 | source(here::here("raw-data", "transfermarkt_leagues", "get_transfermarkt_metadata.R"))
21 |
22 |
23 | # Scrape Fotmob league data ----------------------------------------------
24 | # source(here::here("raw-data", "fotmob-leagues", "get_fotmob_leagues.R"))
25 |
26 |
--------------------------------------------------------------------------------
/raw-data/league_seasons/get_league_seasons.R:
--------------------------------------------------------------------------------
1 | library(tidyverse)
2 | library(worldfootballR)
3 |
4 | .get_tier1_competitions <- function() {
5 | main_url <- "https://fbref.com"
6 | # read the FBref page that lists all competitions
7 | all_comps_url <- xml2::read_html("https://fbref.com/en/comps/")
8 | # this selects only the Tier 1 club competitions node - the selector will need to be modified if more comps are required
9 | comps <- all_comps_url %>% rvest::html_nodes("#all_comps_1_fa_club_league_senior")
10 | # get the relative url of each competition, then prefix it with the fbref base url
11 | competition_urls <- comps %>% rvest::html_node("tbody") %>% rvest::html_nodes("th a") %>% rvest::html_attr("href")
12 | competition_urls <- paste0(main_url, competition_urls)
13 | # scrape the table that contains the competitions into a data frame
14 | competitions <- comps %>% rvest::html_nodes(".sortable") %>% rvest::html_table() %>% data.frame()
15 | # add the competition url column (one url per table row)
16 | competitions <- cbind(competitions, competition_urls)
17 | # strip everything up to and including the first space, i.e. the two character flag code, leaving only the 3 character country code
18 | competitions$Country <- gsub(".*? ", "", competitions$Country)
19 |
20 | return(competitions)
21 | }
22 |
23 |
24 | get_league_seasons_url <- function() {
25 | main_url <- "https://fbref.com"
26 |
27 | competitions <- .get_tier1_competitions()
28 |
29 | league_urls <- competitions %>%
30 | dplyr::pull(.data$competition_urls)
31 |
32 | get_urls <- function(league_url) {
33 | print(glue::glue("Scraping season URLs from {league_url}"))
34 | league_page <- xml2::read_html(league_url)
35 |
36 | seasons <- league_page %>%
37 | rvest::html_nodes("th a") %>%
38 | rvest::html_text()
39 |
40 | season_end_year <- league_page %>%
41 | rvest::html_nodes("th a") %>%
42 | rvest::html_text() %>%
43 | gsub(".*-", "", .)
44 |
45 |
46 | seasons_urls <- league_page %>%
47 | rvest::html_nodes("th a") %>%
48 | rvest::html_attr("href") %>%
49 | paste0(main_url, .)
50 |
51 | # fixtures_url <- xml2::read_html(season_url) %>%
52 | # rvest::html_nodes(".hoversmooth") %>%
53 | # rvest::html_nodes(".full") %>%
54 | # rvest::html_nodes("a") %>%
55 | # rvest::html_attr("href") %>% .[grepl("Fixtures", .)] %>% paste0(main_url, .)
56 |
57 | get_fixtures <- function(season_url) {
58 | # returns the "Fixtures" page url for one season page, or NA when the page has no such link; waits a random 3-10 seconds before each request
59 | Sys.sleep(round(runif(1, 3, 10)))
60 | fixtures_url <- xml2::read_html(season_url) %>%
61 | rvest::html_nodes(".hoversmooth") %>%
62 | rvest::html_nodes(".full") %>%
63 | rvest::html_nodes("a") %>%
64 | rvest::html_attr("href") %>% .[grepl("Fixtures", .)] %>% paste0(main_url, .)
65 | # test only the first link so the `if` condition is always length one, and return a character NA so purrr::map_chr() gets the type it expects
66 | fixtures_url <- if(grepl("Fixtures", fixtures_url[1])){
67 | fixtures_url[1]
68 | } else {
69 | NA_character_
70 | }
71 | return(fixtures_url)
72 | }
73 |
74 | fixtures_url <- seasons_urls %>%
75 | purrr::map_chr(get_fixtures)
76 |
77 | all_league_seasons <- cbind(league_url, seasons, season_end_year, seasons_urls, fixtures_url) %>% data.frame()
78 |
79 |
80 | return(all_league_seasons)
81 | }
82 |
83 | all_urls <- league_urls %>%
84 | purrr::map_df(get_urls) %>%
85 | dplyr::left_join(competitions, ., by = c("competition_urls" = "league_url")) %>%
86 | janitor::clean_names()
87 |
88 | }
89 |
90 |
91 |
92 | all_tier1_season_URLs <- get_league_seasons_url()
93 |
94 | write.csv(all_tier1_season_URLs, here::here("raw-data", "league_seasons", "all_tier1_season_URLs.csv"), row.names = F)
95 |
--------------------------------------------------------------------------------
/raw-data/transfermarkt_leagues/get_transfermarkt_metadata.R:
--------------------------------------------------------------------------------
1 | library(tidyverse)
2 | library(rvest)
3 |
4 | main_url <- "https://www.transfermarkt.com"
5 |
6 |
7 | # Get Competitions --------------------------------------------------------
8 |
9 | # need to hard code this - no idea where to go to get the regions:
10 | regions <- c("europa", "asien", "amerika", "afrika")
11 |
12 | # region_urls <- paste0("https://www.transfermarkt.com/wettbewerbe/", regions)
13 |
14 | all_comps <- data.frame()
15 |
16 | for(region in regions) {
17 | print(paste("Scraping league URLs from the", region, "region"))
18 | region_url <- paste0("https://www.transfermarkt.com/wettbewerbe/", region)
19 | Sys.sleep(3)
20 | comp <- xml2::read_html(region_url)
21 |
22 | comp_name <- comp %>% rvest::html_nodes(".inline-table td+ td a") %>% rvest::html_text()
23 | comp_url <- comp %>% rvest::html_nodes(".inline-table td+ td a") %>% rvest::html_attr("href") %>% paste0(main_url, .)
24 |
25 | flags_list <- comp %>% rvest::html_nodes(".hauptlink+ .zentriert")
26 | country <- c()
27 | for(i in 1:length(flags_list)) {
28 | a <- xml2::xml_attrs(xml2::xml_child(flags_list[[i]], 1))[["title"]]
29 | country <- c(country, a)
30 | }
31 |
32 | comps_df <- cbind(comp_name, region, country, comp_url) %>% data.frame()
33 |
34 | all_comps <- rbind(all_comps, comps_df)
35 | }
36 |
37 |
38 | # Get season URLs ---------------------------------------------------------
39 |
40 |
41 | all_seasons_df <- data.frame()
42 |
43 | for(each_league_url in 1:nrow(all_comps)) {
44 | print(paste0("scraping league ", each_league_url, " of ", nrow(all_comps)))
45 | Sys.sleep(4)
46 | comp_url <- all_comps$comp_url[each_league_url]
47 | league_page <- xml2::read_html(comp_url)
48 |
49 | seasons <- league_page %>% rvest::html_nodes(".chzn-select") %>% rvest::html_nodes("option")
50 |
51 | season_start_year <- c()
52 | for(each_season in seasons) {
53 | season_start_year <- c(season_start_year, xml2::xml_attrs(each_season)[["value"]])
54 | }
55 |
56 | season_urls <- paste0(comp_url, "/plus/?saison_id=", season_start_year)
57 |
58 | league_seasons_df <- cbind(comp_url, season_start_year, season_urls) %>% data.frame()
59 |
60 | all_seasons_df <- rbind(all_seasons_df, league_seasons_df)
61 | }
62 |
63 | all_data <- all_comps %>%
64 | dplyr::left_join(all_seasons_df, by = "comp_url")
65 |
66 | all_data <- all_data %>%
67 | dplyr::mutate(region = dplyr::case_when(
68 | region == "europa" ~ "Europe",
69 | region == "asien" ~ "Asia",
70 | region == "amerika" ~ "Americas",
71 | region == "afrika" ~ "Africa"
72 | ))
73 |
74 | write.csv(all_data, here::here("raw-data", "transfermarkt_leagues", "main_comp_seasons.csv"), row.names = F)
75 |
76 |
77 |
--------------------------------------------------------------------------------
/raw-data/transfermarkt_staff/get_staff_types.R:
--------------------------------------------------------------------------------
1 | library(magrittr)
2 | library(here)
3 |
4 | url <- "https://www.transfermarkt.com/real-madrid/mitarbeiterhistorie/verein/418"
5 | history_pg <- xml2::read_html(url)
6 |
7 | staff <- history_pg %>% rvest::html_nodes(".auflistung tbody tr td") %>% rvest::html_nodes(".inline-select")
8 |
9 | staff_type_text <- staff %>% rvest::html_nodes("select option") %>% rvest::html_text() %>% stringr::str_squish()
10 | staff_type_idx <- staff %>% rvest::html_nodes("select option") %>% rvest::html_attr("value")
11 | staff_types <- data.frame(staff_type_idx = staff_type_idx, staff_type_text = staff_type_text) %>% dplyr::filter(staff_type_text != "")
12 | write.csv(staff_types, here("raw-data", "transfermarkt_staff", "tm_staff_types.csv"), row.names = F)
13 |
--------------------------------------------------------------------------------
/raw-data/transfermarkt_staff/tm_staff_types.csv:
--------------------------------------------------------------------------------
1 | "staff_type_idx","staff_type_text"
2 | "1","Manager"
3 | "10","Caretaker Manager"
4 | "2","Assistant Manager"
5 | "3","Goalkeeping Coach"
6 | "11","Conditioning Coach"
7 | "22","Fitness Coach"
8 | "16","Chief Analyst"
9 | "63","Athletic Coach"
10 | "23","Rehab Coach"
11 | "104","Youth Coach"
12 | "70","Video Analyst"
13 | "145","Coordinator of talent management"
14 | "13","Director of Football"
15 | "54","Sporting Director"
16 | "43","Head of Football Operations"
17 | "68","Technical Director"
18 | "25","Chief Executive Officer"
19 | "102","Adivser of management"
20 | "17","President"
21 | "27","Vice-President"
22 | "28","Chairman"
23 | "113","Vice-Chairman"
24 | "39","Board Member"
25 | "59","Member of administrative board"
26 | "83","Marketing/Management"
27 | "57","Director of Marketing and Sales"
28 | "86","Honorary President"
29 | "90","Head of Scouting"
30 | "7","Scout"
31 | "166","Youth Scout"
32 | "35","Academy manager"
33 | "19","Club Doctor"
34 | "130","Nutritionist"
35 | "44","Marketing Staff"
36 | "9","Team official"
37 | "149","Club representative"
38 |
--------------------------------------------------------------------------------
/worldfootballR_data.Rproj:
--------------------------------------------------------------------------------
1 | Version: 1.0
2 | ProjectId: a5dcf981-a6ce-4c66-83c6-2505bda960f8
3 |
4 | RestoreWorkspace: Default
5 | SaveWorkspace: Default
6 | AlwaysSaveHistory: Default
7 |
8 | EnableCodeIndexing: Yes
9 | UseSpacesForTab: Yes
10 | NumSpacesForTab: 2
11 | Encoding: UTF-8
12 |
13 | RnwWeave: Sweave
14 | LaTeX: pdfLaTeX
15 |
--------------------------------------------------------------------------------