├── .github └── workflows │ ├── automated_adv_stats.yml │ ├── automated_fb_advanced_match_stats.yml │ ├── automated_fb_match_shooting.yml │ ├── automated_fb_match_summary.yml │ ├── automated_match_results.yml │ ├── automated_match_results_cups.yml │ ├── automated_understat_shots.yml │ └── main.yml ├── .gitignore ├── R ├── fb_advanced_match_stats │ ├── backfill_fb_advanced_match_stats.R │ ├── shared_fb_advanced_match_stats.R │ └── update_fb_advanced_match_stats.R ├── fb_big5_advanced_season_stats │ ├── backfill_big5_advanced_stats.R │ └── update_big5_advanced_stats.R ├── fb_match_results │ ├── backfill_fb_match_results.R │ └── update_fb_match_results.R ├── fb_match_results_cups │ ├── backfill_fb_cups_match_results.R │ └── update_fb_cups_match_results.R ├── fb_match_shooting │ ├── adhoc_fb_match_shooting.R │ ├── backfill_fb_match_shooting.R │ ├── shared_fb_match_shooting.R │ └── update_fb_match_shooting.R ├── fb_match_summary │ ├── backfill_fb_match_summary.R │ ├── shared_fb_match_summary.R │ └── update_fb_match_summary.R ├── piggyback.R ├── tm_player_vals │ ├── backfill_big5_player_vals.R │ └── update_big5_player_vals.R ├── tm_transfers │ └── backfill_big5_transfers.R └── understat_league_shots │ ├── backup_understat_local.R │ └── update_understat_shots.R ├── README.md ├── data ├── fb_advanced_match_stats │ └── .gitignore ├── fb_big5_advanced_season_stats │ ├── big5_player_defense.rds │ ├── big5_player_gca.rds │ ├── big5_player_keepers.rds │ ├── big5_player_keepers_adv.rds │ ├── big5_player_misc.rds │ ├── big5_player_passing.rds │ ├── big5_player_passing_types.rds │ ├── big5_player_playing_time.rds │ ├── big5_player_possession.rds │ ├── big5_player_shooting.rds │ ├── big5_player_standard.rds │ ├── big5_team_defense.rds │ ├── big5_team_gca.rds │ ├── big5_team_keepers.rds │ ├── big5_team_keepers_adv.rds │ ├── big5_team_misc.rds │ ├── big5_team_passing.rds │ ├── big5_team_passing_types.rds │ ├── big5_team_playing_time.rds │ ├── big5_team_possession.rds │ ├── 
big5_team_shooting.rds │ └── big5_team_standard.rds ├── fb_big5_advanced_statsbomb │ ├── README.md │ ├── big5_player_defense.rds │ ├── big5_player_gca.rds │ ├── big5_player_keepers.rds │ ├── big5_player_keepers_adv.rds │ ├── big5_player_misc.rds │ ├── big5_player_passing.rds │ ├── big5_player_passing_types.rds │ ├── big5_player_playing_time.rds │ ├── big5_player_possession.rds │ ├── big5_player_shooting.rds │ ├── big5_player_standard.rds │ ├── big5_team_defense.rds │ ├── big5_team_gca.rds │ ├── big5_team_keepers.rds │ ├── big5_team_keepers_adv.rds │ ├── big5_team_misc.rds │ ├── big5_team_passing.rds │ ├── big5_team_passing_types.rds │ ├── big5_team_playing_time.rds │ ├── big5_team_possession.rds │ ├── big5_team_shooting.rds │ └── big5_team_standard.rds ├── fb_match_shooting │ └── .gitignore ├── fb_match_summary │ └── .gitignore ├── fotmob_match_details │ └── .gitignore ├── match_results │ ├── ARG_match_results.rds │ ├── AUS_match_results.rds │ ├── AUT_match_results.rds │ ├── BEL_match_results.rds │ ├── BOL_match_results.rds │ ├── BRA_match_results.rds │ ├── BUL_match_results.rds │ ├── CAN_match_results.rds │ ├── CHI_match_results.rds │ ├── CHN_match_results.rds │ ├── COL_match_results.rds │ ├── CRO_match_results.rds │ ├── CZE_match_results.rds │ ├── DEN_match_results.rds │ ├── ECU_match_results.rds │ ├── ENG_match_results.rds │ ├── ESP_match_results.rds │ ├── FIN_match_results.rds │ ├── FRA_match_results.rds │ ├── GER_match_results.rds │ ├── GRE_match_results.rds │ ├── HUN_match_results.rds │ ├── IND_match_results.rds │ ├── IRN_match_results.rds │ ├── ITA_match_results.rds │ ├── JPN_match_results.rds │ ├── KOR_match_results.rds │ ├── KSA_match_results.rds │ ├── MEX_match_results.rds │ ├── NED_match_results.rds │ ├── NOR_match_results.rds │ ├── PAR_match_results.rds │ ├── PER_match_results.rds │ ├── POL_match_results.rds │ ├── POR_match_results.rds │ ├── ROU_match_results.rds │ ├── RSA_match_results.rds │ ├── RUS_match_results.rds │ ├── SCO_match_results.rds │ ├── 
SRB_match_results.rds │ ├── SUI_match_results.rds │ ├── SWE_match_results.rds │ ├── TUR_match_results.rds │ ├── UKR_match_results.rds │ ├── URU_match_results.rds │ ├── USA_match_results.rds │ └── VEN_match_results.rds ├── match_results_cups │ ├── README.Rmd │ ├── README.md │ ├── afc_asian_cup_match_results.rds │ ├── afc_asian_cup_qualification_match_results.rds │ ├── afc_womens_asian_cup_match_results.rds │ ├── afc_womens_asian_cup_qualification_match_results.rds │ ├── africa_cup_of_nations_match_results.rds │ ├── africa_cup_of_nations_qualification_match_results.rds │ ├── africa_women_cup_of_nations_match_results.rds │ ├── algarve_cup_match_results.rds │ ├── concacaf_gold_cup_match_results.rds │ ├── concacaf_w_championship_match_results.rds │ ├── copa_america_femenina_match_results.rds │ ├── copa_america_match_results.rds │ ├── copa_del_rey_match_results.rds │ ├── copa_libertadores_de_america_match_results.rds │ ├── copa_sudamericana_match_results.rds │ ├── coppa_italia_match_results.rds │ ├── coupe_de_france_match_results.rds │ ├── coupe_de_la_ligue_match_results.rds │ ├── dfb_pokal_frauen_match_results.rds │ ├── dfb_pokal_match_results.rds │ ├── efl_cup_match_results.rds │ ├── english_football_league_cup_match_results.rds │ ├── european_championship_match_results.rds │ ├── fa_cup_match_results.rds │ ├── fifa_confederations_cup_match_results.rds │ ├── fifa_womens_world_cup_match_results.rds │ ├── fifa_womens_world_cup_qualification_uefa_match_results.rds │ ├── fifa_world_cup_match_results.rds │ ├── fifa_world_cup_qualification_afc_match_results.rds │ ├── fifa_world_cup_qualification_caf_match_results.rds │ ├── fifa_world_cup_qualification_concacaf_match_results.rds │ ├── fifa_world_cup_qualification_conmebol_match_results.rds │ ├── fifa_world_cup_qualification_inter_confederation_play_offs_match_results.rds │ ├── fifa_world_cup_qualification_ofc_match_results.rds │ ├── fifa_world_cup_qualification_uefa_match_results.rds │ ├── 
international_friendlies_m_match_results.rds │ ├── international_friendlies_w_match_results.rds │ ├── nwsl_challenge_cup_match_results.rds │ ├── nwsl_fall_series_match_results.rds │ ├── ofc_nations_cup_match_results.rds │ ├── ofc_womens_nations_cup_match_results.rds │ ├── olympics_womens_tournament_match_results.rds │ ├── she_believes_cup_match_results.rds │ ├── uefa_champions_league_match_results.rds │ ├── uefa_euro_qualification_match_results.rds │ ├── uefa_europa_conference_league_match_results.rds │ ├── uefa_europa_league_match_results.rds │ ├── uefa_european_football_championship_qualifying_match_results.rds │ ├── uefa_nations_league_match_results.rds │ ├── uefa_womens_champions_league_match_results.rds │ ├── uefa_womens_championship_match_results.rds │ └── uefa_womens_euro_qualification_match_results.rds ├── tm_player_vals │ └── big5_player_vals.rds ├── tm_transfers │ └── big_5_transfers.rds └── understat_shots │ ├── bundesliga_shot_data.rds │ ├── epl_shot_data.rds │ ├── la_liga_shot_data.rds │ ├── ligue_1_shot_data.rds │ ├── rfpl_shot_data.rds │ └── serie_a_shot_data.rds ├── man └── figures │ ├── hex_sticker.R │ ├── logo.png │ └── logo_small_size.png ├── raw-data ├── all_leages_and_cups │ ├── all_competitions.csv │ └── get_all_comp_seasons.R ├── countries_list │ ├── countries_df.csv │ └── get_countries_list.R ├── fbref-tm-player-mapping │ ├── README.md │ ├── create_final_data.R │ ├── data │ │ └── tm_data.rds │ ├── extra-leagues │ │ ├── create_final_data.R │ │ └── initial-match │ │ │ ├── build_mapping_dictionary.R │ │ │ ├── create_final_data_initial.R │ │ │ ├── fbref_extra_leagues.rds │ │ │ ├── fbref_mls.rds │ │ │ ├── fbref_selenium.R │ │ │ ├── get_tm_extra_leagues.R │ │ │ ├── joined_finished.csv │ │ │ ├── mls │ │ │ ├── build_mapping_dictionary.R │ │ │ ├── create_final_data_initial.R │ │ │ ├── duplicate_players_df.csv │ │ │ ├── duplicate_players_df_manual_fix.csv │ │ │ ├── get_data.R │ │ │ ├── joined_finished.csv │ │ │ ├── joined_missing.csv │ │ │ ├── 
joined_missing_manual_fix.csv │ │ │ └── tm_unique.csv │ │ │ ├── tm_players_championship.rds │ │ │ ├── tm_players_extra_tier1.rds │ │ │ ├── tm_players_mls.rds │ │ │ └── working-files │ │ │ ├── duplicate_players_df.csv │ │ │ ├── duplicate_players_df_manual_fix.csv │ │ │ ├── joined_missing.csv │ │ │ └── tm_unique.csv │ ├── output │ │ ├── fbref_to_tm_mapping.csv │ │ ├── initial-match │ │ │ ├── build_mapping_dictionary.R │ │ │ ├── fbref_to_tm_up_to_20-21.csv │ │ │ ├── joined_finished.csv │ │ │ └── working-files │ │ │ │ ├── duplicate_players_df.csv │ │ │ │ ├── duplicate_players_df_manual_fix.csv │ │ │ │ ├── joined_missing.csv │ │ │ │ ├── joined_missing_manual_fix.csv │ │ │ │ └── tm_unique.csv │ │ └── working-files │ │ │ ├── duplicate_players_df.csv │ │ │ ├── joined_finished.csv │ │ │ ├── joined_missing.csv │ │ │ ├── joined_missing_manual_fix.csv │ │ │ └── tm_unique.csv │ ├── prepare_working_files.R │ └── update_player_positions.R ├── fotmob-leagues │ └── all_leagues.csv ├── job_controller.R ├── league_seasons │ ├── all_tier1_season_URLs.csv │ └── get_league_seasons.R ├── transfermarkt_leagues │ ├── get_transfermarkt_metadata.R │ └── main_comp_seasons.csv └── transfermarkt_staff │ ├── get_staff_types.R │ └── tm_staff_types.csv └── worldfootballR_data.Rproj /.github/workflows/automated_adv_stats.yml: -------------------------------------------------------------------------------- 1 | 2 | name: Store advanced stats 3 | 4 | # Controls when the action will run - have set this to run at: 5 | # 17:30 UTC on Sunday, Tuesday, and Thursday 6 | on: 7 | schedule: 8 | - cron: "30 17 * * 0,2,4" 9 | 10 | 11 | # A workflow run is made up of one or more jobs that can run sequentially or in parallel 12 | jobs: 13 | # This workflow contains a single job called "update-adv-stats" 14 | update-adv-stats: 15 | # The type of runner that the job will run on 16 | runs-on: macOS-latest 17 | # retrieve token 18 | env: 19 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 20 | # Steps represent a sequence of tasks that 
will be executed as part of the job 21 | steps: 22 | # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it 23 | - uses: actions/checkout@v2 24 | - uses: r-lib/actions/setup-r@v2 25 | - name: Package Installation 26 | run: Rscript -e 'install.packages(c("tidyverse" ,"devtools", "dplyr", "stringr", "here", "piggyback"))' 27 | - name: worldfootballR Package Installation 28 | run: Rscript -e 'devtools::install_github("JaseZiv/worldfootballR")' 29 | - name: Update advanced season stats 30 | run: Rscript -e 'source(here::here("R", "fb_big5_advanced_season_stats", "update_big5_advanced_stats.R"), echo = TRUE)' 31 | - name: Commit 32 | run: | 33 | git config --global user.name 'JaseZiv' 34 | git config --global user.email 'jaseziv83@gmail.com' 35 | git add . 36 | git commit -m 'updating data' || echo "No changes to commit" 37 | git push || echo "No changes to commit" 38 | -------------------------------------------------------------------------------- /.github/workflows/automated_fb_advanced_match_stats.yml: -------------------------------------------------------------------------------- 1 | name: Scrape FBref advanced match stats 2 | 3 | on: 4 | schedule: 5 | - cron: "13 10,22 * * *" 6 | workflow_dispatch: 7 | 8 | jobs: 9 | update-fb-advanced-match-stats: 10 | runs-on: macOS-latest 11 | env: 12 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 13 | steps: 14 | - uses: actions/checkout@v2 15 | - uses: r-lib/actions/setup-r@v2 16 | - name: Package Installation 17 | run: Rscript -e 'install.packages(c("remotes", "dplyr", "purrr", "tidyr", "tibble", "readr", "piggyback"))' 18 | - name: worldfootballR Package Installation 19 | run: Rscript -e 'remotes::install_github("JaseZiv/worldfootballR")' 20 | - name: Update match stats 21 | run: Rscript -e 'source(file.path("R", "fb_advanced_match_stats", "update_fb_advanced_match_stats.R"), echo = TRUE)' 22 | -------------------------------------------------------------------------------- 
/.github/workflows/automated_fb_match_shooting.yml: -------------------------------------------------------------------------------- 1 | name: Scrape FBref match shooting 2 | 3 | on: 4 | schedule: 5 | - cron: "45 17 * 1-5,8-12 0,2,4" 6 | workflow_dispatch: 7 | 8 | jobs: 9 | update-fb-match-shooting: 10 | runs-on: macOS-latest 11 | env: 12 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 13 | steps: 14 | - uses: actions/checkout@v2 15 | - uses: r-lib/actions/setup-r@v2 16 | - name: Package Installation 17 | run: Rscript -e 'install.packages(c("devtools", "dplyr", "purrr", "tidyr", "tibble", "lubridate", "readr", "piggyback"))' 18 | - name: worldfootballR Package Installation 19 | run: Rscript -e 'devtools::install_github("JaseZiv/worldfootballR")' 20 | - name: Update match results 21 | run: Rscript -e 'source(file.path("R", "fb_match_shooting", "update_fb_match_shooting.R"), echo = TRUE)' 22 | -------------------------------------------------------------------------------- /.github/workflows/automated_fb_match_summary.yml: -------------------------------------------------------------------------------- 1 | name: Scrape FBref match summaries 2 | 3 | on: 4 | schedule: 5 | - cron: "58 17 * * 0,2,4" 6 | workflow_dispatch: 7 | 8 | jobs: 9 | update-fb-advanced-match-stats: 10 | runs-on: macOS-latest 11 | env: 12 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 13 | steps: 14 | - uses: actions/checkout@v2 15 | - uses: r-lib/actions/setup-r@v2 16 | - name: Package Installation 17 | run: Rscript -e 'install.packages(c("remotes", "dplyr", "purrr", "tidyr", "tibble", "readr", "piggyback"))' 18 | - name: worldfootballR Package Installation 19 | run: Rscript -e 'remotes::install_github("JaseZiv/worldfootballR")' 20 | - name: Update match stats 21 | run: Rscript -e 'source(file.path("R", "fb_match_summary", "update_fb_match_summary.R"), echo = TRUE)' 22 | -------------------------------------------------------------------------------- /.github/workflows/automated_match_results.yml: 
-------------------------------------------------------------------------------- 1 | 2 | name: Store match results 3 | 4 | # Controls when the action will run - have set this to run at: 5 | # 17:15 UTC on Sunday, Tuesday, and Thursday 6 | on: 7 | schedule: 8 | - cron: "15 17 * * 0,2,4" 9 | 10 | 11 | # A workflow run is made up of one or more jobs that can run sequentially or in parallel 12 | jobs: 13 | # This workflow contains a single job called "update-match-results" 14 | update-match-results: 15 | # The type of runner that the job will run on 16 | runs-on: macOS-latest 17 | # retrieve token 18 | env: 19 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 20 | # Steps represent a sequence of tasks that will be executed as part of the job 21 | steps: 22 | # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it 23 | - uses: actions/checkout@v2 24 | - uses: r-lib/actions/setup-r@v2 25 | - name: Package Installation 26 | run: Rscript -e 'install.packages(c("tidyverse" ,"devtools", "dplyr", "stringr", "here", "piggyback"))' 27 | - name: worldfootballR Package Installation 28 | run: Rscript -e 'devtools::install_github("JaseZiv/worldfootballR")' 29 | - name: Update match results 30 | run: Rscript -e 'source(here::here("R", "fb_match_results", "update_fb_match_results.R"), echo = TRUE)' 31 | - name: Commit 32 | run: | 33 | git config --global user.name 'JaseZiv' 34 | git config --global user.email 'jaseziv83@gmail.com' 35 | git add . 
36 | git commit -m 'updating data' || echo "No changes to commit" 37 | git push || echo "No changes to commit" 38 | -------------------------------------------------------------------------------- /.github/workflows/automated_match_results_cups.yml: -------------------------------------------------------------------------------- 1 | 2 | name: Store International and Cups match results 3 | 4 | # Controls when the action will run - have set this to run at: 5 | # 17:00 UTC on Sunday, Tuesday, and Thursday, 6 | # in every month of the year (GitHub Actions evaluates cron schedules in UTC). 7 | on: 8 | schedule: 9 | - cron: "0 17 * * 0,2,4" 10 | 11 | 12 | # A workflow run is made up of one or more jobs that can run sequentially or in parallel 13 | jobs: 14 | # This workflow contains a single job called "update-match-results" 15 | update-match-results: 16 | # The type of runner that the job will run on 17 | runs-on: macOS-latest 18 | # retrieve token 19 | env: 20 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 21 | # Steps represent a sequence of tasks that will be executed as part of the job 22 | steps: 23 | # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it 24 | - uses: actions/checkout@v2 25 | - uses: r-lib/actions/setup-r@v2 26 | - name: Package Installation 27 | run: Rscript -e 'install.packages(c("tidyverse" ,"devtools", "dplyr", "stringr", "here", "lubridate", "janitor", "piggyback"))' 28 | - name: worldfootballR Package Installation 29 | run: Rscript -e 'devtools::install_github("JaseZiv/worldfootballR")' 30 | - name: Update cup match results 31 | run: Rscript -e 'source(here::here("R", "fb_match_results_cups", "update_fb_cups_match_results.R"), echo = TRUE)' 32 | - name: Commit 33 | run: | 34 | git config --global user.name 'JaseZiv' 35 | git config --global user.email 'jaseziv83@gmail.com' 36 | git add . 
37 | git commit -m 'updating data' || echo "No changes to commit" 38 | git push || echo "No changes to commit" 39 | -------------------------------------------------------------------------------- /.github/workflows/automated_understat_shots.yml: -------------------------------------------------------------------------------- 1 | 2 | name: Scrape understat shots 3 | 4 | # Controls when the action will run - have set this to run at: 5 | # 18:15 UTC on Sunday, Tuesday, and Thursday in 6 | # January, February, March, April, May, July, August, September, October, November, and December. 7 | on: 8 | schedule: 9 | - cron: "15 18 * 1-5,7-12 0,2,4" 10 | 11 | 12 | # A workflow run is made up of one or more jobs that can run sequentially or in parallel 13 | jobs: 14 | # This workflow contains a single job called "update-understat-shots" 15 | update-understat-shots: 16 | # The type of runner that the job will run on 17 | runs-on: macOS-latest 18 | # retrieve token 19 | env: 20 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 21 | # Steps represent a sequence of tasks that will be executed as part of the job 22 | steps: 23 | # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it 24 | - uses: actions/checkout@v2 25 | - uses: r-lib/actions/setup-r@v2 26 | - name: Package Installation 27 | run: Rscript -e 'install.packages(c("tidyverse" ,"devtools", "dplyr", "stringr", "here", "piggyback"))' 28 | - name: worldfootballR Package Installation 29 | run: Rscript -e 'devtools::install_github("JaseZiv/worldfootballR")' 30 | - name: Update match results 31 | run: Rscript -e 'source(here::here("R", "understat_league_shots", "update_understat_shots.R"), echo = TRUE)' 32 | - name: Commit 33 | run: | 34 | git config --global user.name 'JaseZiv' 35 | git config --global user.email 'jaseziv83@gmail.com' 36 | git add . 
37 | git commit -m 'updating data' || echo "No changes to commit" 38 | git push || echo "No changes to commit" 39 | -------------------------------------------------------------------------------- /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | # This is a basic workflow to help you get started with Actions 2 | 3 | name: run_extracts 4 | 5 | # Controls when the action will run. 6 | on: 7 | schedule: 8 | - cron: "30 12 * * 5" 9 | 10 | 11 | # A workflow run is made up of one or more jobs that can run sequentially or in parallel 12 | jobs: 13 | # This workflow contains a single job called "build" 14 | scrape-and-push: 15 | # The type of runner that the job will run on 16 | runs-on: macOS-latest 17 | # retrieve token 18 | env: 19 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 20 | # Steps represent a sequence of tasks that will be executed as part of the job 21 | steps: 22 | # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it 23 | - uses: actions/checkout@v2 24 | - uses: r-lib/actions/setup-r@v2 25 | - name: Package Installation 26 | run: Rscript -e 'install.packages(c("tidyverse" ,"devtools", "dplyr", "rvest", "httr", "stringr", "here", "xml2", "purrr", "janitor", "glue", "ratelimitr", "piggyback"))' 27 | - name: worldfootballR Package Installation 28 | run: Rscript -e 'devtools::install_github("JaseZiv/worldfootballR")' 29 | - name: Updated Data 30 | run: Rscript -e 'source(here::here("raw-data", "job_controller.R"), echo = TRUE)' 31 | - name: Commit 32 | run: | 33 | git config --global user.name 'JaseZiv' 34 | git config --global user.email 'jaseziv83@gmail.com' 35 | git add . 
36 | git commit -m 'updating data' || echo "No changes to commit" 37 | git push || echo "No changes to commit" 38 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # History files 2 | .Rhistory 3 | .Rapp.history 4 | .DS_Store 5 | .Rproj.user 6 | -------------------------------------------------------------------------------- /R/fb_advanced_match_stats/shared_fb_advanced_match_stats.R: -------------------------------------------------------------------------------- 1 | library(dplyr) 2 | params <- bind_rows( 3 | 'big5' = list( 4 | country = c('ENG', 'ESP', 'FRA', 'GER', 'ITA'), 5 | tier = '1st', 6 | gender = 'M' 7 | ), 8 | 'other_1st_M' = list( 9 | country = 'USA', 10 | tier = '1st', 11 | gender = 'M' 12 | ), 13 | # 'other_1st_M' = list( 14 | # country = c('POR', 'NED', 'BRA', 'MEX', 'USA'), 15 | # tier = '1st', 16 | # gender = 'M' 17 | # ), 18 | # '1st_F' = list( 19 | # country = c('ENG', 'USA'), 20 | # tier = '1st', 21 | # gender = 'F' 22 | # ), 23 | # '2nd_M' = list( 24 | # country = c('ENG'), 25 | # tier = '2nd', 26 | # gender = 'M' 27 | # ), 28 | .id = 'group' 29 | ) 30 | -------------------------------------------------------------------------------- /R/fb_advanced_match_stats/update_fb_advanced_match_stats.R: -------------------------------------------------------------------------------- 1 | library(worldfootballR) 2 | library(tidyr) 3 | library(dplyr) 4 | library(readr) 5 | library(purrr) 6 | library(tibble) 7 | library(rlang) 8 | 9 | source(file.path('R', 'piggyback.R')) 10 | source(file.path('R', 'fb_advanced_match_stats', 'shared_fb_advanced_match_stats.R')) 11 | 12 | all_seasons <- readr::read_csv( 13 | 'https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/master/raw-data/all_leages_and_cups/all_competitions.csv' 14 | ) 15 | 16 | seasons <- all_seasons |> 17 | dplyr::semi_join( 18 | params, 19 | by = c('country', 
'tier', 'gender') 20 | ) |> 21 | dplyr::filter(season_end_year >= 2017L) |> 22 | dplyr::distinct( 23 | country, 24 | gender, 25 | tier, 26 | season_end_year 27 | ) 28 | 29 | scrape_fb_advanced_match_stats <- function(url, stat_type, team_or_player) { 30 | message( 31 | sprintf( 32 | 'Scraping matches for url = `"%s"`,\n`stat_type = "%s"`, `team_or_player = "%s"`.', 33 | url, 34 | stat_type, 35 | team_or_player 36 | ) 37 | ) 38 | worldfootballR::fb_advanced_match_stats( 39 | url, 40 | stat_type = stat_type, 41 | team_or_player = team_or_player 42 | ) 43 | } 44 | 45 | possibly_scrape_fb_advanced_match_stats <- purrr::possibly( 46 | scrape_fb_advanced_match_stats, 47 | otherwise = tibble::tibble(), 48 | quiet = FALSE 49 | ) 50 | 51 | slowly_possibly_scrape_fb_advanced_match_stats <- purrr::slowly( 52 | possibly_scrape_fb_advanced_match_stats, 53 | rate = purrr::rate_delay(pause = 5), 54 | quiet = TRUE 55 | ) 56 | 57 | bind_with_type_coercion <- function(df1, df2) { 58 | common_cols <- intersect(names(df1), names(df2)) 59 | 60 | class_df1 <- sapply(df1[common_cols], class) 61 | class_df2 <- sapply(df2[common_cols], class) 62 | 63 | cols_to_coerce <- common_cols[class_df1 != class_df2] 64 | 65 | if (length(cols_to_coerce) > 0) { 66 | message( 67 | sprintf('Coerceing these columns to strings: `%s`', paste0(cols_to_coerce, collapse = '`, `')) 68 | ) 69 | df1[cols_to_coerce] <- lapply(df1[cols_to_coerce], as.character) 70 | df2[cols_to_coerce] <- lapply(df2[cols_to_coerce], as.character) 71 | } 72 | 73 | dplyr::bind_rows(df1, df2) 74 | } 75 | 76 | fb_advanced_match_stats_tag <- 'fb_advanced_match_stats' 77 | update_fb_advanced_match_stats <- function( 78 | country = 'ENG', 79 | gender = 'M', 80 | tier = '1st', 81 | stat_type = 'summary', 82 | team_or_player = 'player' 83 | ) { 84 | name <- sprintf('%s_%s_%s_%s_%s_advanced_match_stats', country, gender, tier, stat_type, team_or_player) 85 | message(sprintf('Updating %s.', name)) 86 | 87 | filtered_seasons <- seasons |> 88 | 
dplyr::filter( 89 | country == !!country, 90 | gender == !!gender, 91 | tier == !!tier 92 | ) |> 93 | dplyr::pull(season_end_year) 94 | 95 | latest_season <- max(filtered_seasons) 96 | 97 | match_urls <- worldfootballR::fb_match_urls( 98 | country = country, 99 | tier = tier, 100 | gender = gender, 101 | season_end_year = latest_season 102 | ) 103 | 104 | existing_data <- read_worldfootballr( 105 | name = name, 106 | tag = fb_advanced_match_stats_tag 107 | ) 108 | existing_match_urls <- unique(existing_data$MatchURL) 109 | new_match_urls <- setdiff(match_urls, existing_match_urls) 110 | 111 | if (length(new_match_urls) == 0) { 112 | message( 113 | sprintf('No new match URLs for `country = "%s"`, `gender = "%s"`, `tier = "%s"`, `stat_type = "%s"`, `team_or_player = "%s"`', country, gender, tier, stat_type, team_or_player) 114 | ) 115 | return(existing_data) 116 | } 117 | 118 | scrape_time_utc <- as.POSIXlt(Sys.time(), tz = 'UTC') 119 | 120 | new_data <- new_match_urls |> 121 | rlang::set_names() |> 122 | purrr::map_dfr( 123 | \(.x) slowly_possibly_scrape_fb_advanced_match_stats( 124 | url = .x, 125 | stat_type = stat_type, 126 | team_or_player = team_or_player 127 | ), 128 | .id = 'MatchURL' 129 | ) |> 130 | dplyr::relocate(MatchURL, .before = 1) 131 | 132 | match_results <- worldfootballR::load_match_results( 133 | country = country, 134 | tier = tier, 135 | gender = gender, 136 | season_end_year = filtered_seasons 137 | ) 138 | 139 | res <- bind_with_type_coercion( 140 | existing_data, 141 | new_data |> 142 | dplyr::inner_join( 143 | match_results |> 144 | dplyr::transmute( 145 | Competition_Name, 146 | Gender, 147 | Country, 148 | Tier = .env$tier, 149 | Season_End_Year, 150 | MatchURL 151 | ), 152 | by = 'MatchURL' 153 | ) 154 | ) |> 155 | tibble::as_tibble() 156 | 157 | attr(res, 'scrape_timestamp') <- scrape_time_utc 158 | 159 | write_worldfootballr_rds_and_csv( 160 | x = res, 161 | name = name, 162 | tag = fb_advanced_match_stats_tag 163 | ) 164 | 165 | res 
166 | } 167 | 168 | current_time <- lubridate::now(tzone = 'UTC') 169 | current_wday <- lubridate::wday(current_time) 170 | current_hour <- lubridate::hour(current_time) 171 | 172 | team_or_players <- if (current_wday %% 2 == 0) { 173 | 'player' 174 | } else { 175 | 'team' 176 | } 177 | 178 | stat_types <- if (current_hour <= 12) { 179 | c('summary', 'passing', 'passing_types') 180 | } else { 181 | c('defense', 'possession', 'misc', 'keeper') 182 | } 183 | 184 | params |> 185 | tidyr::crossing( 186 | stat_type = factor(stat_types, levels = c('summary', 'passing', 'passing_types', 'defense', 'possession', 'misc', 'keeper')), 187 | team_or_player = factor(team_or_players, levels = c('team', 'player')) 188 | ) |> 189 | dplyr::arrange( 190 | stat_type, 191 | team_or_player 192 | ) |> 193 | dplyr::mutate( 194 | data = purrr::pmap( 195 | list( 196 | country, 197 | gender, 198 | tier, 199 | stat_type, 200 | team_or_player 201 | ), 202 | ~update_fb_advanced_match_stats( 203 | country = ..1, 204 | gender = ..2, 205 | tier = ..3, 206 | stat_type = ..4, 207 | team_or_player = ..5 208 | ) 209 | ) 210 | ) 211 | 212 | -------------------------------------------------------------------------------- /R/fb_big5_advanced_season_stats/backfill_big5_advanced_stats.R: -------------------------------------------------------------------------------- 1 | library(worldfootballR) 2 | library(tidyverse) 3 | library(here) 4 | 5 | source("R/piggyback.R") 6 | 7 | seasons <- read.csv("https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/master/raw-data/all_leages_and_cups/all_competitions.csv", stringsAsFactors = F) 8 | 9 | 10 | latest_season <- seasons %>% 11 | # filtering out things that aren't domestic leagues: 12 | dplyr::filter(stringr::str_detect(.data[["competition_type"]], "Big 5 European Leagues")) %>% 13 | group_by(country) %>% slice_max(season_end_year) %>% 14 | pull(season_end_year) 15 | 16 | 
#================================================================================================ 17 | # Player Advanced Stats --------------------------------------------------- 18 | #================================================================================================ 19 | 20 | backfill_player_advanced <- function(season_end, stat) { 21 | df <- fb_big5_advanced_season_stats(season_end_year= season_end, stat_type= stat, team_or_player= "player") 22 | df <- df %>% relocate(Url, .after = last_col()) 23 | 24 | scrape_time_utc <- as.POSIXlt(Sys.time(), tz = "UTC") 25 | attr(df, "scrape_timestamp") <- scrape_time_utc 26 | 27 | # saveRDS(df, here("data", "fb_big5_advanced_season_stats", paste0("big5_player_", stat, ".rds"))) 28 | write_worldfootballr(x=df, name = paste0("big5_player_", stat), tag = "fb_big5_advanced_season_stats", ext = "rds") 29 | 30 | } 31 | 32 | 33 | backfill_player_advanced(season_end= c(2010:2023), stat= "standard") 34 | backfill_player_advanced(season_end= c(2010:2023), stat= "playing_time") 35 | backfill_player_advanced(season_end= c(2010:2023), stat= "shooting") 36 | backfill_player_advanced(season_end= c(2018:2023), stat= "passing") 37 | backfill_player_advanced(season_end= c(2018:2023), stat= "passing_types") 38 | backfill_player_advanced(season_end= c(2018:2023), stat= "gca") 39 | backfill_player_advanced(season_end= c(2018:2023), stat= "defense") 40 | backfill_player_advanced(season_end= c(2018:2023), stat= "possession") 41 | backfill_player_advanced(season_end= c(2010:2023), stat= "misc") 42 | backfill_player_advanced(season_end= c(2010:2023), stat= "keepers") 43 | backfill_player_advanced(season_end= c(2018:2023), stat= "keepers_adv") 44 | 45 | 46 | #================================================================================================ 47 | # Team Advanced Stats ----------------------------------------------------- 48 | #================================================================================================ 49 

# Scrape one team-level Big 5 advanced stat table and upload it.
#
# season_end: season end year(s), forwarded as `season_end_year`.
# stat:       stat type, forwarded as `stat_type` and used in the uploaded name.
#
# The result is tagged with a UTC "scrape_timestamp" attribute and written
# through write_worldfootballr() as "big5_team_<stat>".
backfill_team_advanced <- function(season_end, stat) {
  df <- fb_big5_advanced_season_stats(season_end_year= season_end, stat_type= stat, team_or_player= "team")
  # keep the Url column as the last column
  df <- df %>% relocate(Url, .after = last_col())

  scrape_time_utc <- as.POSIXlt(Sys.time(), tz = "UTC")
  attr(df, "scrape_timestamp") <- scrape_time_utc

  # saveRDS(df, here("data", "fb_big5_advanced_season_stats", paste0("big5_team_", stat, ".rds")))
  write_worldfootballr(x=df, name = paste0("big5_team_", stat), tag = "fb_big5_advanced_season_stats", ext = "rds")

}


backfill_team_advanced(season_end= c(2010:2023), stat= "playing_time")

backfill_team_advanced(season_end= c(2010:2023), stat= "standard")
backfill_team_advanced(season_end= c(2010:2023), stat= "shooting")
backfill_team_advanced(season_end= c(2018:2023), stat= "passing")
backfill_team_advanced(season_end= c(2018:2023), stat= "passing_types")
backfill_team_advanced(season_end= c(2018:2023), stat= "gca")
backfill_team_advanced(season_end= c(2018:2023), stat= "defense")
backfill_team_advanced(season_end= c(2018:2023), stat= "possession")
backfill_team_advanced(season_end= c(2010:2023), stat= "misc")
backfill_team_advanced(season_end= c(2010:2023), stat= "keepers")
backfill_team_advanced(season_end= c(2018:2023), stat= "keepers_adv")


--------------------------------------------------------------------------------
/R/fb_big5_advanced_season_stats/update_big5_advanced_stats.R:
--------------------------------------------------------------------------------
library(worldfootballR)
library(dplyr)
library(stringr)
library(here)

source("R/piggyback.R")

seasons <- read.csv("https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/master/raw-data/all_leages_and_cups/all_competitions.csv", stringsAsFactors = FALSE)


# Latest season end year(s) of the Big 5 leagues.
# slice_max() runs per country, so pull() yields one value per country (more on
# ties). unique() collapses them so the same season is not requested repeatedly
# and so the `%in%` filters below compare against a clean set of years.
latest_season <- seasons %>%
  # keep only the "Big 5 European Leagues" rows:
  dplyr::filter(stringr::str_detect(.data[["competition_type"]], "Big 5 European Leagues")) %>%
  group_by(country) %>% slice_max(season_end_year) %>%
  pull(season_end_year) %>%
  unique()



stat_types <- c("standard", "playing_time", "shooting", "passing", "passing_types", "gca",
                "defense", "possession", "misc", "keepers", "keepers_adv")


#==========================================================================================
# Update Player Advanced Stats --------------------------------------------
#==========================================================================================

# For every stat type: load the stored table, drop the rows of the latest
# season(s), append a fresh scrape of those season(s) and upload the result.
for(each_stat in stat_types) {

  print(paste0("Updating player stat: ", each_stat))
  scrape_time_utc <- as.POSIXlt(Sys.time(), tz = "UTC")

  df <- read_worldfootballr_rds(name=paste0("big5_player_", each_stat), tag = "fb_big5_advanced_season_stats")
  # df <- readRDS(here("data", "fb_big5_advanced_season_stats", paste0("big5_player_", each_stat, ".rds")))

  new_dat <- fb_big5_advanced_season_stats(season_end_year= latest_season, stat_type= each_stat, team_or_player= "player", time_pause = 5)

  # `%in%` rather than `!=`: latest_season may hold more than one year, and
  # `!=` would silently recycle it against the column.
  df <- df %>%
    filter(!Season_End_Year %in% latest_season)

  df <- bind_rows(df, new_dat)

  attr(df, "scrape_timestamp") <- scrape_time_utc

  write_worldfootballr(x=df, name = paste0("big5_player_", each_stat), tag = "fb_big5_advanced_season_stats", ext = "rds")
  # saveRDS(df, here("data", "fb_big5_advanced_season_stats", paste0("big5_player_", each_stat, ".rds")))
}



#==========================================================================================
# Update Team Advanced Stats ----------------------------------------------
#==========================================================================================

# Same refresh as above, for the team-level tables.
for(each_stat in stat_types) {

  print(paste0("Updating team stat: ", each_stat))
  scrape_time_utc <- as.POSIXlt(Sys.time(), tz = "UTC")

  df <- read_worldfootballr_rds(name=paste0("big5_team_", each_stat), tag = "fb_big5_advanced_season_stats")
  # df <- readRDS(here("data", "fb_big5_advanced_season_stats", paste0("big5_team_", each_stat, ".rds")))

  new_dat <- fb_big5_advanced_season_stats(season_end_year= latest_season, stat_type= each_stat, team_or_player= "team", time_pause = 5)

  # see the player loop: set membership instead of a recycled `!=`
  df <- df %>%
    filter(!Season_End_Year %in% latest_season)

  df <- bind_rows(df, new_dat)

  attr(df, "scrape_timestamp") <- scrape_time_utc

  write_worldfootballr(x=df, name = paste0("big5_team_", each_stat), tag = "fb_big5_advanced_season_stats", ext = "rds")
  # saveRDS(df, here("data", "fb_big5_advanced_season_stats", paste0("big5_team_", each_stat, ".rds")))
}

# scrape_time_utc <- as.POSIXlt(Sys.time(), tz = "UTC")
# saveRDS(scrape_time_utc, here("data", "fb_big5_advanced_season_stats", "scrape_time_big5_advanced_season_stats.rds"))

--------------------------------------------------------------------------------
/R/fb_match_results/update_fb_match_results.R:
--------------------------------------------------------------------------------
library(worldfootballR)
library(tidyverse)
library(here)

source("R/piggyback.R")

seasons <- read.csv("https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/master/raw-data/all_leages_and_cups/all_competitions.csv", stringsAsFactors = FALSE)


# Latest season rows per country (still grouped by country).
latest_seasons <- seasons %>%
  # filtering out things that aren't domestic leagues:
  dplyr::filter(stringr::str_detect(.data[["competition_type"]], "Leagues"),
                tier != "",
                !is.na(.data[["country"]])) %>%
  filter(!is.na(country), country != "") %>%
  group_by(country) %>% slice_max(season_end_year) %>%
  distinct()



countries_to_get <- latest_seasons %>%
  # filtering out things that aren't domestic leagues:
  dplyr::filter(stringr::str_detect(.data[["competition_type"]], "Leagues"),
                tier != "",
                !is.na(.data[["country"]])) %>%
  filter(!is.na(country), country != "") %>%
  # also want to keep only seasons that are not yet completed
  filter(!is_completed) %>% pull(country) %>% unique()


#=======================================================================================
# Update Match Results ----------------------------------------------------
#=======================================================================================

# Refresh the stored match results of one country.
#
# each_country: country code as found in the `country` column of `latest_seasons`.
#
# Scrapes every fixtures URL of the country's latest season rows, replaces the
# stored rows sharing Gender / Season_End_Year / Tier with the fresh ones and
# uploads the combined table as "<country>_match_results". Nothing is written
# when the scrape fails or returns no rows.
update_fb_match_results <- function(each_country) {

  print(paste0("Getting Country: ", each_country))
  scrape_time_utc <- as.POSIXlt(Sys.time(), tz = "UTC")

  # dat_url <- paste0("https://github.com/JaseZiv/worldfootballR_data/blob/master/data/match_results/", each_country, "_match_results.rds?raw=true")
  #
  # existing_df <- .file_reader(dat_url)

  existing_df <- read_worldfootballr_rds(name=paste0(each_country, "_match_results"), tag = "match_results")
  # existing_df <- tryCatch(readRDS(here("data", "match_results", paste0(each_country, "_match_results.rds"))), error = function(e) data.frame())

  # we could scrape every league for the most recent, but if the season has finished, what's the point?
  # The below logic will look to get any games where there are missing scores (we make the assumption that these are not yet played)
  # and if the date of these games is earlier than the scraping date, then scrape the results
  # df2 <- existing_df %>% filter(Season_End_Year == max(existing_df$Season_End_Year))
  # date_not_collected <- df2 %>% filter(is.na(HomeGoals) & is.na(AwayGoals)) %>% arrange(Date) %>% pull(Date) %>% min()

  # if(date_not_collected < Sys.Date()) {

  fixture_urls <- latest_seasons %>% filter(country == each_country) %>% pull(fixtures_url)
  # get the updated data; a failed scrape is still best-effort (empty result),
  # but the reason is now logged instead of being swallowed
  new_df <- tryCatch(
    fixture_urls %>% purrr::map_df(worldfootballR::.get_each_season_results),
    error = function(e) {
      message(paste0("Scrape failed for ", each_country, ": ", conditionMessage(e)))
      data.frame()
    }
  )

  if(nrow(new_df) != 0) {

    new_df_full <- latest_seasons %>% filter(country == each_country) %>%
      # plain strings instead of `.data[[...]]`, which is deprecated inside select()
      dplyr::select(Competition_Name = "competition_name", Gender = "gender", Country = "country", Season_End_Year = "season_end_year", Tier = "tier", "seasons_urls", "fixtures_url") %>%
      dplyr::right_join(new_df, by = c("fixtures_url" = "fixture_url")) %>%
      dplyr::select(-dplyr::all_of(c("seasons_urls", "fixtures_url"))) %>%
      dplyr::mutate(Date = lubridate::ymd(.data[["Date"]])) %>%
      dplyr::arrange(.data[["Country"]], .data[["Competition_Name"]], .data[["Gender"]], .data[["Season_End_Year"]], as.numeric(.data[["Wk"]]), .data[["Date"]], .data[["Time"]]) %>% dplyr::distinct(.keep_all = TRUE)

    # NULL check added defensively: nrow(NULL) is NULL and would make if() error
    if(!is.null(existing_df) && nrow(existing_df) != 0) {
      # drop the stored rows that the fresh scrape supersedes
      existing_df <- existing_df %>%
        anti_join(new_df_full, by = c("Gender", "Season_End_Year", "Tier"))

      new_df_full <- bind_rows(existing_df, new_df_full) %>%
        dplyr::arrange(.data[["Country"]], .data[["Competition_Name"]], .data[["Gender"]], .data[["Season_End_Year"]], .data[["Date"]], .data[["Time"]], as.numeric(.data[["Wk"]])) %>% dplyr::distinct(.keep_all = TRUE)
    }

    attr(new_df_full, "scrape_timestamp") <- scrape_time_utc

    write_worldfootballr(x=new_df_full, name = paste0(each_country, "_match_results"), tag = "match_results", ext = "rds")
    # saveRDS(new_df_full, here("data", "match_results", paste0(each_country, "_match_results.rds")))

  }

  # }


}


# update data:
countries_to_get %>% purrr::map(update_fb_match_results)

# scrape_time_utc <- as.POSIXlt(Sys.time(), tz = "UTC")
# saveRDS(scrape_time_utc, here("data", "match_results", "scrape_time_match_results.rds"))


--------------------------------------------------------------------------------
/R/fb_match_results_cups/backfill_fb_cups_match_results.R:
--------------------------------------------------------------------------------
library(worldfootballR)
library(tidyverse)
library(here)

source("R/piggyback.R")

# Scrape every stored season of one cup / non-league competition and upload it.
#
# competition_collect: competition name(s) matched against `competition_name`.
# seasons: optional competitions table; when NULL (the default, matching the
#          previous one-argument behaviour) the CSV is downloaded here. Passing
#          it in avoids one download per competition.
#
# Writes "<clean competition name>_match_results" under the "match_results_cups"
# tag. Returns invisible NULL with a warning when no fixtures URL is found.
backfill_historical_comp_results <- function(competition_collect, seasons = NULL) {
  scrape_time_utc <- as.POSIXlt(Sys.time(), tz = "UTC")

  if (is.null(seasons)) {
    seasons <- read.csv("https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/master/raw-data/all_leages_and_cups/all_competitions.csv", stringsAsFactors = FALSE)
  }


  fixtures_df <- seasons %>%
    # get only things that aren't domestic leagues:
    dplyr::filter(!stringr::str_detect(.data[["competition_type"]], "Leagues")) %>%
    # get seasons that are only for the competition selected
    dplyr::filter(competition_name %in% competition_collect,
                  !is.na(.data[["season_end_year"]])) %>%
    dplyr::arrange(desc(.data[["season_end_year"]]))

  fixtures_urls <- fixtures_df %>%
    dplyr::pull(.data[["fixtures_url"]]) %>% unique()

  if (length(fixtures_urls) == 0) {
    warning(paste0("No fixtures URLs found for: ", paste(competition_collect, collapse = ", ")))
    return(invisible(NULL))
  }

  # seq_along() instead of 1:length(): the latter yields c(1, 0) for no URLs.
  # Results are collected in a list and bound once instead of growing a data
  # frame on every iteration.
  scraped <- vector("list", length(fixtures_urls))
  for(each_fixture in seq_along(fixtures_urls)) {
    print(paste0("Scraping URL ", each_fixture, " of ", length(fixtures_urls)))
    scraped[[each_fixture]] <- worldfootballR::.get_each_season_results(fixture_url = fixtures_urls[each_fixture], time_pause = runif(1, 4, 6))
  }
  all_results <- bind_rows(scraped)

  all_results <- fixtures_df %>%
    # plain strings instead of `.data[[...]]`, which is deprecated inside select()
    dplyr::select(Competition_Name = "competition_name", Gender = "gender", Country = "country", Season_End_Year = "season_end_year", Tier = "tier", "seasons_urls", "fixtures_url") %>%
    dplyr::right_join(all_results, by = c("fixtures_url" = "fixture_url")) %>%
    dplyr::select(-dplyr::all_of(c("seasons_urls", "fixtures_url"))) %>%
    dplyr::mutate(Date = lubridate::ymd(.data[["Date"]])) %>%
    dplyr::arrange(.data[["Country"]], .data[["Competition_Name"]], .data[["Gender"]], .data[["Season_End_Year"]], .data[["Wk"]], .data[["Date"]], .data[["Time"]]) %>% dplyr::distinct(.keep_all = TRUE)

  # return(all_results)
  # clean names for files - will need to repeat this step for loading functions to convert the text users will see
  # as the competition name to this file name structure
  comp_name_file <- janitor::make_clean_names(competition_collect)
  # add the time stamp
  attr(all_results, "scrape_timestamp") <- scrape_time_utc

  # saveRDS(all_results, here("data", "match_results_cups", paste0(comp_name_file, "_match_results.rds")))
  write_worldfootballr(x=all_results, name = paste0(comp_name_file, "_match_results"), tag = "match_results_cups", ext = "rds")
}



#==================================================================================================================================================
# Get Data ----------------------------------------------------------------

seasons <- read.csv("https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/master/raw-data/all_leages_and_cups/all_competitions.csv", stringsAsFactors = FALSE)


cups_to_get <- seasons %>%
  # Getting only things that aren't domestic leagues:
  dplyr::filter(!stringr::str_detect(.data[["competition_type"]], "Leagues"),
                !is.na(.data[["season_end_year"]])) %>%
  dplyr::pull(competition_name) %>% unique()


# the below cups are one off matches so we don't need scores and fixtures for these:
exclusion_cups <- c("UEFA Super Cup", "FA Community Shield", "Supercopa de España", "Trophée des Champions", "DFL-Supercup", "Supercoppa Italiana")

# filter them out
cups_to_get <- cups_to_get[!cups_to_get %in% exclusion_cups]

# get data for all cups/competitions (the table read above is reused)
for(each_cup in cups_to_get){
  print(paste("Scraping", each_cup))
  backfill_historical_comp_results(each_cup, seasons = seasons)
}
--------------------------------------------------------------------------------
/R/fb_match_results_cups/update_fb_cups_match_results.R:
--------------------------------------------------------------------------------
library(worldfootballR)
library(tidyverse)
library(lubridate)
library(janitor)
library(here)

source("R/piggyback.R")

seasons <- read.csv("https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/master/raw-data/all_leages_and_cups/all_competitions.csv", stringsAsFactors = FALSE)

# the below cups are one off matches so we don't need scores and fixtures for these:
exclusion_cups <- c("UEFA Super Cup", "FA Community Shield", "Supercopa de España", "Trophée des Champions", "DFL-Supercup", "Supercoppa Italiana")

# Latest season rows per competition (still grouped by competition_name).
latest_cup_seasons <- seasons %>%
  # keeping only things that aren't domestic leagues:
  filter(!stringr::str_detect(.data[["competition_type"]], "Leagues"),
         # and also dropping the single match type cup games:
         !.data[["competition_name"]] %in% exclusion_cups) %>%
  group_by(competition_name) %>% slice_max(season_end_year) %>%
  distinct()

# `completed_new` overrides `is_completed`: national-team qualification
# competitions ending this year or later, and rows with a missing flag, are
# treated as not completed.
latest_cup_seasons <- latest_cup_seasons %>%
  mutate(completed_new =
           case_when(
             competition_type == "National Team Qualification" & season_end_year >= lubridate::year(lubridate::today()) ~ FALSE,
             is.na(is_completed) ~ FALSE,
             TRUE ~ is_completed
           )
  )



cups_to_get <- latest_cup_seasons %>%
  # keep only competitions that are not yet completed:
  dplyr::filter(!completed_new) %>%
  pull(competition_name) %>% unique()


#=======================================================================================
# Update Match Results ----------------------------------------------------
#=======================================================================================

# Refresh the stored match results of one cup / non-league competition.
#
# each_comp: competition name as found in `latest_cup_seasons$competition_name`.
#
# Scrapes the fixtures URLs of the competition's latest season rows, replaces
# the stored rows sharing Gender / Season_End_Year / Tier with the fresh ones
# and uploads the combined table. Nothing is written when the scrape fails or
# returns no rows.
update_fb_comp_match_results <- function(each_comp) {

  print(paste0("Getting Competition: ", each_comp))
  scrape_time_utc <- as.POSIXlt(Sys.time(), tz = "UTC")

  f_name <- janitor::make_clean_names(each_comp)

  existing_df <- read_worldfootballr_rds(name=paste0(f_name, "_match_results"), tag = "match_results_cups")
  # existing_df <- tryCatch(readRDS(here("data", "match_results_cups", paste0(f_name, "_match_results.rds"))), error = function(e) data.frame())

  # we could scrape every league for the most recent, but if the season has finished, what's the point?
  # The below logic will look to get any games where there are missing scores (we make the assumption that these are not yet played)
  # and if the date of these games is earlier than the scraping date, then scrape the results
  # df2 <- existing_df %>% filter(Season_End_Year == max(existing_df$Season_End_Year))
  # date_not_collected <- df2 %>% filter(is.na(HomeGoals) & is.na(AwayGoals)) %>% arrange(Date) %>% pull(Date) %>% min()

  # if(date_not_collected < Sys.Date()) {

  fixture_urls <- latest_cup_seasons %>% filter(competition_name == each_comp) %>% pull(fixtures_url)
  # get the updated data; a failed scrape is still best-effort (empty result),
  # but the reason is now logged instead of being swallowed
  new_df <- tryCatch(
    fixture_urls %>% purrr::map_df(worldfootballR::.get_each_season_results),
    error = function(e) {
      message(paste0("Scrape failed for ", each_comp, ": ", conditionMessage(e)))
      data.frame()
    }
  )

  if(nrow(new_df) != 0) {

    new_df_full <- latest_cup_seasons %>% filter(competition_name == each_comp) %>%
      # plain strings instead of `.data[[...]]`, which is deprecated inside select()
      dplyr::select(Competition_Name = "competition_name", Gender = "gender", Country = "country", Season_End_Year = "season_end_year", Tier = "tier", "seasons_urls", "fixtures_url") %>%
      dplyr::right_join(new_df, by = c("fixtures_url" = "fixture_url")) %>%
      dplyr::select(-dplyr::all_of(c("seasons_urls", "fixtures_url"))) %>%
      dplyr::mutate(Date = lubridate::ymd(.data[["Date"]])) %>%
      dplyr::arrange(.data[["Country"]], .data[["Competition_Name"]], .data[["Gender"]], .data[["Season_End_Year"]], as.numeric(.data[["Wk"]]), .data[["Date"]], .data[["Time"]]) %>% dplyr::distinct(.keep_all = TRUE)

    # NULL check added defensively: nrow(NULL) is NULL and would make if() error
    if(!is.null(existing_df) && nrow(existing_df) != 0) {
      # drop the stored rows that the fresh scrape supersedes
      existing_df <- existing_df %>%
        anti_join(new_df_full, by = c("Gender", "Season_End_Year", "Tier"))

      new_df_full <- bind_rows(existing_df, new_df_full) %>%
        dplyr::arrange(.data[["Country"]], .data[["Competition_Name"]], .data[["Gender"]], .data[["Season_End_Year"]], .data[["Date"]], .data[["Time"]], as.numeric(.data[["Wk"]])) %>% dplyr::distinct(.keep_all = TRUE)
    }

    attr(new_df_full, "scrape_timestamp") <- scrape_time_utc

    write_worldfootballr(x=new_df_full, name = paste0(f_name, "_match_results"), tag = "match_results_cups", ext = "rds")
    # saveRDS(new_df_full, here("data", "match_results_cups", paste0(f_name, "_match_results.rds")))

  }

  # }


}


# update data:
cups_to_get %>% purrr::map(update_fb_comp_match_results)

# scrape_time_utc <- as.POSIXlt(Sys.time(), tz = "UTC")
# saveRDS(scrape_time_utc, here("data", "match_results", "scrape_time_match_results.rds"))


--------------------------------------------------------------------------------
/R/fb_match_shooting/adhoc_fb_match_shooting.R:
--------------------------------------------------------------------------------
library(worldfootballR)
library(tidyr)
library(dplyr)
library(readr)
library(purrr)
library(tibble)
library(rlang)

source(file.path('R', 'piggyback.R'))
source(file.path('R', 'fb_match_shooting', 'shared_fb_match_shooting.R'))

## fix tier ----
## For every domestic entry in `params`, fill missing `Tier` values in the
## stored shooting data with the tier from `params` and re-upload. Files whose
## `Tier` column has no NA are returned untouched.
fb_match_shooting_tag <- 'fb_match_shooting'
params |>
  filter(group != 'non_domestic') |>
  mutate(
    data = pmap(
      list(
        country,
        gender,
        tier
      ),
      \(.country, .gender, .tier) {
        name <- sprintf('%s_%s_%s_match_shooting', .country, .gender, .tier)
        message(sprintf('Updating %s.', name))
        existing_match_shooting <- read_worldfootballr_rds(
          name = name,
          tag = fb_match_shooting_tag
        )

        if (all(!is.na(existing_match_shooting$Tier))) {
          return(existing_match_shooting)
        }
        existing_match_shooting |>
          mutate(Tier = coalesce(Tier, .tier)) |>
          write_worldfootballr_rds_and_csv(
            name = name,
            tag = fb_match_shooting_tag
          )
      }
    )
  )

## fix some incomplete matches ----
fb_match_shooting_tag <- 'fb_match_shooting'
country <- 'ENG'
gender <- 'M'
tier <- '1st'
name <- sprintf('%s_%s_%s_match_shooting', country, gender, tier)
existing_fb_match_shooting <- read_worldfootballr_rds(
  name = name,
  tag = fb_match_shooting_tag
)

# Log the URL, then scrape the shooting data of a single match.
scrape_fb_match_shooting <- function(match_url) {
  message(sprintf('Scraping matches for %s.', match_url))
  fb_match_shooting(match_url)
}

## games not including all shots when originally scraped
new_fb_match_shooting <- c(
  'https://fbref.com/en/matches/070bf86d/Burnley-Newcastle-United-May-4-2024-Premier-League',
  'https://fbref.com/en/matches/91a2da3b/Sheffield-United-Nottingham-Forest-May-4-2024-Premier-League'
) |>
  set_names() |>
  map_dfr(scrape_fb_match_shooting, .id = 'MatchURL') |>
  as_tibble()

## competition metadata of the re-scraped matches, taken from the stored rows
matching_matches <- new_fb_match_shooting |>
  distinct(MatchURL) |>
  inner_join(
    existing_fb_match_shooting |>
      distinct(MatchURL, Competition_Name, Gender, Country, Tier, Season_End_Year)
  )

## swap the stored rows of those matches for the fresh scrape and re-upload
bind_rows(
  existing_fb_match_shooting |>
    filter(!(MatchURL %in% matching_matches$MatchURL)),
  new_fb_match_shooting |>
    left_join(matching_matches)
) |>
  mutate(Tier = '1st') |> ## temp fix
  write_worldfootballr_rds_and_csv(
    name = name,
    tag = fb_match_shooting_tag
  )
--------------------------------------------------------------------------------
/R/fb_match_shooting/backfill_fb_match_shooting.R:
--------------------------------------------------------------------------------
library(worldfootballR)
library(tidyr)
library(dplyr)
library(readr)
library(purrr)
library(tibble)
library(rlang)

PARENT_DATA_DIR <- file.path('data', 'fb_match_shooting')
SUB_DATA_DIR <- file.path(PARENT_DATA_DIR, 'match_shooting')
# recursive so the script also works from a checkout without the data folders
dir.create(PARENT_DATA_DIR, showWarnings = FALSE, recursive = TRUE)
dir.create(SUB_DATA_DIR, showWarnings = FALSE, recursive = TRUE)

source(file.path('R', 'fb_match_shooting', 'shared_fb_match_shooting.R'))

# Scrape the shooting data of one match, with a per-match file cache.
#
# url:       match URL; its basename names the cache file.
# data_dir:  directory holding the cache file (created when missing).
# overwrite: when FALSE an existing cache file is read instead of scraping.
#
# Returns the cached or freshly scraped data; fresh data is written to the cache.
scrape_fb_match_shooting <- function(url, data_dir, overwrite = FALSE) {
  rds_path <- file.path(data_dir, sprintf('%s.rds', basename(url)))
  if (!dir.exists(dirname(rds_path))) { dir.create(dirname(rds_path), showWarnings = FALSE, recursive = TRUE) }

  suffix <- sprintf('for `url = "%s"`.', url)
  # scalar condition, so the short-circuit `&&` is the right operator
  if (file.exists(rds_path) && !overwrite) {
    return(readr::read_rds(rds_path))
  }
  message(sprintf('Scraping data %s', suffix))
  res <- worldfootballR::fb_match_shooting(url)
  readr::write_rds(res, rds_path)
  res
}

# A failed scrape yields an empty tibble (the error is still printed).
possibly_scrape_fb_match_shooting <- possibly(
  scrape_fb_match_shooting,
  otherwise = tibble::tibble(),
  quiet = FALSE
)

# Rate-limited version: 5 second pause between calls.
slowly_possibly_scrape_fb_match_shooting <- purrr::slowly(
  possibly_scrape_fb_match_shooting,
  rate = purrr::rate_delay(pause = 5),
  quiet = FALSE
)

# Build the full shooting table of one league, season by season, and save it
# locally as "<country>_<gender>_<tier>_match_shooting.rds".
#
# country, gender, tier: league identifiers forwarded to worldfootballR.
# group:            `params` group; only used to pick the first season when
#                   `season_end_years` is NULL (2018 for 'big5', else 2019).
# season_end_years: seasons to collect; NULL means first season up to next year.
#
# A season whose cache file already exists is read from disk and not re-scraped.
# Returns the combined table invisibly.
backfill_fb_match_shooting <- function(
    country = 'ENG',
    gender = 'M',
    tier = '1st',
    group = 'big5',
    season_end_years = 2025
) {

  rds_path <- file.path(PARENT_DATA_DIR, sprintf('%s_%s_%s_match_shooting.rds', country, gender, tier))
  message(sprintf('Updating %s.', rds_path))

  if (is.null(season_end_years)) {
    # `group` is a scalar, so plain if/else instead of the vectorised ifelse()
    first_season_end_year <- if (group == 'big5') 2018 else 2019

    last_season_end_year <- lubridate::year(Sys.Date()) + 1L
    season_end_years <- first_season_end_year:last_season_end_year
  } else {
    last_season_end_year <- max(season_end_years)
  }

  res <- purrr::map_dfr(
    season_end_years,
    function(season_end_year) {

      season_path <- file.path(SUB_DATA_DIR, country, gender, tier, paste0(season_end_year, '.rds'))
      # if (season_end_year < last_season_end_year & file.exists(season_path)) {
      #   return(readRDS(season_path))
      # }
      if (file.exists(season_path)) {
        return(readRDS(season_path))
      }

      match_urls <- worldfootballR::fb_match_urls(
        country = country,
        tier = tier,
        gender = gender,
        season_end_year = season_end_year
      )

      if (length(match_urls) == 0) {
        warning(
          sprintf('No match URLs for `country = "%s"`, `gender = "%s"`, `tier = "%s"`, `season_end_year = %s`.', country, gender, tier, season_end_year)
        )
        return(tibble::tibble())
      }

      new_data <- match_urls |>
        rlang::set_names() |>
        purrr::map_dfr(
          \(.x) slowly_possibly_scrape_fb_match_shooting(
            url = .x,
            data_dir = file.path(SUB_DATA_DIR, country, gender, tier, season_end_year)
          ),
          .id = 'MatchURL'
        )

      # every scrape failed: there may be no MatchURL column to relocate or join on
      if (nrow(new_data) == 0) {
        warning(
          sprintf('No shooting data scraped for `country = "%s"`, `gender = "%s"`, `tier = "%s"`, `season_end_year = %s`.', country, gender, tier, season_end_year)
        )
        return(tibble::tibble())
      }

      new_data <- new_data |>
        dplyr::relocate(MatchURL, .before = 1)

      ## for the URLs
      match_results <- worldfootballR::load_match_results(
        country = country,
        tier = tier,
        gender = gender,
        season_end_year = season_end_year
      )

      res <- new_data |>
        dplyr::inner_join(
          match_results |>
            dplyr::transmute(
              Competition_Name,
              Gender,
              Country,
              Tier = .env$tier,
              Season_End_Year,
              MatchURL
            ),
          by = dplyr::join_by(MatchURL)
        ) |>
        tibble::as_tibble()
      # make sure the season cache folder exists before writing into it
      dir.create(dirname(season_path), showWarnings = FALSE, recursive = TRUE)
      saveRDS(res, season_path)
      res
    }
  )

  attr(res, 'scrape_timestamp') <- as.POSIXlt(Sys.time(), tz = 'UTC')
  readr::write_rds(
    res,
    rds_path
  )

  invisible(res)
}

local_data <- params |>
  # dplyr::filter(
  #   (
  #     # country == 'ENG' &
  #     group != 'big5' &
  #     gender == 'M' &
  #       tier == '1st'
  #   )
  # ) |>
  dplyr::mutate(
    data = purrr::pmap(
      list(
        country,
        gender,
        tier,
        group
      ),
      ~backfill_fb_match_shooting(
        country = ..1,
        gender = ..2,
        tier = ..3,
        group = ..4,
        season_end_years = NULL
      )
    )
  )


## could just put this in the function, but i
## want to check locally before i upload
# source(file.path('R', 'piggyback.R'))
# local_data |>
#   mutate(
#     name = sprintf('%s_%s_%s_match_shooting', country, gender, tier),
#     res = map2(
#       data,
#       name,
#       ~{
#         write_worldfootballr_rds_and_csv(
#           x = .x,
#           name = .y,
#           tag = 'fb_match_shooting'
#         )
#       }
#     )
#   )
#
--------------------------------------------------------------------------------
/R/fb_match_shooting/shared_fb_match_shooting.R:
--------------------------------------------------------------------------------
library(dplyr)
# Leagues covered by the match shooting scripts: one row per
# country / tier / gender, with the list name stored in the `group` column.
params <- dplyr::bind_rows(
  'big5' = list(
    country = c('ENG', 'ESP', 'FRA', 'GER', 'ITA'),
    tier = '1st',
    gender = 'M'
  ),
  'other_1st_M' = list(
    country = c('POR', 'NED', 'BRA', 'MEX', 'USA'),
    tier = '1st',
    gender = 'M'
  ),
  '1st_F' = list(
    country = c('ENG', 'USA', 'ESP'),
    tier = '1st',
    gender = 'F'
  ),
  '2nd_M' = list(
    country = c('ENG'),
    tier = '2nd',
    gender = 'M'
  ),
  .id = 'group'
)

--------------------------------------------------------------------------------
/R/fb_match_shooting/update_fb_match_shooting.R:
--------------------------------------------------------------------------------
library(worldfootballR)
library(tidyr)
library(dplyr)
library(readr)
library(purrr)
library(tibble)
library(rlang)
library(lubridate)

source(file.path('R', 'piggyback.R'))
source(file.path('R', 'fb_match_shooting', 'shared_fb_match_shooting.R'))

all_seasons <- readr::read_csv(
  'https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/master/raw-data/all_leages_and_cups/all_competitions.csv'
)

# Seasons (2017 onwards) of the leagues listed in `params`.
seasons <- all_seasons |>
  dplyr::semi_join(
    params,
    by = c('country', 'tier', 'gender')
  ) |>
  dplyr::filter(season_end_year >= 2017L) |>
  dplyr::distinct(
    country,
    gender,
    tier,
    season_end_year
  )

# Log the URL, then scrape the shooting data of a single match.
scrape_fb_match_shooting <- function(match_url) {
  message(sprintf('Scraping matches for %s.', match_url))
  worldfootballR::fb_match_shooting(match_url)
}

# A failed scrape yields an empty tibble (the error is still printed).
possibly_scrape_fb_match_shooting <- purrr::possibly(
  scrape_fb_match_shooting,
  otherwise = tibble::tibble(),
  quiet = FALSE
)

fb_match_shooting_tag <- 'fb_match_shooting'

# Add newly available matches of the latest season to a league's stored
# shooting data and upload the result.
#
# country, gender, tier: league identifiers (see `params`).
# date_threshold: matches played this many days ago or fewer are treated as
#                 too recent: they are not scraped, and any stored rows for
#                 them are dropped so they get re-scraped on a later run.
#
# Returns the updated table, or the stored table unchanged when there is
# nothing new to scrape.
update_fb_match_shooting <- function(country, gender = 'M', tier = '1st', date_threshold = 3L) {
  name <- sprintf('%s_%s_%s_match_shooting', country, gender, tier)
  message(sprintf('Updating %s.', name))

  filtered_seasons <- seasons |>
    dplyr::filter(
      country == !!country,
      gender == !!gender,
      tier == !!tier
    ) |>
    dplyr::pull(season_end_year)

  # max() of an empty vector is -Inf (with a warning); bail out instead
  if (length(filtered_seasons) == 0) {
    warning(sprintf('No seasons found for `country = "%s"`, `gender = "%s"`, `tier = "%s"`.', country, gender, tier))
    return(tibble::tibble())
  }

  latest_season <- max(filtered_seasons)

  match_urls <- worldfootballR::fb_match_urls(
    country = country,
    tier = tier,
    gender = gender,
    season_end_year = latest_season
  )
  date_rgx <- sprintf('(%s)-[0-9]{1,2}-20[0-9]{2}', paste0(month.name, collapse = '|'))
  match_names <- basename(match_urls)
  # regmatches() drops elements without a match, which would leave the dates
  # shorter than (and misaligned with) match_urls. Fill an NA vector instead so
  # each date stays at the position of its URL.
  date_hits <- regexpr(date_rgx, match_names)
  match_date_strings <- rep(NA_character_, length(match_names))
  match_date_strings[date_hits > 0] <- regmatches(match_names, date_hits)
  match_dates <- lubridate::mdy(match_date_strings, quiet = TRUE)

  current_date <- Sys.Date()
  diffs <- as.integer(difftime(current_date, match_dates, units = 'days'))
  # URLs without a parseable date are kept as scrape candidates
  is_too_recent <- !is.na(diffs) & diffs <= date_threshold
  discarded_match_urls <- match_urls[is_too_recent]
  retained_match_urls <- match_urls[!is_too_recent]

  existing_match_shooting <- read_worldfootballr_rds(
    name = name,
    tag = fb_match_shooting_tag
  )
  existing_match_urls <- unique(existing_match_shooting$MatchURL)
  new_match_urls <- setdiff(retained_match_urls, setdiff(existing_match_urls, discarded_match_urls))

  if (length(new_match_urls) == 0) {
    message(sprintf('Not updating data for `country = "%s"`, `gender = "%s"`, `tier = "%s"`.', country, gender, tier))
    return(existing_match_shooting)
  }

  scrape_time_utc <- as.POSIXlt(Sys.time(), tz = 'UTC')

  new_match_shooting <- new_match_urls |>
    rlang::set_names() |>
    purrr::map_dfr(
      possibly_scrape_fb_match_shooting,
      .id = 'MatchURL'
    )

  # every scrape failed: there may be no MatchURL column to relocate or join on
  if (nrow(new_match_shooting) == 0) {
    message(sprintf('No new data scraped for `country = "%s"`, `gender = "%s"`, `tier = "%s"`.', country, gender, tier))
    return(existing_match_shooting)
  }

  new_match_shooting <- new_match_shooting |>
    dplyr::relocate(MatchURL, .before = 1)

  match_results <- worldfootballR::load_match_results(
    country = country,
    tier = tier,
    gender = gender,
    season_end_year = filtered_seasons
  )

  match_shooting <- dplyr::bind_rows(
    existing_match_shooting |>
      dplyr::filter(!(MatchURL %in% discarded_match_urls)),
    new_match_shooting |>
      dplyr::inner_join(
        match_results |>
          dplyr::transmute(
            Competition_Name,
            Gender,
            Country,
            Tier = .env$tier,
            Season_End_Year,
            MatchURL
          ),
        # explicit key, as in the backfill script
        by = dplyr::join_by(MatchURL)
      )
  ) |>
    tibble::as_tibble()

  attr(match_shooting, 'scrape_timestamp') <- scrape_time_utc

  write_worldfootballr_rds_and_csv(
    x = match_shooting,
    name = name,
    tag = fb_match_shooting_tag
  )

  match_shooting
}

params |>
  dplyr::mutate(
    data = purrr::pmap(
      list(
        country,
        gender,
        tier
      ),
      ~update_fb_match_shooting(
        country = ..1,
        gender = ..2,
        tier = ..3
      )
    )
  )

--------------------------------------------------------------------------------
/R/fb_match_summary/backfill_fb_match_summary.R:
--------------------------------------------------------------------------------
library(worldfootballR)
library(tidyr)
library(dplyr)
library(readr)
library(purrr)
library(tibble)
library(rlang)

PARENT_DATA_DIR <- file.path('data', 'fb_match_summary')
SUB_DATA_DIR <- file.path(PARENT_DATA_DIR, 'matches')
# SUB_DATA_DIR is nested, so it has to be created recursively
dir.create(SUB_DATA_DIR, showWarnings = FALSE, recursive = TRUE)

source(file.path('R', 'fb_match_summary', 'shared_fb_match_summary.R'))

# Scrape the summary of one match, with a per-match file cache.
#
# url:       match URL; its basename names the cache file.
# data_dir:  directory holding the cache file (created when missing).
# overwrite: when FALSE an existing cache file is read instead of scraping.
#
# Returns the cached or freshly scraped data; fresh data is written to the cache.
scrape_fb_match_summary <- function(url, data_dir, overwrite = FALSE) {
  rds_path <- file.path(data_dir, sprintf('%s.rds', basename(url)))
  if (!dir.exists(dirname(rds_path))) { dir.create(dirname(rds_path), showWarnings = FALSE, recursive = TRUE) }

  suffix <- sprintf('for `url = "%s"`.', url)
  # scalar condition, so the short-circuit `&&` is the right operator
  if (file.exists(rds_path) && !overwrite) {
    return(readr::read_rds(rds_path))
  }
  message(sprintf('Scraping data %s', suffix))
  stats <- worldfootballR::fb_match_summary(url)
  readr::write_rds(stats, rds_path)
  stats
}

# A failed scrape yields an empty tibble (the error is still printed).
possibly_scrape_fb_match_summary <- purrr::possibly(
  scrape_fb_match_summary,
  otherwise = tibble::tibble(),
  quiet = FALSE
)

# Build the full match summary table of one league, season by season, and save
# it locally as "<country>_<gender>_<tier>_match_summary.rds".
#
# country, gender, tier: league identifiers forwarded to worldfootballR.
# group: `params` group; picks the first season (2018 for 'big5', else 2019).
#
# Seasons run from the first season up to next calendar year. A cached season
# file is reused for every season except the last one in that range.
# Returns the combined table invisibly.
backfill_fb_match_summary <- function(
    country = 'ENG',
    gender = 'M',
    tier = '1st',
    group = 'big5'
) {

  rds_path <- file.path(PARENT_DATA_DIR, sprintf('%s_%s_%s_match_summary.rds', country, gender, tier))
  message(sprintf('Updating %s.', rds_path))

  # `group` is a scalar, so plain if/else instead of the vectorised ifelse()
  first_season_end_year <- if (group == 'big5') 2018 else 2019

  last_season_end_year <- lubridate::year(Sys.Date()) + 1L
  season_end_years <- first_season_end_year:last_season_end_year

  res <- purrr::map_dfr(
    season_end_years,
    function(season_end_year) {

      season_path <- file.path(SUB_DATA_DIR, country, gender, tier, paste0(season_end_year, '.rds'))
      if (season_end_year < last_season_end_year && file.exists(season_path)) {
        return(readRDS(season_path))
      }

      match_urls <- worldfootballR::fb_match_urls(
        country = country,
        tier = tier,
        gender = gender,
        season_end_year = season_end_year
      )

      if (length(match_urls) == 0) {
        warning(
          sprintf('No match URLs for `country = "%s"`, `gender = "%s"`, `tier = "%s"`, `season_end_year = %s`.', country, gender, tier, season_end_year)
        )
        return(tibble::tibble())
      }

      new_data <- match_urls |>
        rlang::set_names() |>
        purrr::map_dfr(
          \(.x) possibly_scrape_fb_match_summary(
            url = .x,
            data_dir = file.path(SUB_DATA_DIR, country, gender, tier, season_end_year)
          ),
          .id = 'MatchURL'
        )

      # every scrape failed: there may be no MatchURL column to relocate or join on
      if (nrow(new_data) == 0) {
        warning(
          sprintf('No match summaries scraped for `country = "%s"`, `gender = "%s"`, `tier = "%s"`, `season_end_year = %s`.', country, gender, tier, season_end_year)
        )
        return(tibble::tibble())
      }

      new_data <- new_data |>
        dplyr::relocate(MatchURL, .before = 1)

      ## for the URLs
      match_results <- worldfootballR::load_match_results(
        country = country,
        tier = tier,
        gender = gender,
        season_end_year = season_end_year
      )

      res <- new_data |>
        dplyr::inner_join(
          match_results |>
            dplyr::transmute(
              Competition_Name,
              Gender,
              Country,
              Tier = .env$tier,
              Season_End_Year,
              MatchURL
            ),
          by = dplyr::join_by(MatchURL)
        ) |>
        tibble::as_tibble()
      # make sure the season cache folder exists before writing into it
      dir.create(dirname(season_path), showWarnings = FALSE, recursive = TRUE)
      saveRDS(res, season_path)
      res
    }
  )

  attr(res, 'scrape_timestamp') <- as.POSIXlt(Sys.time(), tz = 'UTC')
  readr::write_rds(
    res,
    rds_path
  )

  invisible(res)
}

local_data <- params |>
  filter(country == 'ENG', gender == 'F') |>
  dplyr::mutate(
    data = purrr::pmap(
      list(
        country,
        gender,
        tier,
        group
      ),
      ~backfill_fb_match_summary(
        country = ..1,
        gender = ..2,
        tier = ..3,
        group = ..4
      )
    )
  )

## could just put this in the function, but i want to check locally before i upload
source(file.path('R', 'piggyback.R'))
local_data |>
  mutate(
    name = sprintf('%s_%s_%s_match_summary', country, gender, tier),
    res = map2(
      data,
      name,
      ~{
        write_worldfootballr_rds_and_csv(
          x = .x,
          name = .y,
          tag = 'fb_match_summary'
        )
      }
    )
  )
--------------------------------------------------------------------------------
/R/fb_match_summary/shared_fb_match_summary.R:
--------------------------------------------------------------------------------
library(dplyr)

# Competitions covered by the match-summary scripts. Each named list expands to
# one row per country (scalar `tier` / `gender` are recycled); the list name is
# stored in the `group` column.
params <- bind_rows(
  'big5' = list(
    country = c('ENG', 'ESP', 'FRA', 'GER', 'ITA'),
    tier = '1st',
    gender = 'M'
  ),
  'other_1st_M' = list(
    country = c('POR', 'NED', 'BRA', 'MEX', 'USA'),
    tier = '1st',
    gender = 'M'
  ),
  '1st_F' = list(
    country = c('ENG', 'USA'),
    tier = '1st',
    gender = 'F'
  ),
  '2nd_M' = list(
    country = c('ENG'),
    tier = '2nd',
    gender = 'M'
  ),
  .id = 'group'
)
--------------------------------------------------------------------------------
/R/fb_match_summary/update_fb_match_summary.R:
--------------------------------------------------------------------------------
library(worldfootballR)
library(tidyr)
library(dplyr)
library(readr)
library(purrr)
library(tibble)
library(rlang)

source(file.path('R', 'piggyback.R'))
source(file.path('R', 'fb_match_summary', 'shared_fb_match_summary.R'))

all_seasons <- read_csv(
  'https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/master/raw-data/all_leages_and_cups/all_competitions.csv'
)

# Seasons (2017 onwards) available for the competitions listed in `params`.
seasons <- all_seasons |>
  semi_join(
    params,
    by = c('country', 'tier', 'gender')
  ) |>
  filter(season_end_year >= 2017L) |>
  distinct(
    country,
    gender,
    tier,
    season_end_year
  )

# Scrape one match, logging the URL first.
scrape_fb_match_summary <- function(match_url) {
  message(sprintf('Scraping matches for %s.', match_url))
  fb_match_summary(match_url)
}

# Failure-tolerant wrapper: a match that errors yields an empty tibble.
possibly_scrape_fb_match_summary <- possibly(
  scrape_fb_match_summary,
  otherwise = tibble(),
  quiet = FALSE
)

fb_match_summary_tag <- 'fb_match_summary'

#' Append newly played matches to the released match-summary file.
#'
#' Reads the existing release asset, scrapes only the match URLs of the latest
#' season that are not in it yet, joins competition metadata from
#' `load_match_results()` and uploads the combined data as RDS and CSV.
#'
#' @param country,gender,tier Competition identifiers passed to worldfootballR.
#' @return The combined tibble, or the existing data unchanged when there are
#'   no new match URLs.
update_fb_match_summary <- function(country = 'ENG', gender = 'M', tier = '1st') {
  name <- sprintf('%s_%s_%s_match_summary', country, gender, tier)
  message(sprintf('Updating %s.', name))

  filtered_seasons <- seasons |>
    filter(
      country == !!country,
      gender == !!gender,
      tier == !!tier
    ) |>
    pull(season_end_year)

  latest_season <- max(filtered_seasons)

  match_urls <- fb_match_urls(
    country = country,
    tier = tier,
    gender = gender,
    season_end_year = latest_season
  )

  existing_match_summary <- read_worldfootballr_rds(
    name = name,
    tag = fb_match_summary_tag
  )
  existing_match_urls <- unique(existing_match_summary$MatchURL)
  new_match_urls <- setdiff(match_urls, existing_match_urls)

  if (length(new_match_urls) == 0) {
    message(sprintf('Not updating data for `country = "%s"`, `gender = "%s"`, `tier = "%s"`.', country, gender, tier))
    return(existing_match_summary)
  }

  scrape_time_utc <- as.POSIXlt(Sys.time(), tz = 'UTC')

  # Name the vector by itself so `.id` records the URL each row came from.
  new_match_summary <- new_match_urls |>
    set_names() |>
    map_dfr(
      possibly_scrape_fb_match_summary,
      .id = 'MatchURL'
    ) |>
    relocate(MatchURL, .before = 1)

  match_results <- load_match_results(
    country = country,
    tier = tier,
    gender = gender,
    season_end_year = filtered_seasons
  )

  match_summary <- bind_rows(
    existing_match_summary,
    new_match_summary |>
      inner_join(
        match_results |>
          transmute(
            Competition_Name,
            Gender,
            Country,
            Tier = .env$tier,
            Season_End_Year,
            MatchURL
          ),
        # Explicit key, matching the backfill script: without `by`, dplyr would
        # join on every shared column name and print a message.
        by = 'MatchURL'
      )
  ) |>
    as_tibble()

  attr(match_summary, 'scrape_timestamp') <- scrape_time_utc

  write_worldfootballr_rds_and_csv(
    x = match_summary,
    name = name,
    tag = fb_match_summary_tag
  )

  match_summary
}

# Run the update for every competition in `params`.
params |>
  mutate(
    data = pmap(
      list(
        country,
        gender,
        tier
      ),
      ~update_fb_match_summary(
        country = ..1,
        gender = ..2,
        tier = ..3
      )
    )
  )

--------------------------------------------------------------------------------
/R/piggyback.R:
--------------------------------------------------------------------------------
library(purrr)
library(readr)
library(piggyback)

# readr::write_csv with missing values written as empty strings.
write_csv2 <- purrr::partial(
  readr::write_csv,
  na = "",
  ... =
)

worldfootballr_repo <- "JaseZiv/worldfootballR_data"

#' Write `x` to a temporary file and upload it to a release of the data repo.
#'
#' @param x Object to write.
#' @param name File name without extension.
#' @param tag Release tag to upload to.
#' @param ext Either "rds" or "csv"; selects the writer and the extension.
write_worldfootballr <- function(x, name, tag, ext = c("rds", "csv")) {
  ext <- match.arg(ext)
  out_dir <- tempdir(check = TRUE)
  file_name <- sprintf("%s.%s", name, ext)
  path <- file.path(out_dir, file_name)
  writer <- switch(
    ext,
    "rds" = readr::write_rds,
    "csv" = write_csv2
  )
  writer(x, path)
  piggyback::pb_upload(
    path,
    repo = worldfootballr_repo,
    tag = tag
  )
}

# Upload `x` twice: once as RDS and once as CSV.
write_worldfootballr_rds_and_csv <- function(x, name, tag) {
  purrr::walk(
    c("rds", "csv"),
    ~write_worldfootballr(
      x = x,
      name = name,
      tag = tag,
      ext = .x
    )
  )
}

#' Read an RDS release asset from the data repo.
#'
#' The URL connection is closed explicitly when the function exits; readRDS()
#' does not close a connection that is passed to it.
read_worldfootballr_rds <- function(name, tag) {
  path <- sprintf("https://github.com/%s/releases/download/%s/%s.rds", worldfootballr_repo, tag, name)
  con <- url(path)
  on.exit(close(con), add = TRUE)
  readRDS(con)
}

# Read a CSV release asset from the data repo.
read_worldfootballr_csv <- function(name, tag) {
  path <- sprintf("https://github.com/%s/releases/download/%s/%s.csv", worldfootballr_repo, tag, name)
  read.csv(path)
}

safely_read_worldfootballr_rds <- purrr::safely(read_worldfootballr_rds)

# Read the RDS asset; when that fails for any reason, fall back to the CSV.
read_worldfootballr <- function(name, tag) {
  res <- safely_read_worldfootballr_rds(name, tag)
  if (is.null(res$error)) {
    return(res$result)
  }
  message(
    sprintf(
      'Missing RDS file at `name = "%s"` (`tag: "%s"`).\nTrying to read from the CSV.',
      name,
      tag
    )
  )
  read_worldfootballr_csv(name, tag)
}

--------------------------------------------------------------------------------
/R/tm_player_vals/backfill_big5_player_vals.R:
--------------------------------------------------------------------------------
library(worldfootballR)
library(tidyverse)
library(here)


# Scrape every season into a list and bind once. The previous version appended
# to an uninitialised `df` (which resolves to the stats::df function) and then
# saved an undefined object called `full`.
seasons <- c(2010:2022)
season_dfs <- vector("list", length(seasons))

for(i in seq_along(seasons)) {
  each_season <- seasons[i]
  print(paste0("scraping season: ", each_season))

  season_dfs[[i]] <- tm_player_market_values(country_name = c("England", "Spain", "France", "Italy", "Germany"),
                                             start_year = each_season)
}

full <- bind_rows(season_dfs)

saveRDS(full, here("data", "tm_player_vals", "big5_player_vals.rds"))




--------------------------------------------------------------------------------
/R/tm_player_vals/update_big5_player_vals.R:
--------------------------------------------------------------------------------
library(worldfootballR)
library(tidyverse)
library(here)
library(rvest)
library(xml2)


existing <- readRDS(here("data", "tm_player_vals", "big5_player_vals.rds"))

scrape_time_utc <- as.POSIXlt(Sys.time(), tz = "UTC")

# need to get the latest season available for the big 5 on transfermarkt (using the EPL as the proxy here)
epl_url <- "https://www.transfermarkt.com/premier-league/startseite/wettbewerb/GB1"
pg <- read_html(epl_url)
# the first <option> of the season selector is taken as the latest season
max_season <- pg %>% html_nodes(".chzn-select option") %>% html_attr("value") %>% purrr::pluck(1) %>% as.numeric()

# fail with a clear message instead of an opaque "missing value where TRUE/FALSE needed"
if(length(max_season) != 1 || is.na(max_season)) {
  stop(paste0("Could not determine the latest season available at ", epl_url))
}

latest_existing_season <- max(existing$season_start_year, na.rm = TRUE)

# if the latest season is the same as the last season we currently have, update just that season
if(latest_existing_season == max_season) {

  print(paste0("Scraping data to update current season (", max_season, ")"))

  update_season <- tm_player_market_values(country_name = c("England", "Spain", "France", "Italy", "Germany"),
                                           start_year = max_season)

  existing_except_new <- existing %>%
    filter(season_start_year != max_season)

  new_df <- bind_rows(
    existing_except_new,
    update_season
  )

  # if the latest season on the site is ahead of the latest data we have stored, then append the new data
  # (the comparison used to be inverted, so a new season always fell through to the error branch)
} else if(latest_existing_season < max_season) {

  print(paste0("Scraping data to get new season (", max_season, ")"))

  update_season <- tm_player_market_values(country_name = c("England", "Spain", "France", "Italy", "Germany"),
                                           start_year = max_season)

  new_df <- bind_rows(
    existing,
    update_season
  )
  # otherwise, error, because we don't want to overwrite a season already scraped
} else {
  stop(paste0("There is an error and this process might incorrectly overwrite existing data as the latest season available at ",
              epl_url, " is less than the last season data extracted for, which is the season starting ",
              latest_existing_season))
}

# then if a new df has been created, then write it to file
if(nrow(new_df) > 0) {
  attr(new_df, "scrape_timestamp") <- scrape_time_utc
  saveRDS(new_df, here("data", "tm_player_vals", "big5_player_vals.rds"))
}



--------------------------------------------------------------------------------
/R/tm_transfers/backfill_big5_transfers.R:
--------------------------------------------------------------------------------
library(worldfootballR)
library(dplyr)
library(here)


countries <- c("England", "Italy", "Germany", "Spain", "France")
all_transfers <- data.frame()

# scrape team transfers for every country and start year, accumulating per country
for(each_country in countries) {

  each_season_df <- data.frame()

  for(i in c(2010:2022)) {
    print(paste0("Scraping country: ", each_country, " for season: ", i))
    urls <- tm_league_team_urls(country_name = each_country, start_year = i)
    season_transfers <- tm_team_transfers(urls)
    each_season_df <- rbind(each_season_df, season_transfers)
  }

  all_transfers <- rbind(all_transfers, each_season_df)

}



# because the initial scrape was conducted Sep 2022, the current leagues were assigned to teams, but what we want is the relevant
# league we wanted for the season scraped. Additionally, there are two teams who no longer exist, so these need to be mapped.
# Will manually coerce these here:

all_transfers <- all_transfers %>%
  dplyr::mutate(
    country =
      dplyr::case_when(
        team_name == "Athlétic Club Arlésien" ~ "France",
        team_name == "Chievo Verona" ~ "Italy",
        TRUE ~ country
      ),
    # no default branch: any country outside these five gets an NA league
    league =
      dplyr::case_when(
        country == "England" ~ "Premier League",
        country == "France" ~ "Ligue 1",
        country == "Germany" ~ "Bundesliga",
        country == "Italy" ~ "Serie A",
        country == "Spain" ~ "LaLiga"
      )
  )


saveRDS(all_transfers, here::here("data", "tm_transfers", "big_5_transfers.rds"))
--------------------------------------------------------------------------------
/R/understat_league_shots/backup_understat_local.R:
--------------------------------------------------------------------------------
library(worldfootballR)
library(dplyr)
library(janitor)

# read_worldfootballr_rds() is defined in R/piggyback.R, which this script never
# loaded. Source it by absolute path before the working directory changes.
# NOTE(review): assumes the function is not already exported by worldfootballR - confirm.
source(here::here("R", "piggyback.R"))

setwd(paste0(here::here(), "/data/understat_shots"))


leagues <- c("EPL", "La liga", "Bundesliga", "Serie A", "Ligue 1", "RFPL")

# download each league's released shot file and save a local copy in the working directory
for(each_league in leagues) {

  league_name_clean <- janitor::make_clean_names(each_league)

  f <- read_worldfootballr_rds(name=paste0(league_name_clean, "_shot_data"), tag = "understat_shots") %>%
    mutate(minute = as.numeric(minute))

  saveRDS(f, paste0(league_name_clean, "_shot_data.rds"))
}








--------------------------------------------------------------------------------
/R/understat_league_shots/update_understat_shots.R:
--------------------------------------------------------------------------------
library(worldfootballR)
library(tidyverse)
library(here)

# set the working directory to make reading and writing easier
# setwd(here("data", "understat_shots"))

source("R/piggyback.R")

# valid league names for scraping data
leagues <- c("EPL", "La liga", "Bundesliga", "Serie A", "Ligue 1", "RFPL")


# fetch a page with the cookie set on the request; returns NA when the request errors
.get_understat_json <- function(page_url) {
  tryCatch(
    httr::GET(page_url, httr::set_cookies(.cookies = c("beget" = "begetok"))) %>% httr::content(),
    error = function(e) NA
  )
}


for(each_league in leagues) {
  scrape_time_utc <- as.POSIXlt(Sys.time(), tz = "UTC")

  # league names appear in the understat URL with underscores instead of spaces
  each_league_clean <- gsub(" ", "_", each_league, fixed = TRUE)

  # first we want to get the current season:
  main_url <- "https://understat.com/"
  page_url <- paste0(main_url, "league/", each_league_clean)
  page <- tryCatch( .get_understat_json(page_url), error = function(e) NA)

  # a failed request yields NA, which html_nodes() cannot handle: skip this league
  # with a warning rather than aborting the remaining leagues
  if(!inherits(page, "xml_node")) {
    warning(sprintf("Could not read %s; skipping league %s.", page_url, each_league))
    next
  }

  season_element <- page %>% rvest::html_nodes(xpath = '//*[@name="season"]') %>%
    rvest::html_nodes("option")
  season_values <- season_element %>% rvest::html_attr("value") %>% as.numeric()

  # without any season value max() would return -Inf; skip this league instead
  if(all(is.na(season_values))) {
    warning(sprintf("No season found at %s; skipping league %s.", page_url, each_league))
    next
  }
  season <- max(season_values, na.rm = TRUE)

  # also need to read in the existing shot data file to see which games have not yet been collected:
  # to do this, we need to clean the valid league names to match the file structure
  league_name_clean <- janitor::make_clean_names(each_league)
  # then read in data
  f <- read_worldfootballr_rds(name=paste0(league_name_clean, "_shot_data"), tag = "understat_shots") %>%
    mutate(minute = as.numeric(minute))

  # need to manually coerce columns to numeric as of the start of 22/23 season to match old data
  f <- f %>%
    mutate(
      id = as.numeric(id),
      player_id = as.numeric(player_id),
      season = as.numeric(season),
      match_id = as.numeric(match_id)
    )

  # also need to read in the match data to get all match IDs, to then compare which matches have been played (and will then have shot data)
  match_data <- tryCatch(worldfootballR::understat_league_match_results(league = each_league, season_start_year = season), error = function(e) data.frame())

  if(nrow(match_data) != 0) {
    match_data <- match_data %>% filter(isResult == TRUE)
    # only want to keep those match IDs for which we don't have shot data for
    missing_ids <- match_data$match_id[!match_data$match_id %in% f$match_id]
  } else {
    missing_ids <- c()
  }

  # then, if there are any matches where we don't already have shot data, go and get them
  if(length(missing_ids) > 0) {
    match_urls <- paste0("https://understat.com/match/", missing_ids)

    shots <- match_urls %>% purrr::map_df(worldfootballR::understat_match_shots)

    # nothing to append when no shots came back (column assignment on a 0-row frame would fail)
    if(nrow(shots) > 0) {
      # there must have been a change to the json data exposed by Understat at some point, so we manually set it now
      shots$league <- each_league
      # need to manually coerce columns to numeric as of the start of 22/23 season to match old data
      shots <- shots %>%
        mutate(
          id = as.numeric(id),
          player_id = as.numeric(player_id),
          season = as.numeric(season),
          match_id = as.numeric(match_id)
        )

      # column names were slightly different prior to the 2021/2022 season - we want to keep these consistent
      if(any(grepl("last_action", names(shots)))) {
        shots <- shots %>%
          rename(X=x, Y=y, xG=x_g, shotType=shot_type, lastAction=last_action)
      }
      # join them all together
      f <- bind_rows(f, shots)
    }
  }

  # now write the file again, regardless of whether there was new data. Will also freshly timestamp the rds
  attr(f, "scrape_timestamp") <- scrape_time_utc

  write_worldfootballr(x=f, name=paste0(league_name_clean, "_shot_data"), tag = "understat_shots", ext = "rds")

}

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# worldfootballR_data


![run_scrapes](https://github.com/JaseZiv/worldfootballR_data/workflows/run_extracts/badge.svg)


# worldfootballR_data
Repository to hold various data sets scraped from the sites supported in the [`worldfootballR`](https://github.com/JaseZiv/worldfootballR) package. Current sites include:

* fbref.com
* transfermarkt.com
* understat.com
* fotmob.com

***

## Show your support
Follow me on Twitter ([jaseziv](https://twitter.com/jaseziv)) for updates

If this data helps you, all I ask is that you star this repo. If you did want to show your support and contribute to server time and data storage costs, feel free to send a small donation through the link below.

Coffee (Server Time)

***

## The Data

The data can be split up into two main categories:

### 1. 
Supporting data to help with the functions in `worldfootballR`: 31 | 32 | * [FBref Comps and Leagues](https://github.com/JaseZiv/worldfootballR_data/raw/master/raw-data/all_leages_and_cups/all_competitions.csv) 33 | * [Transfermarkt Leagues](https://github.com/JaseZiv/worldfootballR_data/raw/master/raw-data/transfermarkt_leagues/main_comp_seasons.csv) 34 | * [Mapping between FBref and Transfermarkt Players](https://github.com/JaseZiv/worldfootballR_data/blob/master/raw-data/fbref-tm-player-mapping/output/fbref_to_tm_mapping.csv) 35 | 36 | 37 | ### 2. Data sets used in the `load_` functions in `worldfootballR`: 38 | 39 | * [FBref Big 5 League Advanced season stats](https://github.com/JaseZiv/worldfootballR_data/tree/master/data/fb_big5_advanced_season_stats) 40 | 41 | For players and teams, all advanced statistic data available on the site 42 | 43 | * [FBref match results - Domestic Leagues](https://github.com/JaseZiv/worldfootballR_data/tree/master/data/match_results) 44 | 45 | Includes match results played for all domestic leagues available on the site, for all years match results are listed under the fixtures section of leagues 46 | 47 | * [FBref match results - International matches and domestic cups](https://github.com/JaseZiv/worldfootballR_data/tree/master/data/match_results_cups) 48 | 49 | Includes match results played for all domestic cups and international matches available on the site, for all years match results are listed under the fixtures section of cups/comps 50 | 51 | * [Understat shot locations for the Big 5 leagues and RFPL](https://github.com/JaseZiv/worldfootballR_data/tree/master/data/understat_shots) 52 | 53 | Shooting data and locations for the big 5 leagues and the RFPL since the 2014/15 season. 54 | 55 | Shout out to [Mark Wilkins](https://twitter.com/biscuitchaser) for supplying the original data dump of the seasons for all big 5 leagues from 2014/15 to 2021/22. 
The data was originally [here](https://github.com/Markjwilkins/Understat) 56 | -------------------------------------------------------------------------------- /data/fb_advanced_match_stats/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !.gitignore 3 | -------------------------------------------------------------------------------- /data/fb_big5_advanced_season_stats/big5_player_defense.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/fb_big5_advanced_season_stats/big5_player_defense.rds -------------------------------------------------------------------------------- /data/fb_big5_advanced_season_stats/big5_player_gca.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/fb_big5_advanced_season_stats/big5_player_gca.rds -------------------------------------------------------------------------------- /data/fb_big5_advanced_season_stats/big5_player_keepers.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/fb_big5_advanced_season_stats/big5_player_keepers.rds -------------------------------------------------------------------------------- /data/fb_big5_advanced_season_stats/big5_player_keepers_adv.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/fb_big5_advanced_season_stats/big5_player_keepers_adv.rds -------------------------------------------------------------------------------- /data/fb_big5_advanced_season_stats/big5_player_misc.rds: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/fb_big5_advanced_season_stats/big5_player_misc.rds -------------------------------------------------------------------------------- /data/fb_big5_advanced_season_stats/big5_player_passing.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/fb_big5_advanced_season_stats/big5_player_passing.rds -------------------------------------------------------------------------------- /data/fb_big5_advanced_season_stats/big5_player_passing_types.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/fb_big5_advanced_season_stats/big5_player_passing_types.rds -------------------------------------------------------------------------------- /data/fb_big5_advanced_season_stats/big5_player_playing_time.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/fb_big5_advanced_season_stats/big5_player_playing_time.rds -------------------------------------------------------------------------------- /data/fb_big5_advanced_season_stats/big5_player_possession.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/fb_big5_advanced_season_stats/big5_player_possession.rds -------------------------------------------------------------------------------- /data/fb_big5_advanced_season_stats/big5_player_shooting.rds: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/fb_big5_advanced_season_stats/big5_player_shooting.rds -------------------------------------------------------------------------------- /data/fb_big5_advanced_season_stats/big5_player_standard.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/fb_big5_advanced_season_stats/big5_player_standard.rds -------------------------------------------------------------------------------- /data/fb_big5_advanced_season_stats/big5_team_defense.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/fb_big5_advanced_season_stats/big5_team_defense.rds -------------------------------------------------------------------------------- /data/fb_big5_advanced_season_stats/big5_team_gca.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/fb_big5_advanced_season_stats/big5_team_gca.rds -------------------------------------------------------------------------------- /data/fb_big5_advanced_season_stats/big5_team_keepers.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/fb_big5_advanced_season_stats/big5_team_keepers.rds -------------------------------------------------------------------------------- /data/fb_big5_advanced_season_stats/big5_team_keepers_adv.rds: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/fb_big5_advanced_season_stats/big5_team_keepers_adv.rds -------------------------------------------------------------------------------- /data/fb_big5_advanced_season_stats/big5_team_misc.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/fb_big5_advanced_season_stats/big5_team_misc.rds -------------------------------------------------------------------------------- /data/fb_big5_advanced_season_stats/big5_team_passing.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/fb_big5_advanced_season_stats/big5_team_passing.rds -------------------------------------------------------------------------------- /data/fb_big5_advanced_season_stats/big5_team_passing_types.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/fb_big5_advanced_season_stats/big5_team_passing_types.rds -------------------------------------------------------------------------------- /data/fb_big5_advanced_season_stats/big5_team_playing_time.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/fb_big5_advanced_season_stats/big5_team_playing_time.rds -------------------------------------------------------------------------------- /data/fb_big5_advanced_season_stats/big5_team_possession.rds: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/fb_big5_advanced_season_stats/big5_team_possession.rds -------------------------------------------------------------------------------- /data/fb_big5_advanced_season_stats/big5_team_shooting.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/fb_big5_advanced_season_stats/big5_team_shooting.rds -------------------------------------------------------------------------------- /data/fb_big5_advanced_season_stats/big5_team_standard.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/fb_big5_advanced_season_stats/big5_team_standard.rds -------------------------------------------------------------------------------- /data/fb_big5_advanced_statsbomb/README.md: -------------------------------------------------------------------------------- 1 | # StatsBomb via FBRef 2 | 3 | FBRef changed data providers from StatsBomb to Opta late October 2022. This meant that all previously displayed data changed overnight to reflect Opta's counting/estimating of statistics. 4 | 5 | For any analysts looking to maintain previous analysis, or to be able to compare StatsBomb and Opta, use the data files in this directory for StatsBomb data, which was last updated 2022-08-22. 6 | 7 | For the equivalent Opta data sets, see the `data/fb_big5_advanced_season_stats` directory in this repository. 
-------------------------------------------------------------------------------- /data/fb_big5_advanced_statsbomb/big5_player_defense.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/fb_big5_advanced_statsbomb/big5_player_defense.rds -------------------------------------------------------------------------------- /data/fb_big5_advanced_statsbomb/big5_player_gca.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/fb_big5_advanced_statsbomb/big5_player_gca.rds -------------------------------------------------------------------------------- /data/fb_big5_advanced_statsbomb/big5_player_keepers.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/fb_big5_advanced_statsbomb/big5_player_keepers.rds -------------------------------------------------------------------------------- /data/fb_big5_advanced_statsbomb/big5_player_keepers_adv.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/fb_big5_advanced_statsbomb/big5_player_keepers_adv.rds -------------------------------------------------------------------------------- /data/fb_big5_advanced_statsbomb/big5_player_misc.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/fb_big5_advanced_statsbomb/big5_player_misc.rds -------------------------------------------------------------------------------- 
/data/fb_big5_advanced_statsbomb/big5_player_passing.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/fb_big5_advanced_statsbomb/big5_player_passing.rds -------------------------------------------------------------------------------- /data/fb_big5_advanced_statsbomb/big5_player_passing_types.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/fb_big5_advanced_statsbomb/big5_player_passing_types.rds -------------------------------------------------------------------------------- /data/fb_big5_advanced_statsbomb/big5_player_playing_time.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/fb_big5_advanced_statsbomb/big5_player_playing_time.rds -------------------------------------------------------------------------------- /data/fb_big5_advanced_statsbomb/big5_player_possession.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/fb_big5_advanced_statsbomb/big5_player_possession.rds -------------------------------------------------------------------------------- /data/fb_big5_advanced_statsbomb/big5_player_shooting.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/fb_big5_advanced_statsbomb/big5_player_shooting.rds -------------------------------------------------------------------------------- /data/fb_big5_advanced_statsbomb/big5_player_standard.rds: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/fb_big5_advanced_statsbomb/big5_player_standard.rds -------------------------------------------------------------------------------- /data/fb_big5_advanced_statsbomb/big5_team_defense.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/fb_big5_advanced_statsbomb/big5_team_defense.rds -------------------------------------------------------------------------------- /data/fb_big5_advanced_statsbomb/big5_team_gca.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/fb_big5_advanced_statsbomb/big5_team_gca.rds -------------------------------------------------------------------------------- /data/fb_big5_advanced_statsbomb/big5_team_keepers.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/fb_big5_advanced_statsbomb/big5_team_keepers.rds -------------------------------------------------------------------------------- /data/fb_big5_advanced_statsbomb/big5_team_keepers_adv.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/fb_big5_advanced_statsbomb/big5_team_keepers_adv.rds -------------------------------------------------------------------------------- /data/fb_big5_advanced_statsbomb/big5_team_misc.rds: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/fb_big5_advanced_statsbomb/big5_team_misc.rds -------------------------------------------------------------------------------- /data/fb_big5_advanced_statsbomb/big5_team_passing.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/fb_big5_advanced_statsbomb/big5_team_passing.rds -------------------------------------------------------------------------------- /data/fb_big5_advanced_statsbomb/big5_team_passing_types.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/fb_big5_advanced_statsbomb/big5_team_passing_types.rds -------------------------------------------------------------------------------- /data/fb_big5_advanced_statsbomb/big5_team_playing_time.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/fb_big5_advanced_statsbomb/big5_team_playing_time.rds -------------------------------------------------------------------------------- /data/fb_big5_advanced_statsbomb/big5_team_possession.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/fb_big5_advanced_statsbomb/big5_team_possession.rds -------------------------------------------------------------------------------- /data/fb_big5_advanced_statsbomb/big5_team_shooting.rds: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/fb_big5_advanced_statsbomb/big5_team_shooting.rds -------------------------------------------------------------------------------- /data/fb_big5_advanced_statsbomb/big5_team_standard.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/fb_big5_advanced_statsbomb/big5_team_standard.rds -------------------------------------------------------------------------------- /data/fb_match_shooting/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !.gitignore -------------------------------------------------------------------------------- /data/fb_match_summary/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !.gitignore 3 | -------------------------------------------------------------------------------- /data/fotmob_match_details/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !.gitignore -------------------------------------------------------------------------------- /data/match_results/ARG_match_results.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/ARG_match_results.rds -------------------------------------------------------------------------------- /data/match_results/AUS_match_results.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/AUS_match_results.rds -------------------------------------------------------------------------------- 
/data/match_results/AUT_match_results.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/AUT_match_results.rds -------------------------------------------------------------------------------- /data/match_results/BEL_match_results.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/BEL_match_results.rds -------------------------------------------------------------------------------- /data/match_results/BOL_match_results.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/BOL_match_results.rds -------------------------------------------------------------------------------- /data/match_results/BRA_match_results.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/BRA_match_results.rds -------------------------------------------------------------------------------- /data/match_results/BUL_match_results.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/BUL_match_results.rds -------------------------------------------------------------------------------- /data/match_results/CAN_match_results.rds: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/CAN_match_results.rds -------------------------------------------------------------------------------- /data/match_results/CHI_match_results.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/CHI_match_results.rds -------------------------------------------------------------------------------- /data/match_results/CHN_match_results.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/CHN_match_results.rds -------------------------------------------------------------------------------- /data/match_results/COL_match_results.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/COL_match_results.rds -------------------------------------------------------------------------------- /data/match_results/CRO_match_results.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/CRO_match_results.rds -------------------------------------------------------------------------------- /data/match_results/CZE_match_results.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/CZE_match_results.rds -------------------------------------------------------------------------------- 
/data/match_results/DEN_match_results.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/DEN_match_results.rds -------------------------------------------------------------------------------- /data/match_results/ECU_match_results.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/ECU_match_results.rds -------------------------------------------------------------------------------- /data/match_results/ENG_match_results.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/ENG_match_results.rds -------------------------------------------------------------------------------- /data/match_results/ESP_match_results.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/ESP_match_results.rds -------------------------------------------------------------------------------- /data/match_results/FIN_match_results.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/FIN_match_results.rds -------------------------------------------------------------------------------- /data/match_results/FRA_match_results.rds: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/FRA_match_results.rds -------------------------------------------------------------------------------- /data/match_results/GER_match_results.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/GER_match_results.rds -------------------------------------------------------------------------------- /data/match_results/GRE_match_results.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/GRE_match_results.rds -------------------------------------------------------------------------------- /data/match_results/HUN_match_results.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/HUN_match_results.rds -------------------------------------------------------------------------------- /data/match_results/IND_match_results.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/IND_match_results.rds -------------------------------------------------------------------------------- /data/match_results/IRN_match_results.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/IRN_match_results.rds -------------------------------------------------------------------------------- 
/data/match_results/ITA_match_results.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/ITA_match_results.rds -------------------------------------------------------------------------------- /data/match_results/JPN_match_results.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/JPN_match_results.rds -------------------------------------------------------------------------------- /data/match_results/KOR_match_results.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/KOR_match_results.rds -------------------------------------------------------------------------------- /data/match_results/KSA_match_results.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/KSA_match_results.rds -------------------------------------------------------------------------------- /data/match_results/MEX_match_results.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/MEX_match_results.rds -------------------------------------------------------------------------------- /data/match_results/NED_match_results.rds: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/NED_match_results.rds -------------------------------------------------------------------------------- /data/match_results/NOR_match_results.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/NOR_match_results.rds -------------------------------------------------------------------------------- /data/match_results/PAR_match_results.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/PAR_match_results.rds -------------------------------------------------------------------------------- /data/match_results/PER_match_results.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/PER_match_results.rds -------------------------------------------------------------------------------- /data/match_results/POL_match_results.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/POL_match_results.rds -------------------------------------------------------------------------------- /data/match_results/POR_match_results.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/POR_match_results.rds -------------------------------------------------------------------------------- 
/data/match_results/ROU_match_results.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/ROU_match_results.rds -------------------------------------------------------------------------------- /data/match_results/RSA_match_results.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/RSA_match_results.rds -------------------------------------------------------------------------------- /data/match_results/RUS_match_results.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/RUS_match_results.rds -------------------------------------------------------------------------------- /data/match_results/SCO_match_results.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/SCO_match_results.rds -------------------------------------------------------------------------------- /data/match_results/SRB_match_results.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/SRB_match_results.rds -------------------------------------------------------------------------------- /data/match_results/SUI_match_results.rds: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/SUI_match_results.rds -------------------------------------------------------------------------------- /data/match_results/SWE_match_results.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/SWE_match_results.rds -------------------------------------------------------------------------------- /data/match_results/TUR_match_results.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/TUR_match_results.rds -------------------------------------------------------------------------------- /data/match_results/UKR_match_results.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/UKR_match_results.rds -------------------------------------------------------------------------------- /data/match_results/URU_match_results.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/URU_match_results.rds -------------------------------------------------------------------------------- /data/match_results/USA_match_results.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/USA_match_results.rds -------------------------------------------------------------------------------- 
/data/match_results/VEN_match_results.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results/VEN_match_results.rds -------------------------------------------------------------------------------- /data/match_results_cups/README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: github_document 3 | --- 4 | 5 | ## Competition Names for `worldfootballR::load_match_comp_results()` 6 | 7 | 8 | 9 | The below is a list of all the available competition names to pass to the `comp_name` value in the `worldfootballR::load_match_comp_results()` function: 10 | 11 | ```{r, echo=FALSE, warning=FALSE, message=FALSE} 12 | library(dplyr) 13 | seasons <- read.csv("https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/master/raw-data/all_leages_and_cups/all_competitions.csv", stringsAsFactors = F) 14 | 15 | # the cups below are one-off matches, so we don't need scores and fixtures for these: 16 | exclusion_cups <- c("UEFA Super Cup", "FA Community Shield", "Supercopa de España", "Trophée des Champions", "DFL-Supercup", "Supercoppa Italiana") 17 | 18 | latest_cup_seasons <- seasons %>% 19 | # keep only competitions that aren't domestic leagues (i.e. drop the leagues): 20 | filter(!stringr::str_detect(.data$competition_type, "Leagues"), 21 | # and also drop the single-match cup games listed above: 22 | !.data$competition_name %in% exclusion_cups) %>% 23 | group_by(competition_name) %>% slice_max(season_end_year) %>% 24 | distinct() %>% 25 | select(competition_type,competition_name,country,gender,governing_body,first_season,last_season,tier) 26 | 27 | latest_cup_seasons %>% pull(competition_name) 28 | ``` 29 | 30 | -------------------------------------------------------------------------------- /data/match_results_cups/README.md: -------------------------------------------------------------------------------- 1 | 2
| ## Competition Names for `worldfootballR::load_match_comp_results()` 3 | 4 | 5 | 6 | The below is a list of all the available competition names to pass to 7 | the `comp_name` value in the `worldfootballR::load_match_comp_results()` 8 | function: 9 | 10 | ## [1] "AFC Asian Cup" 11 | ## [2] "AFC Asian Cup qualification" 12 | ## [3] "AFC Women's Asian Cup" 13 | ## [4] "AFC Women's Asian Cup Qualification" 14 | ## [5] "Africa Cup of Nations" 15 | ## [6] "Africa Cup of Nations qualification" 16 | ## [7] "Africa Women Cup of Nations" 17 | ## [8] "Algarve Cup" 18 | ## [9] "CONCACAF Gold Cup" 19 | ## [10] "CONCACAF W Championship" 20 | ## [11] "Copa America" 21 | ## [12] "Copa América Femenina" 22 | ## [13] "Copa del Rey" 23 | ## [14] "Copa Libertadores de América" 24 | ## [15] "Copa Sudamericana" 25 | ## [16] "Coppa Italia" 26 | ## [17] "Coupe de France" 27 | ## [18] "Coupe de la Ligue" 28 | ## [19] "DFB-Pokal" 29 | ## [20] "DFB-Pokal Frauen" 30 | ## [21] "English Football League Cup" 31 | ## [22] "European Championship" 32 | ## [23] "FA Cup" 33 | ## [24] "FIFA Confederations Cup" 34 | ## [25] "FIFA Women's World Cup" 35 | ## [26] "FIFA Women's World Cup Qualification (UEFA)" 36 | ## [27] "FIFA World Cup" 37 | ## [28] "FIFA World Cup Qualification — AFC" 38 | ## [29] "FIFA World Cup Qualification — CAF" 39 | ## [30] "FIFA World Cup Qualification — CONCACAF" 40 | ## [31] "FIFA World Cup Qualification — CONMEBOL" 41 | ## [32] "FIFA World Cup Qualification — OFC" 42 | ## [33] "FIFA World Cup Qualification — UEFA" 43 | ## [34] "International Friendlies (M)" 44 | ## [35] "International Friendlies (W)" 45 | ## [36] "NWSL Challenge Cup" 46 | ## [37] "NWSL Fall Series" 47 | ## [38] "OFC Nations Cup" 48 | ## [39] "OFC Women's Nations Cup" 49 | ## [40] "Olympics – Women's Tournament" 50 | ## [41] "SheBelieves Cup" 51 | ## [42] "UEFA Champions League" 52 | ## [43] "UEFA Euro Qualification" 53 | ## [44] "UEFA Europa Conference League" 54 | ## [45] "UEFA Europa League" 55 | ## [46] 
"UEFA Nations League" 56 | ## [47] "UEFA Women's Champions League" 57 | ## [48] "UEFA Women's Championship" 58 | ## [49] "UEFA Women's Euro Qualification" 59 | -------------------------------------------------------------------------------- /data/match_results_cups/afc_asian_cup_match_results.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/afc_asian_cup_match_results.rds -------------------------------------------------------------------------------- /data/match_results_cups/afc_asian_cup_qualification_match_results.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/afc_asian_cup_qualification_match_results.rds -------------------------------------------------------------------------------- /data/match_results_cups/afc_womens_asian_cup_match_results.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/afc_womens_asian_cup_match_results.rds -------------------------------------------------------------------------------- /data/match_results_cups/afc_womens_asian_cup_qualification_match_results.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/afc_womens_asian_cup_qualification_match_results.rds -------------------------------------------------------------------------------- /data/match_results_cups/africa_cup_of_nations_match_results.rds: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/africa_cup_of_nations_match_results.rds -------------------------------------------------------------------------------- /data/match_results_cups/africa_cup_of_nations_qualification_match_results.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/africa_cup_of_nations_qualification_match_results.rds -------------------------------------------------------------------------------- /data/match_results_cups/africa_women_cup_of_nations_match_results.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/africa_women_cup_of_nations_match_results.rds -------------------------------------------------------------------------------- /data/match_results_cups/algarve_cup_match_results.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/algarve_cup_match_results.rds -------------------------------------------------------------------------------- /data/match_results_cups/concacaf_gold_cup_match_results.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/concacaf_gold_cup_match_results.rds -------------------------------------------------------------------------------- /data/match_results_cups/concacaf_w_championship_match_results.rds: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/concacaf_w_championship_match_results.rds -------------------------------------------------------------------------------- /data/match_results_cups/copa_america_femenina_match_results.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/copa_america_femenina_match_results.rds -------------------------------------------------------------------------------- /data/match_results_cups/copa_america_match_results.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/copa_america_match_results.rds -------------------------------------------------------------------------------- /data/match_results_cups/copa_del_rey_match_results.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/copa_del_rey_match_results.rds -------------------------------------------------------------------------------- /data/match_results_cups/copa_libertadores_de_america_match_results.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/copa_libertadores_de_america_match_results.rds -------------------------------------------------------------------------------- /data/match_results_cups/copa_sudamericana_match_results.rds: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/copa_sudamericana_match_results.rds -------------------------------------------------------------------------------- /data/match_results_cups/coppa_italia_match_results.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/coppa_italia_match_results.rds -------------------------------------------------------------------------------- /data/match_results_cups/coupe_de_france_match_results.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/coupe_de_france_match_results.rds -------------------------------------------------------------------------------- /data/match_results_cups/coupe_de_la_ligue_match_results.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/coupe_de_la_ligue_match_results.rds -------------------------------------------------------------------------------- /data/match_results_cups/dfb_pokal_frauen_match_results.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/dfb_pokal_frauen_match_results.rds -------------------------------------------------------------------------------- /data/match_results_cups/dfb_pokal_match_results.rds: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/dfb_pokal_match_results.rds -------------------------------------------------------------------------------- /data/match_results_cups/efl_cup_match_results.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/efl_cup_match_results.rds -------------------------------------------------------------------------------- /data/match_results_cups/english_football_league_cup_match_results.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/english_football_league_cup_match_results.rds -------------------------------------------------------------------------------- /data/match_results_cups/european_championship_match_results.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/european_championship_match_results.rds -------------------------------------------------------------------------------- /data/match_results_cups/fa_cup_match_results.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/fa_cup_match_results.rds -------------------------------------------------------------------------------- /data/match_results_cups/fifa_confederations_cup_match_results.rds: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/fifa_confederations_cup_match_results.rds -------------------------------------------------------------------------------- /data/match_results_cups/fifa_womens_world_cup_match_results.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/fifa_womens_world_cup_match_results.rds -------------------------------------------------------------------------------- /data/match_results_cups/fifa_womens_world_cup_qualification_uefa_match_results.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/fifa_womens_world_cup_qualification_uefa_match_results.rds -------------------------------------------------------------------------------- /data/match_results_cups/fifa_world_cup_match_results.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/fifa_world_cup_match_results.rds -------------------------------------------------------------------------------- /data/match_results_cups/fifa_world_cup_qualification_afc_match_results.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/fifa_world_cup_qualification_afc_match_results.rds -------------------------------------------------------------------------------- /data/match_results_cups/fifa_world_cup_qualification_caf_match_results.rds: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/fifa_world_cup_qualification_caf_match_results.rds -------------------------------------------------------------------------------- /data/match_results_cups/fifa_world_cup_qualification_concacaf_match_results.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/fifa_world_cup_qualification_concacaf_match_results.rds -------------------------------------------------------------------------------- /data/match_results_cups/fifa_world_cup_qualification_conmebol_match_results.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/fifa_world_cup_qualification_conmebol_match_results.rds -------------------------------------------------------------------------------- /data/match_results_cups/fifa_world_cup_qualification_inter_confederation_play_offs_match_results.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/fifa_world_cup_qualification_inter_confederation_play_offs_match_results.rds -------------------------------------------------------------------------------- /data/match_results_cups/fifa_world_cup_qualification_ofc_match_results.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/fifa_world_cup_qualification_ofc_match_results.rds 
-------------------------------------------------------------------------------- /data/match_results_cups/fifa_world_cup_qualification_uefa_match_results.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/fifa_world_cup_qualification_uefa_match_results.rds -------------------------------------------------------------------------------- /data/match_results_cups/international_friendlies_m_match_results.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/international_friendlies_m_match_results.rds -------------------------------------------------------------------------------- /data/match_results_cups/international_friendlies_w_match_results.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/international_friendlies_w_match_results.rds -------------------------------------------------------------------------------- /data/match_results_cups/nwsl_challenge_cup_match_results.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/nwsl_challenge_cup_match_results.rds -------------------------------------------------------------------------------- /data/match_results_cups/nwsl_fall_series_match_results.rds: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/nwsl_fall_series_match_results.rds -------------------------------------------------------------------------------- /data/match_results_cups/ofc_nations_cup_match_results.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/ofc_nations_cup_match_results.rds -------------------------------------------------------------------------------- /data/match_results_cups/ofc_womens_nations_cup_match_results.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/ofc_womens_nations_cup_match_results.rds -------------------------------------------------------------------------------- /data/match_results_cups/olympics_womens_tournament_match_results.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/olympics_womens_tournament_match_results.rds -------------------------------------------------------------------------------- /data/match_results_cups/she_believes_cup_match_results.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/she_believes_cup_match_results.rds -------------------------------------------------------------------------------- /data/match_results_cups/uefa_champions_league_match_results.rds: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/uefa_champions_league_match_results.rds -------------------------------------------------------------------------------- /data/match_results_cups/uefa_euro_qualification_match_results.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/uefa_euro_qualification_match_results.rds -------------------------------------------------------------------------------- /data/match_results_cups/uefa_europa_conference_league_match_results.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/uefa_europa_conference_league_match_results.rds -------------------------------------------------------------------------------- /data/match_results_cups/uefa_europa_league_match_results.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/uefa_europa_league_match_results.rds -------------------------------------------------------------------------------- /data/match_results_cups/uefa_european_football_championship_qualifying_match_results.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/uefa_european_football_championship_qualifying_match_results.rds -------------------------------------------------------------------------------- /data/match_results_cups/uefa_nations_league_match_results.rds: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/uefa_nations_league_match_results.rds -------------------------------------------------------------------------------- /data/match_results_cups/uefa_womens_champions_league_match_results.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/uefa_womens_champions_league_match_results.rds -------------------------------------------------------------------------------- /data/match_results_cups/uefa_womens_championship_match_results.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/uefa_womens_championship_match_results.rds -------------------------------------------------------------------------------- /data/match_results_cups/uefa_womens_euro_qualification_match_results.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/match_results_cups/uefa_womens_euro_qualification_match_results.rds -------------------------------------------------------------------------------- /data/tm_player_vals/big5_player_vals.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/tm_player_vals/big5_player_vals.rds -------------------------------------------------------------------------------- /data/tm_transfers/big_5_transfers.rds: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/tm_transfers/big_5_transfers.rds -------------------------------------------------------------------------------- /data/understat_shots/bundesliga_shot_data.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/understat_shots/bundesliga_shot_data.rds -------------------------------------------------------------------------------- /data/understat_shots/epl_shot_data.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/understat_shots/epl_shot_data.rds -------------------------------------------------------------------------------- /data/understat_shots/la_liga_shot_data.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/understat_shots/la_liga_shot_data.rds -------------------------------------------------------------------------------- /data/understat_shots/ligue_1_shot_data.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/understat_shots/ligue_1_shot_data.rds -------------------------------------------------------------------------------- /data/understat_shots/rfpl_shot_data.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/understat_shots/rfpl_shot_data.rds 
-------------------------------------------------------------------------------- /data/understat_shots/serie_a_shot_data.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/data/understat_shots/serie_a_shot_data.rds -------------------------------------------------------------------------------- /man/figures/hex_sticker.R: -------------------------------------------------------------------------------- 1 | 2 | # install.packages("ggsoccer") 3 | library(hexSticker) 4 | library(ggplot2) 5 | library(ggsoccer) 6 | 7 | setwd(here::here("man")) 8 | sysfonts::font_add_google(name = "Chivo", family = "chivo") 9 | sysfonts::font_add_google(name = "Play", family = "play") 10 | 11 | pitch <- ggplot() + 12 | annotate_pitch(fill = "#538032", colour = "grey30") + 13 | theme_pitch() + 14 | theme(panel.background = element_rect(fill = "#538032")) 15 | 16 | 17 | pitch <- pitch + theme_void() + theme_transparent() 18 | 19 | 20 | sticker(pitch, 21 | package="worldfootballR data", 22 | p_family = "play", p_size=11, p_color = "grey30", 23 | s_x=1, s_y=.8, s_width=1.3, s_height=0.85, 24 | h_fill = "#538032", h_color = "grey30", 25 | url = "https://github.com/JaseZiv/worldfootballR_data", u_y = 0.07, u_x = 1.0, u_size = 3, u_color = "white", u_family = "play", 26 | filename="figures/logo.png") 27 | 28 | # smaller size hex logo: 29 | sticker(pitch, 30 | package="worldfootballR_data", 31 | p_family = "play", p_size=11, p_color = "grey30", 32 | s_x=1, s_y=.8, s_width=1.3, s_height=0.85, 33 | h_fill = "#538032", 34 | url = "hhttps://github.com/JaseZiv/worldfootballR_data", u_y = 0.07, u_x = 1.0, u_size = 3, u_color = "white", u_family = "play", 35 | filename="figures/logo_small_size.png") # modify size in viewer to dimensions 181x209 as a png 36 | 37 | 38 | ########################################################################### 39 | # Different Options: 
------------------------------------------------------ 40 | 41 | # sticker(pitch, 42 | # package="worldfootballR", 43 | # p_size=6, p_color = "white", 44 | # s_x=1, s_y=.8, s_width=1.3, s_height=0.85, 45 | # h_fill = "#538032", 46 | # url = "https://jaseziv.github.io/worldfootballR/", u_y = 0.09, u_x = 1.05, u_size = 1.2, u_color = "white", 47 | # filename="man/figures/logo_standard.png") 48 | # 49 | # 50 | # sticker(pitch, 51 | # package="worldfootballR", 52 | # p_family = "chivo", 53 | # p_size=6, p_color = "white", 54 | # s_x=1, s_y=.8, s_width=1.3, s_height=0.85, 55 | # h_fill = "#538032", 56 | # url = "https://jaseziv.github.io/worldfootballR/", u_y = 0.07, u_x = 1.0, u_size = 1.2, u_color = "white", u_family = "chivo", 57 | # filename="man/figures/logo_chivo.png") 58 | # 59 | # 60 | # sticker(pitch, 61 | # package="worldfootballR", 62 | # p_family = "play", 63 | # p_size=6, p_color = "white", 64 | # s_x=1, s_y=.8, s_width=1.3, s_height=0.85, 65 | # h_fill = "#538032", 66 | # h_color = "black", 67 | # spotlight = T, l_y = 0.83, 68 | # url = "https://jaseziv.github.io/worldfootballR/", u_y = 0.07, u_x = 1.0, u_size = 1.2, u_color = "white", u_family = "play", 69 | # filename="man/figures/logo_play_black_border.png") 70 | 71 | 72 | -------------------------------------------------------------------------------- /man/figures/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/man/figures/logo.png -------------------------------------------------------------------------------- /man/figures/logo_small_size.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/man/figures/logo_small_size.png -------------------------------------------------------------------------------- 
/raw-data/countries_list/get_countries_list.R: -------------------------------------------------------------------------------- 1 | library(tidyverse) 2 | library(rvest) 3 | 4 | fb_country_leagues <- function() { 5 | 6 | main_url <- "https://fbref.com" 7 | countries_page <- xml2::read_html("https://fbref.com/en/countries/") 8 | 9 | country_holder <- countries_page %>% rvest::html_nodes("#countries") %>% rvest::html_nodes("tbody") %>% rvest::html_nodes("tr") 10 | 11 | idx <- 0 12 | countries_df <- data.frame() 13 | 14 | for(each_row in country_holder) { 15 | idx <- idx + 1 16 | countries_df[idx, "country"] <- tryCatch(each_row %>% rvest::html_nodes(".left:nth-child(1) a") %>% rvest::html_text(), error = function(e) NA_character_) 17 | countries_df[idx, "country_url"] <- tryCatch(each_row %>% rvest::html_nodes(".left:nth-child(1) a") %>% rvest::html_attr("href") %>% paste0(main_url, .) %>% paste(collapse = ",\n"), error = function(e) NA_character_) 18 | 19 | if(is_empty(each_row %>% rvest::html_nodes(".right~ .right+ .left") %>% rvest::html_nodes("a") %>% rvest::html_text())) { 20 | countries_df[idx, "league_name"] <- NA_character_ 21 | } else { 22 | countries_df[idx, "league_name"] <- tryCatch(each_row %>% rvest::html_nodes(".right~ .right+ .left") %>% rvest::html_nodes("a") %>% rvest::html_text() %>% paste(collapse = ",\n"), error = function(e) NA_character_) 23 | } 24 | 25 | if(is_empty(each_row %>% rvest::html_nodes(".right~ .right+ .left") %>% rvest::html_nodes("a") %>% rvest::html_attr("href"))) { 26 | countries_df[idx, "league_url"] <- NA_character_ 27 | } else { 28 | countries_df[idx, "league_url"] <- tryCatch(each_row %>% rvest::html_nodes(".right~ .right+ .left") %>% rvest::html_nodes("a") %>% rvest::html_attr("href") %>% paste0(main_url, .) 
%>% paste(collapse = ",\n"), error = function(e) NA_character_) 29 | } 30 | 31 | } 32 | 33 | countries_df <- countries_df %>% dplyr::mutate(has_leage_page = !is.na(league_name)) 34 | 35 | return(countries_df) 36 | } 37 | 38 | # scrape the data 39 | countries_df <- fb_country_leagues() 40 | 41 | # write the final data 42 | write.csv(countries_df, here::here("raw-data", "countries_list", "countries_df.csv"), row.names = F) 43 | -------------------------------------------------------------------------------- /raw-data/fbref-tm-player-mapping/README.md: -------------------------------------------------------------------------------- 1 | # Mapping FBref and Transfermarkt Players 2 | 3 | This section creates a map of player URLs from FBref players to the relevant player's data on Transfermarkt. 4 | 5 | Currently, the mappings are for players who have played in the following men's leagues since the start of the 2017-18 season: 6 | 7 | * Top 5 European leagues 8 | * MLS 9 | * Eredivisie 10 | * Portuguese Primeira Liga 11 | * Campeonato Brasileiro Serie A 12 | * Liga MX 13 | * English Championship 14 | 15 | I aim to update this fairly frequently, so that players who subsequently appear on FBref in these leagues will continue to be mapped. 16 | 17 | *** 18 | 19 | ## Show your support 20 | Follow me on Twitter ([jaseziv](https://twitter.com/jaseziv)) for updates 21 | 22 | If this data helps you, all I ask is that you star this repo. If you did want to show your support and contribute to server time and data storage costs, feel free to send a small donation through the link below. 23 | 24 | Coffee (Server Time) 25 | 26 | *** 27 | 28 | ## Usage 29 | 30 | To update the data, first run `prepare_working_files.R`. This will generate a list of csv outputs. There are two that will potentially need to be actioned: 31 | 32 | * `joined_missing.csv` contains the players who haven't been able to be matched by the automated script.
These need to be manually investigated, and the results then saved over the `joined_missing_manual_fix.csv` file 33 | * `duplicate_players_df.csv` contains a list of players who have been joined using the automated script, however duplicates have arisen. Manually fix these duplicates by removing the spurious matches, then save to a file called `duplicate_players_df_manual_fix.csv`. 34 | 35 | Once these files have been manually fixed, run `create_final_data.R` and the final output file will be written to [`output/fbref_to_tm_mapping.csv`](https://github.com/JaseZiv/worldfootballR_data/blob/master/raw-data/fbref-tm-player-mapping/output/fbref_to_tm_mapping.csv). 36 | 37 | ### Update (2021-10-29): Write to Google Sheets 38 | 39 | The project also writes the mapped data to a Google Sheet, found [here](https://docs.google.com/spreadsheets/d/1GjjS9IRp6FVzVX5QyfmttMk8eYBtIzuZ_YIM0VWg8OY/edit#gid=61874932). 40 | 41 | 42 | ### Update (2022-08-11) Update Player Positions 43 | 44 | For players that were mapped originally, some of these may have changed positions since the initial map. To get current TM positions, run the file named `update_player_positions.R`. 45 | 46 | *** 47 | 48 | ## Contributing 49 | 50 | If anyone wants to contribute mapped players for different leagues, feel free to get in touch with me on Twitter [here](https://twitter.com/jaseziv), create an issue in [`worldfootballR`](https://github.com/JaseZiv/worldfootballR) or email me at `jaseziv83@gmail.com`.
51 | -------------------------------------------------------------------------------- /raw-data/fbref-tm-player-mapping/create_final_data.R: -------------------------------------------------------------------------------- 1 | library(worldfootballR) 2 | library(tidyverse) 3 | library(googlesheets4) 4 | library(here) 5 | 6 | existing_df <- read.csv("https://github.com/JaseZiv/worldfootballR_data/raw/master/raw-data/fbref-tm-player-mapping/output/fbref_to_tm_mapping.csv", stringsAsFactors = FALSE) 7 | 8 | # read in files 9 | joined_finished <- read.csv(here("raw-data", "fbref-tm-player-mapping", "output", "working-files", "joined_finished.csv"), stringsAsFactors = F) 10 | joined_missing <- read.csv(here("raw-data", "fbref-tm-player-mapping", "output", "working-files", "joined_missing_manual_fix.csv"), stringsAsFactors = F) 11 | duplicate_players <- tryCatch(read.csv(here("raw-data", "fbref-tm-player-mapping", "output", "working-files", "duplicate_players_df_manual_fix.csv"), stringsAsFactors = F) %>% 12 | select(-fbref_surname, -player_name), error = function(e) data.frame()) 13 | 14 | 15 | matched_data <- bind_rows(joined_finished, joined_missing, duplicate_players) %>% 16 | arrange(Player) %>% 17 | mutate(player_url = ifelse(player_url == "", NA_character_, player_url)) 18 | 19 | 20 | matched_data <- matched_data %>% 21 | select(PlayerFBref=Player, UrlFBref=Url, UrlTmarkt=player_url, TmPos=player_position) 22 | 23 | # some players won't have a position listed (because they haven't been matched automatically) 24 | missing_pos <- matched_data %>% filter(!is.na(UrlTmarkt) & is.na(TmPos)) %>% pull(UrlTmarkt) 25 | 26 | # for these URLs, we can get their positions using the `tm_player_bio` function 27 | missing_pos_bios <- tm_player_bio(player_urls = missing_pos) 28 | 29 | # need to clean these up from the bio data - for some reason soe of them come with the position group (say "midfield") then the true position "Left Midfielder" 30 | # we only want "Left Midfiender" 31 
| missing_pos_bios <- missing_pos_bios %>% 32 | mutate(TmPos = case_when( 33 | grepl(" - ", position) ~ gsub(".*- ", "", position), 34 | TRUE ~ position 35 | )) 36 | 37 | # join the present and missing player data 38 | matched_data <- matched_data %>% 39 | filter(!is.na(UrlTmarkt)) %>% 40 | filter(!is.na(TmPos)) %>% 41 | bind_rows( 42 | matched_data %>% 43 | filter(!is.na(UrlTmarkt)) %>% 44 | filter(is.na(TmPos)) %>% 45 | select(-TmPos) %>% 46 | left_join(missing_pos_bios %>% select(URL, TmPos), by = c("UrlTmarkt" = "URL")) 47 | ) %>% 48 | arrange(PlayerFBref) 49 | 50 | 51 | # create final output df 52 | final_output <- bind_rows(existing_df, matched_data) %>% 53 | arrange(PlayerFBref) %>% 54 | distinct(UrlFBref, .keep_all=T) 55 | 56 | #============= 57 | # Write Files 58 | #============= 59 | 60 | # write file for commit to GitHub: 61 | write.csv(final_output, here("raw-data", "fbref-tm-player-mapping", "output", "fbref_to_tm_mapping.csv"), row.names = FALSE) 62 | 63 | # Write file to Googlesheets: 64 | # get the sheet id 65 | ss <- as_sheets_id("https://docs.google.com/spreadsheets/d/1GjjS9IRp6FVzVX5QyfmttMk8eYBtIzuZ_YIM0VWg8OY/edit#gid=61874932") %>% 66 | as.character() 67 | 68 | # write the sheet 69 | sheet_write(final_output, 70 | ss, 71 | sheet = "fbref_to_tm_mapping") 72 | -------------------------------------------------------------------------------- /raw-data/fbref-tm-player-mapping/data/tm_data.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/raw-data/fbref-tm-player-mapping/data/tm_data.rds -------------------------------------------------------------------------------- /raw-data/fbref-tm-player-mapping/extra-leagues/create_final_data.R: -------------------------------------------------------------------------------- 1 | library(worldfootballR) 2 | library(tidyverse) 3 | library(googlesheets4) 4 | library(here) 5 | 
6 | existing_df <- read.csv("https://github.com/JaseZiv/worldfootballR_data/raw/master/raw-data/fbref-tm-player-mapping/output/fbref_to_tm_mapping.csv", stringsAsFactors = FALSE) 7 | 8 | # read in files 9 | joined_finished <- read.csv(here("raw-data", "fbref-tm-player-mapping", "extra-leagues", "output", "working-files", "joined_finished.csv"), stringsAsFactors = F) 10 | joined_missing <- read.csv(here("raw-data", "fbref-tm-player-mapping", "output", "working-files", "joined_missing_manual_fix.csv"), stringsAsFactors = F) 11 | duplicate_players <- tryCatch(read.csv(here("raw-data", "fbref-tm-player-mapping", "output", "working-files", "duplicate_players_df_manual_fix.csv"), stringsAsFactors = F) %>% 12 | select(-fbref_surname, -player_name), error = function(e) data.frame()) 13 | 14 | 15 | matched_data <- bind_rows(joined_finished, joined_missing, duplicate_players) %>% 16 | arrange(Player) %>% 17 | mutate(player_url = ifelse(player_url == "", NA_character_, player_url)) 18 | 19 | 20 | matched_data <- matched_data %>% 21 | select(PlayerFBref=Player, UrlFBref=Url, UrlTmarkt=player_url, TmPos=player_position) 22 | 23 | # some players won't have a position listed (because they haven't been matched automatically) 24 | missing_pos <- matched_data %>% filter(!is.na(UrlTmarkt) & is.na(TmPos)) %>% pull(UrlTmarkt) 25 | 26 | # for these URLs, we can get their positions using the `tm_player_bio` function 27 | missing_pos_bios <- tm_player_bio(player_urls = missing_pos) 28 | 29 | # need to clean these up from the bio data - for some reason soe of them come with the position group (say "midfield") then the true position "Left Midfielder" 30 | # we only want "Left Midfiender" 31 | missing_pos_bios <- missing_pos_bios %>% 32 | mutate(TmPos = case_when( 33 | grepl(" - ", position) ~ gsub(".*- ", "", position), 34 | TRUE ~ position 35 | )) 36 | 37 | # join the present and missing player data 38 | matched_data <- matched_data %>% 39 | filter(!is.na(UrlTmarkt)) %>% 40 | 
filter(!is.na(TmPos)) %>% 41 | bind_rows( 42 | matched_data %>% 43 | filter(!is.na(UrlTmarkt)) %>% 44 | filter(is.na(TmPos)) %>% 45 | select(-TmPos) %>% 46 | left_join(missing_pos_bios %>% select(URL, TmPos), by = c("UrlTmarkt" = "URL")) 47 | ) %>% 48 | arrange(PlayerFBref) 49 | 50 | 51 | # create final output df 52 | final_output <- bind_rows(existing_df, matched_data) %>% 53 | arrange(PlayerFBref) %>% 54 | distinct(UrlFBref, .keep_all=T) 55 | 56 | #============= 57 | # Write Files 58 | #============= 59 | 60 | # write file for commit to GitHub: 61 | write.csv(final_output, here("raw-data", "fbref-tm-player-mapping", "output", "fbref_to_tm_mapping.csv"), row.names = FALSE) 62 | 63 | # Write file to Googlesheets: 64 | # get the sheet id 65 | ss <- as_sheets_id("https://docs.google.com/spreadsheets/d/1GjjS9IRp6FVzVX5QyfmttMk8eYBtIzuZ_YIM0VWg8OY/edit#gid=61874932") %>% 66 | as.character() 67 | 68 | # write the sheet 69 | sheet_write(final_output, 70 | ss, 71 | sheet = "fbref_to_tm_mapping") 72 | -------------------------------------------------------------------------------- /raw-data/fbref-tm-player-mapping/extra-leagues/initial-match/build_mapping_dictionary.R: -------------------------------------------------------------------------------- 1 | library(worldfootballR) 2 | library(tidyverse) 3 | library(here) 4 | 5 | fbref <- readRDS(here("raw-data", "fbref-tm-player-mapping", "extra-leagues", "initial-match", "fbref_extra_leagues.rds")) 6 | tm1 <- readRDS(here("raw-data", "fbref-tm-player-mapping", "extra-leagues", "initial-match", "tm_players_extra_tier1.rds")) 7 | tm2 <- readRDS(here("raw-data", "fbref-tm-player-mapping", "extra-leagues", "initial-match", "tm_players_championship.rds")) 8 | 9 | tm <- bind_rows(tm1,tm2) 10 | 11 | matched_data <- read.csv("https://github.com/JaseZiv/worldfootballR_data/raw/master/raw-data/fbref-tm-player-mapping/output/fbref_to_tm_mapping.csv", stringsAsFactors = FALSE) 12 | 13 | 14 | fbref <- fbref %>% filter(!Url %in% 
matched_data$UrlFBref) 15 | 16 | 17 | fbref <- fbref %>% 18 | select(season_end_year, Squad, competition_name, Player, Nation, Born, Url) %>% 19 | mutate(fbref_surname = str_squish(gsub(".*\\s", "", Player))) %>% 20 | arrange(Player, Url, desc(season_end_year)) %>% 21 | distinct(Url, .keep_all = T) 22 | 23 | tm <- tm %>% 24 | select(comp_name, region, country, season_start_year, squad, player_name, player_position, 25 | player_dob, player_nationality, player_market_value_euro, player_url) %>% 26 | arrange(player_name) 27 | 28 | # want a df to help with inspection of names with special characters 29 | tm_unique <- tm %>% 30 | arrange(player_name, player_url, desc(season_start_year)) %>% 31 | distinct(player_name, player_dob, player_url, .keep_all = T) %>% 32 | mutate(tm_surname = str_squish(gsub(".*\\s", "", player_name)), 33 | tm_yob = as.character(lubridate::year(player_dob))) 34 | 35 | #----- primary join type: -----# 36 | # here I will join the two datasets on the player names 37 | # joined_primary <- fbref %>% select(Player, Born, Url) %>% distinct(Url, .keep_all = T) %>% 38 | # left_join(tm %>% select(player_name, player_dob, player_url) %>% distinct(player_url, .keep_all = T), by = c("Player" = "player_name")) 39 | 40 | joined_primary <- fbref %>% select(Player, Born, Url, Squad, Nation) %>% distinct(Url, .keep_all = T) %>% 41 | left_join(tm_unique %>% select(player_name, player_dob, player_url, tm_yob, tm_squad=squad, tm_nationality=player_nationality, player_position) %>% distinct(player_url, .keep_all = T), 42 | by = c("Player" = "player_name", "Born" = "tm_yob")) 43 | 44 | # arrange by player name 45 | joined_primary <- joined_primary %>% arrange(Player) 46 | 47 | 48 | # these players have multiple records in each data set - think "Adama Traoré" or "Rafael" or "Raúl García" for example 49 | # will need to manually go through each of these to map the correct player 50 | duplicate_players <- joined_primary %>% count(Player, Url, sort = T) %>% filter(n > 1) 
%>% pull(Url) 51 | duplicate_players <- joined_primary %>% filter(Url %in% duplicate_players) 52 | 53 | # # inspecting these records, I might be able to get some more hits when comparing the player's YOB 54 | # no_longer_dups <- duplicate_players %>% 55 | # mutate(tm_yob = lubridate::year(player_dob)) %>% 56 | # filter(Born == tm_yob) 57 | # 58 | # still_dups <- no_longer_dups %>% 59 | # count(Player, Url, Born) %>% filter(n>1) %>% pull(Url) %>% unique() 60 | # 61 | # still_dups <- duplicate_players %>% 62 | # filter(Url %in% still_dups) 63 | # 64 | # no_longer_dups <- no_longer_dups %>% 65 | # filter(!Url %in% still_dups$Url) 66 | 67 | # now remove these records from the raw joined data 68 | # IMPORTANT: remember to add `duplicate_players_df` that has been cleaned manually back to the main df 69 | joined_primary <- joined_primary %>% 70 | filter(!Url %in% duplicate_players$Url) 71 | 72 | # get a full list of joins on full player name that I'm happy with 73 | joined_complete <- joined_primary %>% 74 | filter(!is.na(player_url)) 75 | 76 | # get a list of records where there were no matches on full player name 77 | joined_missing <- joined_primary %>% 78 | filter(is.na(player_url)) 79 | 80 | #----- secondary join type: -----# 81 | # here I'll try to join on surname and year of birth - would be nice to use DOB instead but I don't have it for FBref players 82 | joined_secondary <- joined_missing %>% select(-player_position) %>% 83 | mutate(fbref_surname = gsub(".*\\s", "", Player)) %>% 84 | select(-player_dob, -player_url) %>% 85 | left_join(tm_unique, by = c("fbref_surname" = "tm_surname", "Born" = "tm_yob")) 86 | 87 | # now there are some more duplicates as a result of this secondary join method 88 | additional_duplicated_players <- joined_secondary %>% 89 | filter(!is.na(player_url)) %>% 90 | count(Player, Url, sort = T) %>% 91 | filter(n > 1) %>% pull(Url) %>% unique() 92 | 93 | additional_duplicated_players <- joined_secondary %>% 94 | filter(Url %in% 
additional_duplicated_players) 95 | 96 | 97 | # combine all duplicated joins for manual rework: 98 | duplicate_players <- duplicate_players %>% 99 | bind_rows(additional_duplicated_players) 100 | 101 | duplicate_players <- duplicate_players %>% 102 | select(-fbref_surname, -player_name) 103 | 104 | 105 | joined_secondary <- joined_secondary %>% 106 | filter(!is.na(player_url), 107 | !Url %in% additional_duplicated_players$Url) %>% 108 | select(Player, Born, Url, player_dob, player_url, player_position) 109 | 110 | 111 | joined_finished <- joined_complete %>% 112 | filter(!is.na(player_url)) %>% 113 | bind_rows(joined_secondary) 114 | 115 | joined_finished <- joined_finished %>% 116 | select(-Squad, -Nation, -tm_squad, -tm_nationality) 117 | 118 | 119 | # create a file for manual rework by removing any of the records that have been matched since the creation of `joined_missing`: 120 | joined_missing <- joined_missing %>% 121 | filter(!Url %in% joined_finished$Url, 122 | !Url %in% duplicate_players$Url) 123 | 124 | 125 | # write files to work on manually 126 | write.csv(joined_finished, here("raw-data", "fbref-tm-player-mapping", "extra-leagues", "initial-match", "joined_finished.csv"), row.names = F) 127 | write.csv(joined_missing, here("output", "initial-match", "working-files", "joined_missing.csv"), row.names = F) 128 | write.csv(tm_unique, here("output", "initial-match", "working-files", "tm_unique.csv"), row.names = F) 129 | write.csv(duplicate_players, here("raw-data", "fbref-tm-player-mapping", "extra-leagues", "initial-match", "working-files", "duplicate_players_df.csv"), row.names = F) 130 | -------------------------------------------------------------------------------- /raw-data/fbref-tm-player-mapping/extra-leagues/initial-match/create_final_data_initial.R: -------------------------------------------------------------------------------- 1 | library(worldfootballR) 2 | library(tidyverse) 3 | library(googlesheets4) 4 | library(here) 5 | 6 | existing_df <- 
read.csv("https://github.com/JaseZiv/worldfootballR_data/raw/master/raw-data/fbref-tm-player-mapping/output/fbref_to_tm_mapping.csv", stringsAsFactors = FALSE) 7 | 8 | # read in files 9 | joined_finished <- read.csv(file.path("joined_finished.csv"), stringsAsFactors = F) 10 | joined_missing <- read.csv(file.path("working-files", "joined_missing.csv"), stringsAsFactors = F) 11 | duplicate_players <- tryCatch(read.csv(file.path("working-files", "duplicate_players_df_manual_fix.csv"), stringsAsFactors = F), 12 | error = function(e) data.frame()) 13 | # any_of() keeps only the columns that exist, so the empty data.frame() fallback above no longer makes select() fail 14 | duplicate_players <- duplicate_players %>% select(any_of(c("Player", "Born", "Url", "player_dob", "player_url", "player_position"))) 15 | 16 | 17 | matched_data <- bind_rows(joined_finished, joined_missing, duplicate_players) %>% 18 | arrange(Player) %>% 19 | mutate(player_url = ifelse(player_url == "", NA_character_, player_url)) 20 | 21 | 22 | matched_data <- matched_data %>% 23 | select(PlayerFBref=Player, UrlFBref=Url, UrlTmarkt=player_url, TmPos=player_position) 24 | 25 | # some players won't have a position listed (because they haven't been matched automatically) 26 | missing_pos <- matched_data %>% filter(!is.na(UrlTmarkt) & is.na(TmPos)) %>% pull(UrlTmarkt) 27 | 28 | # for these URLs, we can get their positions using the `tm_player_bio` function 29 | # scrape one URL at a time so a single failed request is skipped instead of aborting the run; start from typed empty `URL`/`position` columns so the clean-up and join below still work when nothing was scraped 30 | missing_pos_bios <- data.frame(URL = character(), position = character(), stringsAsFactors = FALSE) 31 | # seq_along() yields no iterations for an empty vector, whereas 1:length(x) would iterate over c(1, 0) 32 | for (i in seq_along(missing_pos)) { 33 | print(paste0("scraping ", i, "of", length(missing_pos))) 34 | df <- tryCatch(tm_player_bio(player_urls = missing_pos[i]), error = function(e) data.frame()) 35 | missing_pos_bios <- bind_rows(missing_pos_bios, df) 36 | } 37 | 38 | 39 | # need to clean these up from the bio data - for some reason some of them come with the position group (say "midfield") then the true position "Left Midfielder" 40 | # we only want "Left Midfielder" 41 | missing_pos_bios <- missing_pos_bios %>% 42 | mutate(TmPos = case_when( 43 | grepl(" - ", position) ~ gsub(".*- ", "", position), 44 | TRUE ~ position 45 | )) 46 | 47 | 
# join the present and missing player data 48 | matched_data <- matched_data %>% 49 | filter(!is.na(UrlTmarkt)) %>% 50 | filter(!is.na(TmPos)) %>% 51 | bind_rows( 52 | matched_data %>% 53 | filter(!is.na(UrlTmarkt)) %>% 54 | filter(is.na(TmPos)) %>% 55 | select(-TmPos) %>% 56 | left_join(missing_pos_bios %>% select(URL, TmPos), by = c("UrlTmarkt" = "URL")) 57 | ) %>% 58 | arrange(PlayerFBref) 59 | 60 | 61 | # create final output df 62 | final_output <- bind_rows(existing_df, matched_data) %>% 63 | arrange(PlayerFBref) %>% 64 | distinct(UrlFBref, .keep_all=T) 65 | 66 | #============= 67 | # Write Files 68 | #============= 69 | 70 | # write file for commit to GitHub: 71 | write.csv(final_output, here("raw-data", "fbref-tm-player-mapping", "output", "fbref_to_tm_mapping.csv"), row.names = FALSE) 72 | 73 | # Write file to Googlesheets: 74 | # get the sheet id 75 | ss <- as_sheets_id("https://docs.google.com/spreadsheets/d/1GjjS9IRp6FVzVX5QyfmttMk8eYBtIzuZ_YIM0VWg8OY/edit#gid=61874932") %>% 76 | as.character() 77 | 78 | # write the sheet 79 | sheet_write(final_output, 80 | ss, 81 | sheet = "fbref_to_tm_mapping") 82 | -------------------------------------------------------------------------------- /raw-data/fbref-tm-player-mapping/extra-leagues/initial-match/fbref_extra_leagues.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/raw-data/fbref-tm-player-mapping/extra-leagues/initial-match/fbref_extra_leagues.rds -------------------------------------------------------------------------------- /raw-data/fbref-tm-player-mapping/extra-leagues/initial-match/fbref_mls.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/raw-data/fbref-tm-player-mapping/extra-leagues/initial-match/fbref_mls.rds 
-------------------------------------------------------------------------------- /raw-data/fbref-tm-player-mapping/extra-leagues/initial-match/fbref_selenium.R: -------------------------------------------------------------------------------- 1 | 2 | library(RSelenium) 3 | library(xml2) 4 | library(rvest) 5 | library(tidyverse) 6 | 7 | 8 | # Set Up Selenium --------------------------------------------------------- 9 | 10 | rD <- rsDriver(browser="firefox", port=4445L, verbose=TRUE) 11 | remDr <- rD[["client"]] 12 | 13 | # remDr$navigate("https://fbref.com/en/comps/23/2020-2021/playingtime/2020-2021-Eredivisie-Stats") 14 | 15 | # read_html_selenium(): open `page_url` in the Selenium client `driver` (defaults to the global `remDr`), press End on <body> to scroll to the bottom, then return the page source parsed by read_html(). NOTE(review): `sleep` is accepted and defaulted to 0 but is not used anywhere in the body. 16 | # function to open page 17 | read_html_selenium <- function (page_url, driver, sleep) { 18 | 19 | if (missing(driver)) { 20 | driver <- remDr 21 | 22 | } 23 | 24 | if (missing(sleep)) { 25 | sleep <- 0 26 | } 27 | 28 | # use `driver` (not the global `remDr`) for every call so a client passed by the caller is honoured 29 | driver$navigate(page_url) 30 | Sys.sleep(1) 31 | # press End on <body> to scroll to the bottom of the page before reading its source 32 | webElem <- driver$findElement("css", "body") 33 | Sys.sleep(1) 34 | webElem$sendKeysToElement(list(key = "end")) 35 | Sys.sleep(2) 36 | # webElem <- remDr$findElement("css", ".paginator") 37 | # webElem$sendKeysToElement(list(key = "end")) 38 | 39 | driver$getPageSource() %>% 40 | .[[1]] %>% .[1] %>% read_html(.) 
41 | 42 | } 43 | 44 | 45 | 46 | # Variables --------------------------------------------------------------- 47 | 48 | main_url <- "https://fbref.com" 49 | 50 | 51 | country_abbr <- c("NED", "BRA", "MEX", "POR") 52 | gender_M_F <- "M" 53 | season_end_year_num <- c(2019:2023) 54 | comp_tier <- "1st" 55 | 56 | 57 | 58 | # Get Seasons URLs --------------------------------------------------------- 59 | 60 | seasons <- read.csv("https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/master/raw-data/all_leages_and_cups/all_competitions.csv", stringsAsFactors = F) 61 | 62 | seasons_urls <- seasons %>% 63 | dplyr::filter(stringr::str_detect(.data[["competition_type"]], "Leagues")) %>% 64 | dplyr::filter(country %in% country_abbr, 65 | gender %in% gender_M_F, 66 | season_end_year %in% season_end_year_num, 67 | tier %in% comp_tier) %>% 68 | dplyr::arrange(season_end_year) %>% 69 | dplyr::pull(seasons_urls) %>% unique() 70 | 71 | 72 | championship_seasons_urls <- seasons %>% 73 | dplyr::filter(stringr::str_detect(.data[["competition_type"]], "Leagues")) %>% 74 | dplyr::filter(country == "ENG", 75 | gender == "M", 76 | season_end_year %in% c(2019:2023), 77 | tier == "2nd") %>% 78 | dplyr::arrange(season_end_year) %>% 79 | dplyr::pull(seasons_urls) %>% unique() 80 | 81 | 82 | seasons_urls <- c(seasons_urls, championship_seasons_urls) 83 | 84 | 85 | 86 | # Scrape FBREF ------------------------------------------------------------ 87 | fbref <- data.frame() 88 | 89 | for(season_url in seasons_urls) { 90 | 91 | print(paste0("scraping season: ", season_url)) 92 | 93 | start_part <- sub('/[^/]*$', '', season_url) 94 | end_part <- gsub(".*/", "", season_url) 95 | 96 | stat_urls <- paste0(start_part, "/", "playingtime", "/", end_part) 97 | 98 | Sys.sleep(5) 99 | pg <- read_html_selenium(stat_urls) 100 | 101 | tab_elem <- pg %>% html_elements("#div_stats_playing_time") 102 | 103 | urls <- tab_elem %>% 104 | rvest::html_nodes("table") %>% 105 | rvest::html_nodes("tbody") %>% 106 | 
rvest::html_nodes("tr") %>% rvest::html_node("td a") %>% rvest::html_attr("href") %>% paste0(main_url, .) 107 | 108 | stat_df <- tab_elem %>% html_table() %>% data.frame() 109 | stat_df <- stat_df[,c(2,3,4,5,7)] 110 | stat_df_names <- stat_df[1,] %>% as.character() 111 | stat_df <- stat_df[-1,] 112 | colnames(stat_df) <- stat_df_names 113 | 114 | stat_df$Url <- urls 115 | stat_df$season_url <- season_url 116 | 117 | stat_df <- stat_df %>% 118 | filter(Nation != "Nation") 119 | 120 | stat_df <- stat_df %>% 121 | left_join(seasons %>% select(season_end_year, competition_name, seasons_urls), by = c("season_url" = "seasons_urls")) 122 | 123 | fbref <- bind_rows(fbref, stat_df) 124 | } 125 | 126 | 127 | 128 | setwd("../") 129 | saveRDS(fbref, "fbref_extra_leagues.rds") 130 | 131 | 132 | 133 | -------------------------------------------------------------------------------- /raw-data/fbref-tm-player-mapping/extra-leagues/initial-match/get_tm_extra_leagues.R: -------------------------------------------------------------------------------- 1 | library(worldfootballR) 2 | library(tidyverse) 3 | library(here) 4 | 5 | 6 | 7 | 8 | valuations <- tm_player_market_values(country_name = c("Netherlands", "Portugal", "Brazil", "Mexico"), 9 | start_year = c(2018:2022)) 10 | 11 | saveRDS(valuations, "tm_players_extra_tier1.rds") 12 | 13 | 14 | # 15 | 16 | champ_valuations <- tm_player_market_values(country_name = "", start_year = c(2018:2022), league_url = "https://www.transfermarkt.com/championship/startseite/wettbewerb/GB2") 17 | 18 | saveRDS(champ_valuations, "tm_players_championship.rds") 19 | -------------------------------------------------------------------------------- /raw-data/fbref-tm-player-mapping/extra-leagues/initial-match/mls/build_mapping_dictionary.R: -------------------------------------------------------------------------------- 1 | library(worldfootballR) 2 | library(tidyverse) 3 | library(here) 4 | 5 | fbref <- readRDS(here("raw-data", "fbref-tm-player-mapping", 
"extra-leagues", "initial-match", "fbref_mls.rds")) 6 | tm <- readRDS(here("raw-data", "fbref-tm-player-mapping", "extra-leagues", "initial-match", "tm_players_mls.rds")) 7 | 8 | matched_data <- read.csv("https://github.com/JaseZiv/worldfootballR_data/raw/master/raw-data/fbref-tm-player-mapping/output/fbref_to_tm_mapping.csv", stringsAsFactors = FALSE) 9 | 10 | 11 | fbref <- fbref %>% filter(!Url %in% matched_data$UrlFBref) 12 | 13 | 14 | fbref <- fbref %>% 15 | select(season_end_year, Squad, competition_name, Player, Nation, Born, Url) %>% 16 | mutate(fbref_surname = str_squish(gsub(".*\\s", "", Player))) %>% 17 | arrange(Player, Url, desc(season_end_year)) %>% 18 | distinct(Url, .keep_all = T) 19 | 20 | tm <- tm %>% 21 | select(comp_name, region, country, season_start_year, squad, player_name, player_position, 22 | player_dob, player_nationality, player_market_value_euro, player_url) %>% 23 | arrange(player_name) 24 | 25 | # want a df to help with inspection of names with special characters 26 | tm_unique <- tm %>% 27 | arrange(player_name, player_url, desc(season_start_year)) %>% 28 | distinct(player_name, player_dob, player_url, .keep_all = T) %>% 29 | mutate(tm_surname = str_squish(gsub(".*\\s", "", player_name)), 30 | tm_yob = as.character(lubridate::year(player_dob))) 31 | 32 | #----- primary join type: -----# 33 | # here I will join the two datasets on the player names 34 | # joined_primary <- fbref %>% select(Player, Born, Url) %>% distinct(Url, .keep_all = T) %>% 35 | # left_join(tm %>% select(player_name, player_dob, player_url) %>% distinct(player_url, .keep_all = T), by = c("Player" = "player_name")) 36 | 37 | joined_primary <- fbref %>% select(Player, Born, Url, Squad, Nation) %>% distinct(Url, .keep_all = T) %>% 38 | left_join(tm_unique %>% select(player_name, player_dob, player_url, tm_yob, tm_squad=squad, tm_nationality=player_nationality, player_position) %>% distinct(player_url, .keep_all = T), 39 | by = c("Player" = "player_name", "Born" = 
"tm_yob")) 40 | 41 | # arrange by player name 42 | joined_primary <- joined_primary %>% arrange(Player) 43 | 44 | 45 | # these players have multiple records in each data set - think "Adama Traoré" or "Rafael" or "Raúl García" for example 46 | # will need to manually go through each of these to map the correct player 47 | duplicate_players <- joined_primary %>% count(Player, Url, sort = T) %>% filter(n > 1) %>% pull(Url) 48 | duplicate_players <- joined_primary %>% filter(Url %in% duplicate_players) 49 | 50 | # # inspecting these records, I might be able to get some more hits when comparing the player's YOB 51 | # no_longer_dups <- duplicate_players %>% 52 | # mutate(tm_yob = lubridate::year(player_dob)) %>% 53 | # filter(Born == tm_yob) 54 | # 55 | # still_dups <- no_longer_dups %>% 56 | # count(Player, Url, Born) %>% filter(n>1) %>% pull(Url) %>% unique() 57 | # 58 | # still_dups <- duplicate_players %>% 59 | # filter(Url %in% still_dups) 60 | # 61 | # no_longer_dups <- no_longer_dups %>% 62 | # filter(!Url %in% still_dups$Url) 63 | 64 | # now remove these records from the raw joined data 65 | # IMPORTANT: remember to add `duplicate_players_df` that has been cleaned manually back to the main df 66 | joined_primary <- joined_primary %>% 67 | filter(!Url %in% duplicate_players$Url) 68 | 69 | # get a full list of joins on full player name that I'm happy with 70 | joined_complete <- joined_primary %>% 71 | filter(!is.na(player_url)) 72 | 73 | # get a list of records where there were no matches on full player name 74 | joined_missing <- joined_primary %>% 75 | filter(is.na(player_url)) 76 | 77 | #----- secondary join type: -----# 78 | # here I'll try to join on surname and year of birth - would be nice to use DOB instead but I don't have it for FBref players 79 | joined_secondary <- joined_missing %>% select(-player_position) %>% 80 | mutate(fbref_surname = gsub(".*\\s", "", Player)) %>% 81 | select(-player_dob, -player_url) %>% 82 | left_join(tm_unique, by = 
c("fbref_surname" = "tm_surname", "Born" = "tm_yob")) 83 | 84 | # now there are some more duplicates as a result of this secondary join method 85 | additional_duplicated_players <- joined_secondary %>% 86 | filter(!is.na(player_url)) %>% 87 | count(Player, Url, sort = T) %>% 88 | filter(n > 1) %>% pull(Url) %>% unique() 89 | 90 | additional_duplicated_players <- joined_secondary %>% 91 | filter(Url %in% additional_duplicated_players) 92 | 93 | 94 | # combine all duplicated joins for manual rework: 95 | duplicate_players <- duplicate_players %>% 96 | bind_rows(additional_duplicated_players) 97 | 98 | duplicate_players <- duplicate_players %>% 99 | select(-fbref_surname, -player_name) 100 | 101 | 102 | joined_secondary <- joined_secondary %>% 103 | filter(!is.na(player_url), 104 | !Url %in% additional_duplicated_players$Url) %>% 105 | select(Player, Born, Url, player_dob, player_url, player_position) 106 | 107 | 108 | joined_finished <- joined_complete %>% 109 | filter(!is.na(player_url)) %>% 110 | bind_rows(joined_secondary) 111 | 112 | joined_finished <- joined_finished %>% 113 | select(-Squad, -Nation, -tm_squad, -tm_nationality) 114 | 115 | 116 | # create a file for manual rework by removing any of the records that have been matched since the creation of `joined_missing`: 117 | joined_missing <- joined_missing %>% 118 | filter(!Url %in% joined_finished$Url, 119 | !Url %in% duplicate_players$Url) 120 | 121 | 122 | # write files to work on manually 123 | write.csv(joined_finished, here("raw-data", "fbref-tm-player-mapping", "extra-leagues", "initial-match", "mls", "joined_finished.csv"), row.names = F) 124 | write.csv(joined_missing, here("raw-data", "fbref-tm-player-mapping", "extra-leagues", "initial-match", "mls", "joined_missing.csv"), row.names = F) 125 | write.csv(tm_unique, here("raw-data", "fbref-tm-player-mapping", "extra-leagues", "initial-match", "mls", "tm_unique.csv"), row.names = F) 126 | write.csv(duplicate_players, here("raw-data", 
"fbref-tm-player-mapping", "extra-leagues", "initial-match", "mls", "duplicate_players_df.csv"), row.names = F) 127 | -------------------------------------------------------------------------------- /raw-data/fbref-tm-player-mapping/extra-leagues/initial-match/mls/create_final_data_initial.R: -------------------------------------------------------------------------------- 1 | library(worldfootballR) 2 | library(tidyverse) 3 | library(googlesheets4) 4 | library(here) 5 | 6 | existing_df <- read.csv("https://github.com/JaseZiv/worldfootballR_data/raw/master/raw-data/fbref-tm-player-mapping/output/fbref_to_tm_mapping.csv", stringsAsFactors = FALSE) 7 | 8 | 9 | # read in files 10 | joined_finished <- read.csv(here("raw-data", "fbref-tm-player-mapping", "extra-leagues", "initial-match", "mls", "joined_finished.csv"), stringsAsFactors = F) 11 | joined_missing <- read.csv(here("raw-data", "fbref-tm-player-mapping", "extra-leagues", "initial-match", "mls", "joined_missing_manual_fix.csv"), stringsAsFactors = F) 12 | duplicate_players <- tryCatch(read.csv(here("raw-data", "fbref-tm-player-mapping", "extra-leagues", "initial-match", "mls", "duplicate_players_df_manual_fix.csv"), stringsAsFactors = F), 13 | error = function(e) data.frame()) 14 | # any_of() keeps only the columns that exist, so the empty data.frame() fallback above no longer makes select() fail 15 | duplicate_players <- duplicate_players %>% select(any_of(c("Player", "Born", "Url", "player_dob", "player_url", "player_position"))) 16 | 17 | 18 | matched_data <- bind_rows(joined_finished, joined_missing, duplicate_players) %>% 19 | arrange(Player) %>% 20 | mutate(player_url = ifelse(player_url == "", NA_character_, player_url)) 21 | 22 | 23 | matched_data <- matched_data %>% 24 | select(PlayerFBref=Player, UrlFBref=Url, UrlTmarkt=player_url, TmPos=player_position) 25 | 26 | # some players won't have a position listed (because they haven't been matched automatically) 27 | missing_pos <- matched_data %>% filter(!is.na(UrlTmarkt) & is.na(TmPos)) %>% pull(UrlTmarkt) 28 | 29 | # for these URLs, we can get their positions using the `tm_player_bio` 
function 30 | # scrape one URL at a time so a single failed request is skipped instead of aborting the run; start from typed empty `URL`/`position` columns so the clean-up and join below still work when nothing was scraped 31 | missing_pos_bios <- data.frame(URL = character(), position = character(), stringsAsFactors = FALSE) 32 | # seq_along() yields no iterations for an empty vector, whereas 1:length(x) would iterate over c(1, 0) 33 | for (i in seq_along(missing_pos)) { 34 | print(paste0("scraping ", i, "of", length(missing_pos))) 35 | df <- tryCatch(tm_player_bio(player_urls = missing_pos[i]), error = function(e) data.frame()) 36 | missing_pos_bios <- bind_rows(missing_pos_bios, df) 37 | } 38 | 39 | 40 | # need to clean these up from the bio data - for some reason some of them come with the position group (say "midfield") then the true position "Left Midfielder" 41 | # we only want "Left Midfielder" 42 | missing_pos_bios <- missing_pos_bios %>% 43 | mutate(TmPos = case_when( 44 | grepl(" - ", position) ~ gsub(".*- ", "", position), 45 | TRUE ~ position 46 | )) 47 | 48 | # join the present and missing player data 49 | matched_data <- matched_data %>% 50 | filter(!is.na(UrlTmarkt)) %>% 51 | filter(!is.na(TmPos)) %>% 52 | bind_rows( 53 | matched_data %>% 54 | filter(!is.na(UrlTmarkt)) %>% 55 | filter(is.na(TmPos)) %>% 56 | select(-TmPos) %>% 57 | left_join(missing_pos_bios %>% select(URL, TmPos), by = c("UrlTmarkt" = "URL")) 58 | ) %>% 59 | arrange(PlayerFBref) 60 | 61 | 62 | # create final output df 63 | final_output <- bind_rows(existing_df, matched_data) %>% 64 | arrange(PlayerFBref) %>% 65 | distinct(UrlFBref, .keep_all=T) 66 | 67 | #============= 68 | # Write Files 69 | #============= 70 | 71 | # write file for commit to GitHub: 72 | write.csv(final_output, here("raw-data", "fbref-tm-player-mapping", "output", "fbref_to_tm_mapping.csv"), row.names = FALSE) 73 | 74 | # Write file to Googlesheets: 75 | # get the sheet id 76 | ss <- as_sheets_id("https://docs.google.com/spreadsheets/d/1GjjS9IRp6FVzVX5QyfmttMk8eYBtIzuZ_YIM0VWg8OY/edit#gid=61874932") %>% 77 | as.character() 78 | 79 | # write the sheet 80 | sheet_write(final_output, 81 | ss, 82 | sheet = "fbref_to_tm_mapping") 83 | --------------------------------------------------------------------------------
/raw-data/fbref-tm-player-mapping/extra-leagues/initial-match/mls/duplicate_players_df.csv: -------------------------------------------------------------------------------- 1 | "Player","Born","Url","Squad","Nation","player_dob","player_url","tm_squad","tm_nationality","player_position","comp_name","region","country","season_start_year","squad","player_nationality","player_market_value_euro" 2 | "Angelo Rodríguez","1989","https://fbref.com/en/players/0e9ea6cf/Angelo-Rodriguez","Minnesota Utd","co COL",1989-04-04,"https://www.transfermarkt.com/angelo-rodriguez/profil/spieler/178116",NA,NA,"Centre-Forward","MLS","Americas","United States",2018,"Minnesota United FC","Colombia",1250000 3 | "Angelo Rodríguez","1989","https://fbref.com/en/players/0e9ea6cf/Angelo-Rodriguez","Minnesota Utd","co COL",1989-07-23,"https://www.transfermarkt.com/victor-rodriguez/profil/spieler/129753",NA,NA,"Attacking Midfield","MLS","Americas","United States",2018,"Seattle Sounders FC","Spain",1500000 4 | "Cristian Martínez","1997","https://fbref.com/en/players/fdb15495/Cristian-Martinez","Chicago Fire","pa PAN",1997-02-06,"https://www.transfermarkt.com/christian-martinez/profil/spieler/419247",NA,NA,"Midfield","MLS","Americas","United States",2018,"Chicago Fire FC","Panama",450000 5 | "Cristian Martínez","1997","https://fbref.com/en/players/fdb15495/Cristian-Martinez","Chicago Fire","pa PAN",1997-06-05,"https://www.transfermarkt.com/douglas-martinez/profil/spieler/443384",NA,NA,"Centre-Forward","MLS","Americas","United States",2020,"Real Salt Lake City","Honduras",6e+05 6 | "Cristian Martínez","1997","https://fbref.com/en/players/fdb15495/Cristian-Martinez","Chicago Fire","pa PAN",1997-03-15,"https://www.transfermarkt.com/isidro-martinez/profil/spieler/585732",NA,NA,"Defensive Midfield","MLS","Americas","United States",2018,"Houston Dynamo FC","United States",NA 7 | "Gonzalo Nicolás Martínez","1993","https://fbref.com/en/players/cd00ceea/Gonzalo-Nicolas-Martinez","Atlanta Utd","ar 
ARG",1993-02-12,"https://www.transfermarkt.com/jose-antonio-martinez/profil/spieler/311287",NA,NA,"Centre-Back","MLS","Americas","United States",2021,"FC Dallas","Spain",1500000 8 | "Gonzalo Nicolás Martínez","1993","https://fbref.com/en/players/cd00ceea/Gonzalo-Nicolas-Martinez","Atlanta Utd","ar ARG",1993-05-19,"https://www.transfermarkt.com/josef-martinez/profil/spieler/162569",NA,NA,"Centre-Forward","MLS","Americas","United States",2021,"Atlanta United FC","Venezuela",12500000 9 | "Gonzalo Nicolás Martínez","1993","https://fbref.com/en/players/cd00ceea/Gonzalo-Nicolas-Martinez","Atlanta Utd","ar ARG",1993-06-13,"https://www.transfermarkt.com/pity-martinez/profil/spieler/281405",NA,NA,"Attacking Midfield","MLS","Americas","United States",2019,"Atlanta United FC","Argentina",1.2e+07 10 | "José Martínez","1993","https://fbref.com/en/players/8e049cbd/Jose-Martinez","FC Dallas","es ESP",1993-02-12,"https://www.transfermarkt.com/jose-antonio-martinez/profil/spieler/311287",NA,NA,"Centre-Back","MLS","Americas","United States",2021,"FC Dallas","Spain",1500000 11 | "José Martínez","1993","https://fbref.com/en/players/8e049cbd/Jose-Martinez","FC Dallas","es ESP",1993-05-19,"https://www.transfermarkt.com/josef-martinez/profil/spieler/162569",NA,NA,"Centre-Forward","MLS","Americas","United States",2021,"Atlanta United FC","Venezuela",12500000 12 | "José Martínez","1993","https://fbref.com/en/players/8e049cbd/Jose-Martinez","FC Dallas","es ESP",1993-06-13,"https://www.transfermarkt.com/pity-martinez/profil/spieler/281405",NA,NA,"Attacking Midfield","MLS","Americas","United States",2019,"Atlanta United FC","Argentina",1.2e+07 13 | "Valentín Castellanos","1998","https://fbref.com/en/players/da76bab4/Valentin-Castellanos","NYCFC","ar ARG",1998-05-11,"https://www.transfermarkt.com/robert-castellanos/profil/spieler/488127",NA,NA,"Centre-Back","MLS","Americas","United States",2020,"Nashville SC","United States",2e+05 14 | "Valentín 
Castellanos","1998","https://fbref.com/en/players/da76bab4/Valentin-Castellanos","NYCFC","ar ARG",1998-10-03,"https://www.transfermarkt.com/taty-castellanos/profil/spieler/522784",NA,NA,"Centre-Forward","MLS","Americas","United States",2021,"New York City FC","Argentina",1.2e+07 15 | "William Sands","2000","https://fbref.com/en/players/960a4473/William-Sands","Columbus Crew","us USA",2000-07-06,"https://www.transfermarkt.com/james-sands/profil/spieler/393321",NA,NA,"Centre-Back","MLS","Americas","United States",2020,"New York City FC","United States",2500000 16 | "William Sands","2000","https://fbref.com/en/players/960a4473/William-Sands","Columbus Crew","us USA",2000-07-06,"https://www.transfermarkt.com/will-sands/profil/spieler/393327",NA,NA,"Left-Back","MLS","Americas","United States",2021,"Columbus Crew","United States",50000 17 | -------------------------------------------------------------------------------- /raw-data/fbref-tm-player-mapping/extra-leagues/initial-match/mls/duplicate_players_df_manual_fix.csv: -------------------------------------------------------------------------------- 1 | Player,Born,Url,Squad,Nation,player_dob,player_url,tm_squad,tm_nationality,player_position,comp_name,region,country,season_start_year,squad,player_nationality,player_market_value_euro 2 | Angelo Rodríguez,1989,https://fbref.com/en/players/0e9ea6cf/Angelo-Rodriguez,Minnesota Utd,co COL,4/4/1989,https://www.transfermarkt.com/angelo-rodriguez/profil/spieler/178116,NA,NA,Centre-Forward,MLS,Americas,United States,2018,Minnesota United FC,Colombia,1250000 3 | Cristian Martínez,1997,https://fbref.com/en/players/fdb15495/Cristian-Martinez,Chicago Fire,pa PAN,6/2/1997,https://www.transfermarkt.com/christian-martinez/profil/spieler/419247,NA,NA,Midfield,MLS,Americas,United States,2018,Chicago Fire FC,Panama,450000 4 | Gonzalo Nicolás Martínez,1993,https://fbref.com/en/players/cd00ceea/Gonzalo-Nicolas-Martinez,Atlanta Utd,ar 
ARG,13/6/1993,https://www.transfermarkt.com/pity-martinez/profil/spieler/281405,NA,NA,Attacking Midfield,MLS,Americas,United States,2019,Atlanta United FC,Argentina,1.20E+07 5 | José Martínez,1993,https://fbref.com/en/players/8e049cbd/Jose-Martinez,FC Dallas,es ESP,12/2/1993,https://www.transfermarkt.com/jose-antonio-martinez/profil/spieler/311287,NA,NA,Centre-Back,MLS,Americas,United States,2021,FC Dallas,Spain,1500000 6 | Valentín Castellanos,1998,https://fbref.com/en/players/da76bab4/Valentin-Castellanos,NYCFC,ar ARG,3/10/1998,https://www.transfermarkt.com/taty-castellanos/profil/spieler/522784,NA,NA,Centre-Forward,MLS,Americas,United States,2021,New York City FC,Argentina,1.20E+07 7 | William Sands,2000,https://fbref.com/en/players/960a4473/William-Sands,Columbus Crew,us USA,6/7/2000,https://www.transfermarkt.com/will-sands/profil/spieler/393327,NA,NA,Left-Back,MLS,Americas,United States,2021,Columbus Crew,United States,50000 -------------------------------------------------------------------------------- /raw-data/fbref-tm-player-mapping/extra-leagues/initial-match/mls/get_data.R: --------------------------------------------------------------------------------
library(RSelenium)
library(xml2)
library(rvest)
library(tidyverse)


# Set Up Selenium ---------------------------------------------------------

# Start a Selenium server driving Firefox; `remDr` is the client used for all
# page loads below.
rD <- rsDriver(browser="firefox", port=4445L, verbose=TRUE)
remDr <- rD[["client"]]

# remDr$navigate("https://fbref.com/en/comps/23/2020-2021/playingtime/2020-2021-Eredivisie-Stats")


# Load a page through Selenium and return it parsed as an HTML document.
#
# page_url: URL to open.
# driver:   RSelenium client to drive; falls back to the global `remDr` when
#           not supplied.
# sleep:    extra seconds to wait (on top of the fixed pauses below) before the
#           page source is read; 0 when not supplied.
#
# Returns the result of `read_html()` on the page source reported by the
# driver after the page has been scrolled to the bottom.
read_html_selenium <- function (page_url, driver, sleep) {

  if (missing(driver)) {
    driver <- remDr
  }

  if (missing(sleep)) {
    sleep <- 0
  }

  # Use the driver that was passed in (previously the global `remDr` was always
  # used, so the `driver` argument had no effect).
  driver$navigate(page_url)
  Sys.sleep(1)
  # send the "end" key to the page body to jump to the bottom of the page
  # before the source is captured
  webElem <- driver$findElement("css", "body")
  Sys.sleep(1)
  webElem$sendKeysToElement(list(key = "end"))
  Sys.sleep(2)
  # honour the caller-requested additional wait (a no-op for the default of 0)
  Sys.sleep(sleep)
  # webElem <- remDr$findElement("css", ".paginator")
  # webElem$sendKeysToElement(list(key = "end"))

  # getPageSource() returns a list; its first element holds the HTML text
  driver$getPageSource()[[1]][1] %>% read_html()

}



# Variables ---------------------------------------------------------------

main_url <- "https://fbref.com"


country_abbr <- c("USA")
gender_M_F <- "M"
season_end_year_num <- c(2019:2023)
comp_tier <- "1st"



# Get Seasons URLs ---------------------------------------------------------

seasons <- read.csv("https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/master/raw-data/all_leages_and_cups/all_competitions.csv", stringsAsFactors = F)

# keep only league seasons matching the country / gender / years / tier above
seasons_urls <- seasons %>%
  dplyr::filter(stringr::str_detect(.data[["competition_type"]], "Leagues")) %>%
  dplyr::filter(country %in% country_abbr,
                gender %in% gender_M_F,
                season_end_year %in% season_end_year_num,
                tier %in% comp_tier) %>%
  dplyr::arrange(season_end_year) %>%
  dplyr::pull(seasons_urls) %>% unique()


# championship_seasons_urls <- seasons %>%
#   dplyr::filter(stringr::str_detect(.data[["competition_type"]], "Leagues")) %>%
#   dplyr::filter(country == "ENG",
#                 gender == "M",
#                 season_end_year %in% c(2019:2023),
#                 tier == "2nd") %>%
#   dplyr::arrange(season_end_year) %>%
#   dplyr::pull(seasons_urls) %>% unique()
#
#
# seasons_urls <- c(seasons_urls, championship_seasons_urls)



# Scrape FBREF ------------------------------------------------------------
fbref <- data.frame()

# One row per season URL in the lookup, so the join inside the loop cannot
# multiply player rows. NOTE(review): the `unique()` above suggests season URLs
# may repeat in `seasons` - confirm against all_competitions.csv.
season_lookup <- seasons %>%
  select(season_end_year, competition_name, seasons_urls) %>%
  distinct()

for(season_url in seasons_urls) {

  print(paste0("scraping season: ", season_url))

  # turn ".../<season page>" into ".../playingtime/<season page>"
  start_part <- sub('/[^/]*$', '', season_url)
  end_part <- gsub(".*/", "", season_url)

  stat_urls <- paste0(start_part, "/", "playingtime", "/", end_part)

  Sys.sleep(5)
  pg <- read_html_selenium(stat_urls)

  tab_elem <- pg %>% html_elements("#div_stats_playing_time")

  # first link in each table body row, prefixed with the site root
  urls <- tab_elem %>%
    rvest::html_nodes("table") %>%
    rvest::html_nodes("tbody") %>%
    rvest::html_nodes("tr") %>% rvest::html_node("td a") %>% rvest::html_attr("href") %>% paste0(main_url, .)

  stat_df <- tab_elem %>% html_table() %>% data.frame()
  stat_df <- stat_df[,c(2,3,4,5,7)]
  # the first data row holds the real column names
  stat_df_names <- stat_df[1,] %>% as.character()
  stat_df <- stat_df[-1,]
  colnames(stat_df) <- stat_df_names

  stat_df$Url <- urls
  stat_df$season_url <- season_url

  # drop header rows that are repeated inside the table body
  stat_df <- stat_df %>%
    filter(Nation != "Nation")

  stat_df <- stat_df %>%
    left_join(season_lookup, by = c("season_url" = "seasons_urls"))

  fbref <- bind_rows(fbref, stat_df)
}

# Selenium is not needed past this point: close the browser session and stop
# the server started by rsDriver() so it does not linger after the script.
remDr$close()
rD[["server"]]$stop()



# NOTE(review): the working directory is moved up one level before here::here()
# is first called - confirm this is still required for the project root lookup.
setwd("../")
saveRDS(fbref, here::here("raw-data", "fbref-tm-player-mapping", "extra-leagues", "initial-match", "fbref_mls.rds"))


library(worldfootballR)


valuations <- data.frame()

for(i in c(2018:2021)) {

  print(paste("scraping year:", i))
  vals <- tm_player_market_values(country_name = c("United States"),
                                  start_year = i)

  valuations <- bind_rows(valuations, vals)
}



saveRDS(valuations, here::here("raw-data", "fbref-tm-player-mapping", "extra-leagues", "initial-match", "tm_players_mls.rds"))
-------------------------------------------------------------------------------- /raw-data/fbref-tm-player-mapping/extra-leagues/initial-match/tm_players_championship.rds: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/raw-data/fbref-tm-player-mapping/extra-leagues/initial-match/tm_players_championship.rds -------------------------------------------------------------------------------- /raw-data/fbref-tm-player-mapping/extra-leagues/initial-match/tm_players_extra_tier1.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/raw-data/fbref-tm-player-mapping/extra-leagues/initial-match/tm_players_extra_tier1.rds -------------------------------------------------------------------------------- /raw-data/fbref-tm-player-mapping/extra-leagues/initial-match/tm_players_mls.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/raw-data/fbref-tm-player-mapping/extra-leagues/initial-match/tm_players_mls.rds -------------------------------------------------------------------------------- /raw-data/fbref-tm-player-mapping/output/fbref_to_tm_mapping.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/raw-data/fbref-tm-player-mapping/output/fbref_to_tm_mapping.csv -------------------------------------------------------------------------------- /raw-data/fbref-tm-player-mapping/output/initial-match/build_mapping_dictionary.R: -------------------------------------------------------------------------------- 1 | library(worldfootballR) 2 | library(tidyverse) 3 | library(here) 4 | 5 | 6 | 7 | playing_time <- fb_big5_advanced_season_stats(season_end_year = c(2018:2022), 8 | stat_type = "playing_time", 9 | team_or_player = "player") 10 | saveRDS(playing_time, "fbref_players.rds") 11 | 12 | 13 | 
# Transfermarkt valuations for the big five leagues. Uses
# tm_player_market_values(), the same worldfootballR function the other
# mapping scripts in this repository call with these arguments.
valuations <- tm_player_market_values(country_name = c("England", "Spain", "France", "Italy", "Germany"),
                                      start_year = c(2017:2021))
saveRDS(valuations, "tm_players.rds")

###############################################################################################################

fbref <- readRDS("fbref_players.rds")
tm <- readRDS("tm_players.rds")


fbref <- fbref %>%
  select(Season_End_Year, Squad, Comp, Player, Nation, Born, Url) %>%
  # surname = everything after the last whitespace in the player name
  mutate(fbref_surname = str_squish(gsub(".*\\s", "", Player))) %>%
  arrange(Player)

tm <- tm %>%
  select(comp_name, region, country, season_start_year, squad, player_name, player_position,
         player_dob, player_nationality, player_market_value_euro, player_url) %>%
  arrange(player_name)

# want a df to help with inspection of names with special characters
tm_unique <- tm %>%
  distinct(player_name, player_dob, player_url) %>%
  mutate(tm_surname = str_squish(gsub(".*\\s", "", player_name)),
         tm_yob = lubridate::year(player_dob))

#----- primary join type: -----#
# here I will join the two datasets on the player names
# joined_primary <- fbref %>% select(Player, Born, Url) %>% distinct(Url, .keep_all = T) %>%
#   left_join(tm %>% select(player_name, player_dob, player_url) %>% distinct(player_url, .keep_all = T), by = c("Player" = "player_name"))

# one row per FBref URL, matched to Transfermarkt on full name and year of birth
joined_primary <- fbref %>% select(Player, Born, Url) %>% distinct(Url, .keep_all = TRUE) %>%
  left_join(tm_unique %>% select(player_name, player_dob, player_url, tm_yob) %>% distinct(player_url, .keep_all = TRUE),
            by = c("Player" = "player_name", "Born" = "tm_yob"))

# arrange by player name
joined_primary <- joined_primary %>% arrange(Player)


# these players have multiple records in each data set - think "Adama Traoré" or "Rafael" or "Raúl García" for example
# will need to manually go through each of these to map the correct player
duplicate_players <- joined_primary %>% count(Player, Url, sort = TRUE) %>% filter(n > 1) %>% pull(Url)
duplicate_players <- joined_primary %>% filter(Url %in% duplicate_players)

# # inspecting these records, I might be able to get some more hits when comparing the player's YOB
# no_longer_dups <- duplicate_players %>%
#   mutate(tm_yob = lubridate::year(player_dob)) %>%
#   filter(Born == tm_yob)
#
# still_dups <- no_longer_dups %>%
#   count(Player, Url, Born) %>% filter(n>1) %>% pull(Url) %>% unique()
#
# still_dups <- duplicate_players %>%
#   filter(Url %in% still_dups)
#
# no_longer_dups <- no_longer_dups %>%
#   filter(!Url %in% still_dups$Url)

# now remove these records from the raw joined data
# IMPORTANT: remember to add `duplicate_players_df` that has been cleaned manually back to the main df
joined_primary <- joined_primary %>%
  filter(!Url %in% duplicate_players$Url)

# get a full list of joins on full player name that I'm happy with
joined_complete <- joined_primary %>%
  filter(!is.na(player_url))

# get a list of records where there were no matches on full player name
joined_missing <- joined_primary %>%
  filter(is.na(player_url))

#----- secondary join type: -----#
# here I'll try to join on surname and year of birth - would be nice to use DOB instead but I don't have it for FBref players
joined_secondary <- joined_missing %>%
  mutate(fbref_surname = gsub(".*\\s", "", Player)) %>%
  # drop the (all-NA) Transfermarkt columns left over from the primary join so
  # the secondary join can supply them again
  select(-player_dob, -player_url) %>%
  left_join(tm_unique, by = c("fbref_surname" = "tm_surname", "Born" = "tm_yob"))

# now there are some more duplicates as a result of this secondary join method
# (at this point the variable holds the FBref URLs that matched more than once)
additional_duplicated_players <- joined_secondary %>%
  filter(!is.na(player_url)) %>%
  count(Player, Url, sort = TRUE) %>%
  filter(n > 1) %>% pull(Url) %>% unique()

# swap the vector of duplicated FBref URLs for the full secondary-join rows
# belonging to those URLs
additional_duplicated_players <- filter(joined_secondary, Url %in% additional_duplicated_players)


# combine all duplicated joins for manual rework, without the helper columns
# that only the secondary join carries
duplicate_players <- bind_rows(duplicate_players, additional_duplicated_players) %>%
  select(-fbref_surname, -player_name)


# secondary matches worth keeping: a Transfermarkt URL was found and the FBref
# URL is not one of the ambiguous ones
joined_secondary <- joined_secondary %>%
  filter(!is.na(player_url)) %>%
  filter(!(Url %in% additional_duplicated_players$Url)) %>%
  select(Player, Born, Url, player_dob, player_url)


# every confident match: primary (full name) followed by secondary (surname + YOB)
joined_finished <- bind_rows(
  filter(joined_complete, !is.na(player_url)),
  joined_secondary
)


# create a file for manual rework by removing any of the records that have been matched since the creation of `joined_missing`:
joined_missing <- joined_missing %>%
  filter(!(Url %in% joined_finished$Url)) %>%
  filter(!(Url %in% duplicate_players$Url))


# write files to work on manually
write.csv(joined_finished,
          here("output", "initial-match", "joined_finished.csv"),
          row.names = FALSE)
write.csv(joined_missing,
          here("output", "initial-match", "working-files", "joined_missing.csv"),
          row.names = FALSE)
write.csv(tm_unique,
          here("output", "initial-match", "working-files", "tm_unique.csv"),
          row.names = FALSE)
write.csv(duplicate_players,
          here("output", "initial-match", "working-files", "duplicate_players_df.csv"),
          row.names = FALSE)
-------------------------------------------------------------------------------- /raw-data/fbref-tm-player-mapping/output/initial-match/working-files/duplicate_players_df.csv: -------------------------------------------------------------------------------- 1 | Player,Born,Url,player_dob,player_url,fbref_surname,player_name 2 | Adama Traoré,1995,https://fbref.com/en/players/1a6f2a66/Adama-Traore,28/6/95,https://www.transfermarkt.com/adama-traore/profil/spieler/262608,NA,NA 3 | Adama 
Traoré,1995,https://fbref.com/en/players/f9edc384/Adama-Traore,5/6/95,https://www.transfermarkt.com/adama-traore/profil/spieler/364405,NA,NA 4 | Guilherme,1991,https://fbref.com/en/players/9e61c019/Guilherme,21/5/91,https://www.transfermarkt.com/guilherme/profil/spieler/139607,NA,NA 5 | Guilherme,1991,https://fbref.com/en/players/8754c7ca/Guilherme,5/4/91,https://www.transfermarkt.com/guilherme/profil/spieler/115382,NA,NA 6 | Rafael,1990,https://fbref.com/en/players/9a1f2e1c/Rafael,9/7/90,https://www.transfermarkt.com/rafael/profil/spieler/61892,NA,NA 7 | Adrián López,1999,https://fbref.com/en/players/c58ebf64/Adrian-Lopez,9/1/99,https://www.transfermarkt.com/adri-lopez/profil/spieler/412042,López,Adri López 8 | Alejandro López,1997,https://fbref.com/en/players/f1887f53/Alejandro-Lopez,2/6/97,https://www.transfermarkt.com/alex-lopez/profil/spieler/313113,López,Álex López 9 | Amadou Dia Ndiaye,2000,https://fbref.com/en/players/1f0ea0a6/Amadou-Dia-Ndiaye,2/1/00,https://www.transfermarkt.com/amadou-ndiaye/profil/spieler/568695,Ndiaye,Amadou Ndiaye 10 | Basit Abdallah,1999,https://fbref.com/en/players/13a0ff99/Basit-Abdallah,1/7/99,https://www.transfermarkt.com/abdallah-basit/profil/spieler/457773,Abdallah,Benrandy Abdallah 11 | Cal Roberts,1997,https://fbref.com/en/players/8e9caf48/Cal-Roberts,14/4/97,https://www.transfermarkt.com/callum-roberts/profil/spieler/288952,Roberts,Callum Roberts 12 | Cristo González,1997,https://fbref.com/en/players/896b5df2/Cristo-Gonzalez,1/4/97,https://www.transfermarkt.com/cristo/profil/spieler/339707,González,Edgar González 13 | Daniel Martín,1998,https://fbref.com/en/players/4df0dff8/Daniel-Martin,8/7/98,https://www.transfermarkt.com/dani-martin/profil/spieler/335221,Martín,Dani Martín 14 | Daniel Torres,1989,https://fbref.com/en/players/87882adc/Daniel-Torres,15/11/89,https://www.transfermarkt.com/dani-torres/profil/spieler/93142,Torres,Dani Torres 15 | David Pereira da 
Costa,2001,https://fbref.com/en/players/59948ef7/David-Pereira-da-Costa,5/1/01,https://www.transfermarkt.com/david-costa/profil/spieler/719442,Costa,David Costa 16 | Diego Matías Rodríguez,1989,https://fbref.com/en/players/19f4d2c0/Diego-Matias-Rodriguez,25/6/89,https://www.transfermarkt.com/diego-rodriguez/profil/spieler/90800,Rodríguez,Diego Rodríguez 17 | Dion-Curtis Henry,1997,https://fbref.com/en/players/156bb589/Dion-Curtis-Henry,12/9/97,https://www.transfermarkt.com/dion-henry/profil/spieler/345899,Henry,Dion Henry 18 | Édgar González,1997,https://fbref.com/en/players/49d028db/Edgar-Gonzalez,1/4/97,https://www.transfermarkt.com/edgar-gonzalez/profil/spieler/401624,González,Edgar González 19 | Eduardo Bubacar Baldé,1999,https://fbref.com/en/players/3caf4f73/Eduardo-Bubacar-Balde,10/3/99,https://www.transfermarkt.com/eduardo-balde/profil/spieler/529356,Baldé,Eduardo Baldé 20 | Flavio Junior Bianchi,2000,https://fbref.com/en/players/3ef965c1/Flavio-Junior-Bianchi,24/1/00,https://www.transfermarkt.com/flavio-bianchi/profil/spieler/364132,Bianchi,Flavio Bianchi 21 | Florent da Silva,2003,https://fbref.com/en/players/8db95f95/Florent-da-Silva,2/4/03,https://www.transfermarkt.com/florent-da-silva/profil/spieler/607225,Silva,Florent Da Silva 22 | Hianga Mananga Mbock,1999,https://fbref.com/en/players/0f86995c/Hianga-Mananga-Mbock,28/12/99,https://www.transfermarkt.com/hiangaa-mbock/profil/spieler/684062,Mbock,Hianga'a Mbock 23 | Javier Jiménez García,1997,https://fbref.com/en/players/f30d7505/Javier-Jimenez-Garcia,28/6/97,https://www.transfermarkt.com/javi-jimenez/profil/spieler/251860,García,Aleix García 24 | Javier Martín,1998,https://fbref.com/en/players/789773d9/Javier-Martin,25/1/98,https://www.transfermarkt.com/javi-martin/profil/spieler/534372,Martín,Álex Martín 25 | Joan García,2001,https://fbref.com/en/players/87b498b0/Joan-Garcia,12/2/01,https://www.transfermarkt.com/joan-garcia/profil/spieler/561613,García,Carlo García 26 | João Paulo Santos 
Costa,1996,https://fbref.com/en/players/00225aae/Joao-Paulo-Santos-Costa,2/2/96,https://www.transfermarkt.com/joao-costa/profil/spieler/198638,Costa,João Costa 27 | Joel Castro Pereira,1996,https://fbref.com/en/players/881e5db7/Joel-Castro-Pereira,28/6/96,https://www.transfermarkt.com/joel-pereira/profil/spieler/192611,Pereira,Joel Pereira 28 | José Mena Rodríguez,1998,https://fbref.com/en/players/0af4b238/Jose-Mena-Rodriguez,23/3/98,https://www.transfermarkt.com/pepe-mena/profil/spieler/396145,Rodríguez,Genaro Rodríguez 29 | Leonardo Suárez,1996,https://fbref.com/en/players/25c72b36/Leonardo-Suarez,30/3/96,https://www.transfermarkt.com/leo-suarez/profil/spieler/294894,Suárez,Leo Suárez 30 | Lluis López,1997,https://fbref.com/en/players/a685f013/Lluis-Lopez,5/3/97,https://www.transfermarkt.com/lluis-lopez/profil/spieler/262391,López,Lluís López 31 | Mama Samba Baldé,1995,https://fbref.com/en/players/fb14aa28/Mama-Samba-Balde,6/11/95,https://www.transfermarkt.com/mama-balde/profil/spieler/325223,Baldé,Mama Baldé 32 | Manuel Sánchez,2000,https://fbref.com/en/players/ffacd3d5/Manuel-Sanchez,24/8/00,https://www.transfermarkt.com/manu-sanchez/profil/spieler/618809,Sánchez,Manu Sánchez 33 | Martín,1999,https://fbref.com/en/players/cbec0059/Martin,11/7/99,https://www.transfermarkt.com/martin-calderon/profil/spieler/278404,Martín,Andrés Martín 34 | Nelson Sissoko,1997,https://fbref.com/en/players/12bd0579/Nelson-Sissoko,7/3/97,https://www.transfermarkt.com/alpha-sissoko/profil/spieler/594992,Sissoko,Alpha Sissoko 35 | Pio Francesco Russo,1999,https://fbref.com/en/players/5e913bf9/Pio-Francesco-Russo,1/3/99,https://www.transfermarkt.com/francesco-pio-russo/profil/spieler/315866,Russo,Francesco Pio Russo 36 | Raúl García,1989,https://fbref.com/en/players/1a317a1b/Raul-Garcia,25/11/89,https://www.transfermarkt.com/raul-carnero/profil/spieler/139434,García,Kike García 37 | Samu 
Pérez,1997,https://fbref.com/en/players/aae17c81/Samu-Perez,26/4/97,https://www.transfermarkt.com/samuel-perez/profil/spieler/363541,Pérez,Samuel Pérez -------------------------------------------------------------------------------- /raw-data/fbref-tm-player-mapping/output/initial-match/working-files/duplicate_players_df_manual_fix.csv: -------------------------------------------------------------------------------- 1 | Player,Born,Url,player_dob,player_url,fbref_surname,player_name 2 | Adama Traoré,1995,https://fbref.com/en/players/1a6f2a66/Adama-Traore,28/6/95,https://www.transfermarkt.com/adama-traore/profil/spieler/262608,NA,NA 3 | Adama Traoré,1995,https://fbref.com/en/players/f9edc384/Adama-Traore,5/6/95,https://www.transfermarkt.com/adama-traore/profil/spieler/364405,NA,NA 4 | Guilherme,1991,https://fbref.com/en/players/9e61c019/Guilherme,21/5/91,https://www.transfermarkt.com/guilherme/profil/spieler/139607,NA,NA 5 | Guilherme,1991,https://fbref.com/en/players/8754c7ca/Guilherme,5/4/91,https://www.transfermarkt.com/guilherme/profil/spieler/115382,NA,NA 6 | Rafael,1990,https://fbref.com/en/players/9a1f2e1c/Rafael,9/7/90,https://www.transfermarkt.com/rafael/profil/spieler/61892,NA,NA 7 | Adrián López,1999,https://fbref.com/en/players/c58ebf64/Adrian-Lopez,9/1/99,https://www.transfermarkt.com/adri-lopez/profil/spieler/412042,López,Adri López 8 | Alejandro López,1997,https://fbref.com/en/players/f1887f53/Alejandro-Lopez,2/6/97,https://www.transfermarkt.com/alex-lopez/profil/spieler/313113,López,Álex López 9 | Amadou Dia Ndiaye,2000,https://fbref.com/en/players/1f0ea0a6/Amadou-Dia-Ndiaye,2/1/00,https://www.transfermarkt.com/amadou-ndiaye/profil/spieler/568695,Ndiaye,Amadou Ndiaye 10 | Basit Abdallah,1999,https://fbref.com/en/players/13a0ff99/Basit-Abdallah,1/7/99,https://www.transfermarkt.com/abdallah-basit/profil/spieler/457773,Abdallah,Benrandy Abdallah 11 | Cal 
Roberts,1997,https://fbref.com/en/players/8e9caf48/Cal-Roberts,14/4/97,https://www.transfermarkt.com/callum-roberts/profil/spieler/288952,Roberts,Callum Roberts 12 | Cristo González,1997,https://fbref.com/en/players/896b5df2/Cristo-Gonzalez,1/4/97,https://www.transfermarkt.com/cristo/profil/spieler/339707,González,Edgar González 13 | Daniel Martín,1998,https://fbref.com/en/players/4df0dff8/Daniel-Martin,8/7/98,https://www.transfermarkt.com/dani-martin/profil/spieler/335221,Martín,Dani Martín 14 | Daniel Torres,1989,https://fbref.com/en/players/87882adc/Daniel-Torres,15/11/89,https://www.transfermarkt.com/dani-torres/profil/spieler/93142,Torres,Dani Torres 15 | David Pereira da Costa,2001,https://fbref.com/en/players/59948ef7/David-Pereira-da-Costa,5/1/01,https://www.transfermarkt.com/david-costa/profil/spieler/719442,Costa,David Costa 16 | Diego Matías Rodríguez,1989,https://fbref.com/en/players/19f4d2c0/Diego-Matias-Rodriguez,25/6/89,https://www.transfermarkt.com/diego-rodriguez/profil/spieler/90800,Rodríguez,Diego Rodríguez 17 | Dion-Curtis Henry,1997,https://fbref.com/en/players/156bb589/Dion-Curtis-Henry,12/9/97,https://www.transfermarkt.com/dion-henry/profil/spieler/345899,Henry,Dion Henry 18 | Édgar González,1997,https://fbref.com/en/players/49d028db/Edgar-Gonzalez,1/4/97,https://www.transfermarkt.com/edgar-gonzalez/profil/spieler/401624,González,Edgar González 19 | Eduardo Bubacar Baldé,1999,https://fbref.com/en/players/3caf4f73/Eduardo-Bubacar-Balde,10/3/99,https://www.transfermarkt.com/eduardo-balde/profil/spieler/529356,Baldé,Eduardo Baldé 20 | Flavio Junior Bianchi,2000,https://fbref.com/en/players/3ef965c1/Flavio-Junior-Bianchi,24/1/00,https://www.transfermarkt.com/flavio-bianchi/profil/spieler/364132,Bianchi,Flavio Bianchi 21 | Florent da Silva,2003,https://fbref.com/en/players/8db95f95/Florent-da-Silva,2/4/03,https://www.transfermarkt.com/florent-da-silva/profil/spieler/607225,Silva,Florent Da Silva 22 | Hianga Mananga 
Mbock,1999,https://fbref.com/en/players/0f86995c/Hianga-Mananga-Mbock,28/12/99,https://www.transfermarkt.com/hiangaa-mbock/profil/spieler/684062,Mbock,Hianga'a Mbock 23 | Javier Jiménez García,1997,https://fbref.com/en/players/f30d7505/Javier-Jimenez-Garcia,28/6/97,https://www.transfermarkt.com/javi-jimenez/profil/spieler/251860,García,Aleix García 24 | Javier Martín,1998,https://fbref.com/en/players/789773d9/Javier-Martin,25/1/98,https://www.transfermarkt.com/javi-martin/profil/spieler/534372,Martín,Álex Martín 25 | Joan García,2001,https://fbref.com/en/players/87b498b0/Joan-Garcia,12/2/01,https://www.transfermarkt.com/joan-garcia/profil/spieler/561613,García,Carlo García 26 | João Paulo Santos Costa,1996,https://fbref.com/en/players/00225aae/Joao-Paulo-Santos-Costa,2/2/96,https://www.transfermarkt.com/joao-costa/profil/spieler/198638,Costa,João Costa 27 | Joel Castro Pereira,1996,https://fbref.com/en/players/881e5db7/Joel-Castro-Pereira,28/6/96,https://www.transfermarkt.com/joel-pereira/profil/spieler/192611,Pereira,Joel Pereira 28 | José Mena Rodríguez,1998,https://fbref.com/en/players/0af4b238/Jose-Mena-Rodriguez,23/3/98,https://www.transfermarkt.com/pepe-mena/profil/spieler/396145,Rodríguez,Genaro Rodríguez 29 | Leonardo Suárez,1996,https://fbref.com/en/players/25c72b36/Leonardo-Suarez,30/3/96,https://www.transfermarkt.com/leo-suarez/profil/spieler/294894,Suárez,Leo Suárez 30 | Lluis López,1997,https://fbref.com/en/players/a685f013/Lluis-Lopez,5/3/97,https://www.transfermarkt.com/lluis-lopez/profil/spieler/262391,López,Lluís López 31 | Mama Samba Baldé,1995,https://fbref.com/en/players/fb14aa28/Mama-Samba-Balde,6/11/95,https://www.transfermarkt.com/mama-balde/profil/spieler/325223,Baldé,Mama Baldé 32 | Manuel Sánchez,2000,https://fbref.com/en/players/ffacd3d5/Manuel-Sanchez,24/8/00,https://www.transfermarkt.com/manu-sanchez/profil/spieler/618809,Sánchez,Manu Sánchez 33 | 
Martín,1999,https://fbref.com/en/players/cbec0059/Martin,11/7/99,https://www.transfermarkt.com/martin-calderon/profil/spieler/278404,Martín,Andrés Martín 34 | Nelson Sissoko,1997,https://fbref.com/en/players/12bd0579/Nelson-Sissoko,7/3/97,https://www.transfermarkt.com/alpha-sissoko/profil/spieler/594992,Sissoko,Alpha Sissoko 35 | Pio Francesco Russo,1999,https://fbref.com/en/players/5e913bf9/Pio-Francesco-Russo,1/3/99,https://www.transfermarkt.com/francesco-pio-russo/profil/spieler/315866,Russo,Francesco Pio Russo 36 | Raúl García,1989,https://fbref.com/en/players/1a317a1b/Raul-Garcia,25/11/89,https://www.transfermarkt.com/raul-carnero/profil/spieler/139434,García,Kike García 37 | Samu Pérez,1997,https://fbref.com/en/players/aae17c81/Samu-Perez,26/4/97,https://www.transfermarkt.com/samuel-perez/profil/spieler/363541,Pérez,Samuel Pérez -------------------------------------------------------------------------------- /raw-data/fbref-tm-player-mapping/output/working-files/duplicate_players_df.csv: -------------------------------------------------------------------------------- 1 | "Player","Born","Url","player_dob","player_url","player_position" 2 | -------------------------------------------------------------------------------- /raw-data/fbref-tm-player-mapping/prepare_working_files.R: --------------------------------------------------------------------------------
library(worldfootballR)
library(tidyverse)
library(here)

# FBref player list (playing-time table) for the big five leagues
playing_time <- fb_big5_advanced_season_stats(season_end_year = 2024,
                                              stat_type = "playing_time",
                                              team_or_player = "player")

# Transfermarkt player valuations for the same five countries
tm <- tm_player_market_values(country_name = c("England", "Spain", "France", "Italy", "Germany"),
                              start_year = 2023)



# players that already have an FBref -> Transfermarkt mapping
matched_data <- read.csv("https://github.com/JaseZiv/worldfootballR_data/raw/master/raw-data/fbref-tm-player-mapping/output/fbref_to_tm_mapping.csv", stringsAsFactors = FALSE)


# only players without an existing mapping need to be matched
fbref <- playing_time %>% filter(!Url %in% matched_data$UrlFBref)

fbref <- fbref %>%
  # have made the decision to get rid of players that were listed on team sheets but
  # haven't yet played as there's too many manual matches of youth players
  filter(!is.na(Min_Playing.Time)) %>%
  select(Season_End_Year, Squad, Comp, Player, Nation, Born, Url) %>%
  # surname = everything after the last whitespace in the player name
  mutate(fbref_surname = str_squish(gsub(".*\\s", "", Player))) %>%
  arrange(Player)

tm <- tm %>%
  select(comp_name, region, country, season_start_year, squad, player_name, player_position,
         player_dob, player_nationality, player_market_value_euro, player_url) %>%
  arrange(player_name)

# want a df to help with inspection of names with special characters
tm_unique <- tm %>%
  # latest season first, so distinct() keeps the most recent record per player
  arrange(player_url, desc(season_start_year)) %>%
  distinct(player_name, player_dob, player_url, .keep_all = TRUE) %>%
  select(player_name, player_dob, player_url, player_position) %>%
  mutate(tm_surname = str_squish(gsub(".*\\s", "", player_name)),
         tm_yob = lubridate::year(player_dob))

#----- primary join type: -----#
# here I will join the two datasets on the player names
# joined_primary <- fbref %>% select(Player, Born, Url) %>% distinct(Url, .keep_all = T) %>%
#   left_join(tm %>% select(player_name, player_dob, player_url) %>% distinct(player_url, .keep_all = T), by = c("Player" = "player_name"))

# stringi::stri_trans_general("Audric Estimé", "latin-ascii")

# names on both sides are transliterated to ASCII so accents do not block a match
joined_primary <- fbref %>%
  select(Player, Born, Url) %>%
  mutate(Player = stringi::stri_trans_general(Player, "latin-ascii")) |>
  distinct(Url, .keep_all = TRUE) %>%
  left_join(
    tm_unique %>%
      select(player_name, player_dob, player_url, player_position, tm_yob) %>%
      mutate(player_name = stringi::stri_trans_general(player_name, "latin-ascii")) |>
      distinct(player_url, .keep_all = TRUE),
    by = c("Player" = "player_name", "Born" = "tm_yob")
  )


# joined_primary <- fbref %>% select(Player, Born, Url) %>% distinct(Url, .keep_all = T) %>%
#   left_join(tm_unique %>% select(player_name, player_dob, player_url, player_position, tm_yob) %>% distinct(player_url, .keep_all = T),
#             by = c("Player" = "player_name", "Born" = "tm_yob"))

# arrange by player name
joined_primary <- joined_primary %>% arrange(Player)


# these players have multiple records in each data set - think "Adama Traoré" or "Rafael" or "Raúl García" for example
# will need to manually go through each of these to map the correct player
duplicate_players <- joined_primary %>% count(Player, Url, sort = TRUE) %>% filter(n > 1) %>% pull(Url)
duplicate_players <- joined_primary %>% filter(Url %in% duplicate_players)


# now remove these records from the raw joined data
# IMPORTANT: remember to add `duplicate_players_df` that has been cleaned manually back to the main df
joined_primary <- joined_primary %>%
  filter(!Url %in% duplicate_players$Url)

# get a full list of joins on full player name that I'm happy with
joined_complete <- joined_primary %>%
  filter(!is.na(player_url))

# get a list of records where there were no matches on full player name
joined_missing <- joined_primary %>%
  filter(is.na(player_url))

#----- secondary join type: -----#
# here I'll try to join on surname and year of birth - would be nice to use DOB instead but I don't have it for FBref players
joined_secondary <- joined_missing %>%
  mutate(fbref_surname = gsub(".*\\s", "", Player)) %>%
  # Every Transfermarkt column is NA for these unmatched rows. `player_position`
  # must be dropped as well: keeping it as a join key (as before) only lets rows
  # match Transfermarkt players whose position is also NA, which switches the
  # surname/YOB fallback off.
  select(-player_dob, -player_url, -player_position) %>%
  left_join(
    tm_unique %>%
      # `Player` was transliterated to ASCII for the primary join, so the
      # Transfermarkt surname needs the same treatment to be comparable
      mutate(tm_surname = stringi::stri_trans_general(tm_surname, "latin-ascii")),
    by = c("fbref_surname" = "tm_surname", "Born" = "tm_yob")
  )

# now there are some more duplicates as a result of this secondary join method
# (at this point the variable holds the FBref URLs that matched more than once)
additional_duplicated_players <- joined_secondary %>%
  filter(!is.na(player_url)) %>%
  count(Player, Url, sort = TRUE) %>%
  filter(n > 1) %>% pull(Url) %>% unique()

additional_duplicated_players <- joined_secondary %>%
  filter(Url %in% additional_duplicated_players)


# combine all duplicated joins for manual rework:
duplicate_players <- duplicate_players %>%
  bind_rows(additional_duplicated_players)

duplicate_players <- duplicate_players %>%
  select(-fbref_surname, -player_name)


# keep `player_position` so secondary matches carry the same columns as the
# primary matches in `joined_complete`
joined_secondary <- joined_secondary %>%
  filter(!is.na(player_url),
         !Url %in% additional_duplicated_players$Url) %>%
  select(Player, Born, Url, player_dob, player_url, player_position)


joined_finished <- joined_complete %>%
  bind_rows(joined_secondary)


# create a file for manual rework by removing any of the records that have been matched since the creation of `joined_missing`:
joined_missing <- joined_missing %>%
  filter(!Url %in% joined_finished$Url,
         !Url %in% duplicate_players$Url)


# write files to work on manually
write.csv(joined_finished, here("raw-data", "fbref-tm-player-mapping", "output", "working-files", "joined_finished.csv"), row.names = FALSE)
write.csv(joined_missing, here("raw-data", "fbref-tm-player-mapping", "output", "working-files", "joined_missing.csv"), row.names = FALSE)
write.csv(tm_unique, here("raw-data", "fbref-tm-player-mapping", "output", "working-files", "tm_unique.csv"), row.names = FALSE)
write.csv(duplicate_players, here("raw-data", "fbref-tm-player-mapping", "output", "working-files", "duplicate_players_df.csv"), row.names = FALSE)
-------------------------------------------------------------------------------- /raw-data/fbref-tm-player-mapping/update_player_positions.R: --------------------------------------------------------------------------------
library(worldfootballR)
library(tidyverse)
library(googlesheets4)
library(here)
library(gt)

# Refresh the Transfermarkt position stored in the FBref -> Transfermarkt
# player mapping, then write the mapping back to CSV and to Google Sheets.

# first, we get updated data to ensure all new players are being captured
# NOTE(review): `playing_time` is not referenced again in this script -
# confirm whether it is still needed before removing the scrape.
playing_time <- fb_big5_advanced_season_stats(season_end_year = 2023,
                                              stat_type = "playing_time",
                                              team_or_player = "player")

tm <- get_player_market_values(country_name = c("England", "Spain", "France", "Italy", "Germany"),
                               start_year = 2022)

# saveRDS(tm, here("raw-data", "fbref-tm-player-mapping", "data", "tm_data.rds"))

# read in matched data
matched_data <- read.csv("https://github.com/JaseZiv/worldfootballR_data/raw/master/raw-data/fbref-tm-player-mapping/output/fbref_to_tm_mapping.csv", stringsAsFactors = FALSE)

# create a separate df to allow for analysis of players who have changed positions
this_season <- matched_data %>%
  left_join(tm %>% select(squad, UrlTmarkt=player_url, TmPos_22_23=player_position, player_market_value_euro), by = "UrlTmarkt")

# display players that have changed data
this_season %>%
  filter(!is.na(TmPos_22_23)) %>%
  filter(TmPos_22_23 != TmPos) %>%
  select(PlayerFBref, squad, TmPos, TmPos_22_23, player_market_value_euro) %>%
  arrange(desc(player_market_value_euro)) %>%
  mutate(player_market_value_euro = scales::dollar(player_market_value_euro, prefix = "€")) %>%
  gt() %>%
  tab_options(column_labels.font.size = 20,
              column_labels.font.weight = "bold")



# now overwrite older positions with the new ones
# Keep a single row per Transfermarkt URL before joining: if a URL occurs more
# than once in `tm`, a plain left_join would duplicate that player's row in the
# mapping file that gets written below.
tm_positions <- tm %>%
  select(UrlTmarkt=player_url, TmPos_22_23=player_position) %>%
  distinct(UrlTmarkt, .keep_all = TRUE)

matched_data <- matched_data %>%
  left_join(tm_positions, by = "UrlTmarkt") %>%
  # take the new position when there is one, otherwise keep the stored one
  mutate(TmPos = coalesce(TmPos_22_23, TmPos)) %>%
  select(-TmPos_22_23)


#=============
# Write Files
#=============

# write file for commit to GitHub:
write.csv(matched_data, here("raw-data", "fbref-tm-player-mapping", "output", "fbref_to_tm_mapping.csv"), row.names = FALSE)

# Write file to Googlesheets:
# get the sheet id
ss <- as_sheets_id("https://docs.google.com/spreadsheets/d/1GjjS9IRp6FVzVX5QyfmttMk8eYBtIzuZ_YIM0VWg8OY/edit#gid=61874932") %>%
  as.character()

# write the sheet
sheet_write(matched_data,
            ss,
            sheet = "fbref_to_tm_mapping")


-------------------------------------------------------------------------------- /raw-data/fotmob-leagues/all_leagues.csv: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/65285049f67e7626596095bdd5b1045c3d275144/raw-data/fotmob-leagues/all_leagues.csv
-------------------------------------------------------------------------------- /raw-data/job_controller.R: --------------------------------------------------------------------------------
# SCHEDULED SCRAPERS

# load libraries
library(here)

# Scrape League Seasons Data ------------------------------------------------------
# source(here::here("raw-data", "league_seasons", "get_league_seasons.R"))

# Scrape Countries Data ---------------------------------------------------
# source(here::here("raw-data", "countries_list", "get_countries_list.R"))



# Scrape All Competition Season’s Data ------------------------------------
source(here::here("raw-data", "all_leages_and_cups", "get_all_comp_seasons.R"))



# Scrape Transfermarkt Data -----------------------------------------------
source(here::here("raw-data", "transfermarkt_leagues", "get_transfermarkt_metadata.R"))


# Scrape Fotmob league data ----------------------------------------------
# source(here::here("raw-data", "fotmob-leagues", "get_fotmob_leagues.R"))

-------------------------------------------------------------------------------- /raw-data/league_seasons/get_league_seasons.R:
--------------------------------------------------------------------------------
library(tidyverse)
library(worldfootballR)

# Scrape the FBref competitions index and return the table of Tier 1 senior
# club leagues as a data frame, with an added `competition_urls` column
# holding the absolute URL of each competition page.
.get_tier1_competitions <- function() {
  main_url <- "https://fbref.com"
  # read page to all competitions
  all_comps_page <- xml2::read_html("https://fbref.com/en/comps/")
  # this just gets the Tier 1 club comps - this will need to be modified if more comps are required
  comps <- all_comps_page %>% rvest::html_nodes("#all_comps_1_fa_club_league_senior")
  # get the urls for each competition, then paste fbref url
  competition_urls <- comps %>%
    rvest::html_node("tbody") %>%
    rvest::html_nodes("th a") %>%
    rvest::html_attr("href")
  competition_urls <- paste0(main_url, competition_urls)
  # scrape the table that contains the competitions
  competitions <- comps %>%
    rvest::html_nodes(".sortable") %>%
    rvest::html_table() %>%
    data.frame()
  # add the competition url column
  competitions <- cbind(competitions, competition_urls)
  # remove the two character country code for the flag, and only leave the 3 character code
  competitions$Country <- gsub(".*? ", "", competitions$Country)

  return(competitions)
}


# For every Tier 1 competition, scrape the list of seasons, each season's URL
# and that season's fixtures URL, and join them onto the competitions table.
#
# Returns a data frame (column names cleaned with janitor::clean_names())
# with one row per competition season.
get_league_seasons_url <- function() {
  main_url <- "https://fbref.com"

  competitions <- .get_tier1_competitions()

  league_urls <- competitions$competition_urls

  # Return the fixtures URL linked from a season page, or NA when none is found.
  get_fixtures <- function(season_url) {
    # Pause 3-10 seconds between requests. The original computed this random
    # value but discarded it, so no pause ever happened.
    Sys.sleep(round(runif(1, 3, 10)))

    hrefs <- xml2::read_html(season_url) %>%
      rvest::html_nodes(".hoversmooth") %>%
      rvest::html_nodes(".full") %>%
      rvest::html_nodes("a") %>%
      rvest::html_attr("href")

    fixture_hrefs <- hrefs[grepl("Fixtures", hrefs)]

    # map_chr() below needs exactly one string per season
    if (length(fixture_hrefs) == 0) {
      return(NA_character_)
    }
    paste0(main_url, fixture_hrefs[[1]])
  }

  # Scrape all season rows for a single league page.
  get_urls <- function(league_url) {
    print(glue::glue("Scraping season URLs from {league_url}"))
    league_page <- xml2::read_html(league_url)

    # the season links are read once and reused for text, end year and href
    season_links <- league_page %>% rvest::html_nodes("th a")

    seasons <- season_links %>% rvest::html_text()

    # keep only what follows the last "-" (e.g. "2021-2022" -> "2022")
    season_end_year <- gsub(".*-", "", seasons)

    seasons_urls <- season_links %>%
      rvest::html_attr("href") %>%
      paste0(main_url, .)

    fixtures_url <- seasons_urls %>%
      purrr::map_chr(get_fixtures)

    all_league_seasons <- cbind(league_url, seasons, season_end_year, seasons_urls, fixtures_url) %>%
      data.frame(stringsAsFactors = FALSE)

    return(all_league_seasons)
  }

  all_urls <- league_urls %>%
    purrr::map_df(get_urls) %>%
    dplyr::left_join(competitions, ., by = c("competition_urls" = "league_url")) %>%
    janitor::clean_names()

  return(all_urls)
}



all_tier1_season_URLs <- get_league_seasons_url()

write.csv(all_tier1_season_URLs, here::here("raw-data", "league_seasons", "all_tier1_season_URLs.csv"), row.names = F)
-------------------------------------------------------------------------------- /raw-data/transfermarkt_leagues/get_transfermarkt_metadata.R: --------------------------------------------------------------------------------
library(tidyverse)
library(rvest)

main_url <- "https://www.transfermarkt.com"


# Get Competitions --------------------------------------------------------

# need to hard code this - no idea where to go to get the regions:
regions <- c("europa", "asien", "amerika", "afrika")

# region_urls <-
# paste0("https://www.transfermarkt.com/wettbewerbe/", regions)

# Scrape the competition list of every region in `regions`: competition name,
# region, country (taken from the flag's title attribute) and competition URL.
comps_list <- vector("list", length(regions))

for(region_idx in seq_along(regions)) {
  region <- regions[region_idx]
  print(paste("Scraping league URLs from the", region, "region"))
  region_url <- paste0("https://www.transfermarkt.com/wettbewerbe/", region)
  Sys.sleep(3)
  comp <- xml2::read_html(region_url)

  comp_links <- comp %>% rvest::html_nodes(".inline-table td+ td a")
  comp_name <- comp_links %>% rvest::html_text()

  # nothing matched on this page: skip it instead of failing on an empty index
  if(length(comp_name) == 0) {
    warning(paste("No competitions found for the", region, "region"))
    next
  }

  comp_url <- comp_links %>% rvest::html_attr("href") %>% paste0(main_url, .)

  flags_list <- comp %>% rvest::html_nodes(".hauptlink+ .zentriert")
  # the country name is the title attribute of the first child of each flag cell
  country <- vapply(
    seq_along(flags_list),
    function(i) xml2::xml_attrs(xml2::xml_child(flags_list[[i]], 1))[["title"]],
    character(1)
  )

  comps_list[[region_idx]] <- cbind(comp_name, region, country, comp_url) %>%
    data.frame(stringsAsFactors = FALSE)
}

all_comps <- dplyr::bind_rows(comps_list)

if(nrow(all_comps) == 0) {
  stop("No competitions were scraped from Transfermarkt; nothing to write.")
}


# Get season URLs ---------------------------------------------------------

# For each competition page, read the season selector and build one URL per season.
seasons_list <- vector("list", nrow(all_comps))

for(each_league_url in seq_len(nrow(all_comps))) {
  print(paste0("scraping league ", each_league_url, " of ", nrow(all_comps)))
  Sys.sleep(4)
  comp_url <- all_comps$comp_url[each_league_url]
  league_page <- xml2::read_html(comp_url)

  season_start_year <- league_page %>%
    rvest::html_nodes(".chzn-select") %>%
    rvest::html_nodes("option") %>%
    rvest::html_attr("value")

  # a page without a season selector contributes no season rows; the
  # left_join below then leaves NA season columns for that competition
  if(length(season_start_year) == 0) {
    next
  }

  season_urls <- paste0(comp_url, "/plus/?saison_id=", season_start_year)

  seasons_list[[each_league_url]] <- data.frame(
    comp_url = comp_url,
    season_start_year = season_start_year,
    season_urls = season_urls,
    stringsAsFactors = FALSE
  )
}

all_seasons_df <- dplyr::bind_rows(seasons_list)

# keep the join key present even if no seasons were found at all
if(nrow(all_seasons_df) == 0) {
  all_seasons_df <- data.frame(
    comp_url = character(0),
    season_start_year = character(0),
    season_urls = character(0),
    stringsAsFactors = FALSE
  )
}

all_data <- all_comps %>%
  dplyr::left_join(all_seasons_df, by = "comp_url")

# translate the German region slugs used in the URLs to English labels
all_data <- all_data %>%
  dplyr::mutate(region = dplyr::case_when(
    region == "europa" ~ "Europe",
    region == "asien" ~ "Asia",
    region == "amerika" ~ "Americas",
    region == "afrika" ~ "Africa"
  ))

write.csv(all_data, here::here("raw-data", "transfermarkt_leagues", "main_comp_seasons.csv"), row.names = F)


-------------------------------------------------------------------------------- /raw-data/transfermarkt_staff/get_staff_types.R: --------------------------------------------------------------------------------
library(magrittr)
library(here)

# Scrape the staff-type dropdown (option value + label) from one club's staff
# history page and save it as a lookup table.
url <- "https://www.transfermarkt.com/real-madrid/mitarbeiterhistorie/verein/418"
history_pg <- xml2::read_html(url)

staff <- history_pg %>% rvest::html_nodes(".auflistung tbody tr td") %>% rvest::html_nodes(".inline-select")

staff_type_text <- staff %>% rvest::html_nodes("select option") %>% rvest::html_text() %>% stringr::str_squish()
staff_type_idx <- staff %>% rvest::html_nodes("select option") %>% rvest::html_attr("value")
# drop options whose label is empty after whitespace squishing
staff_types <- data.frame(staff_type_idx = staff_type_idx, staff_type_text = staff_type_text) %>% dplyr::filter(staff_type_text != "")
write.csv(staff_types, here("raw-data", "transfermarkt_staff", "tm_staff_types.csv"), row.names = F)
-------------------------------------------------------------------------------- /raw-data/transfermarkt_staff/tm_staff_types.csv: -------------------------------------------------------------------------------- 1 | "staff_type_idx","staff_type_text" 2 | "1","Manager" 3 | "10","Caretaker Manager" 4 | "2","Assistant Manager" 5 | "3","Goalkeeping Coach" 6 | "11","Conditioning Coach" 7 | "22","Fitness Coach" 8 | "16","Chief Analyst" 9 | "63","Athletic Coach" 10 | "23","Rehab Coach" 11 | "104","Youth Coach" 12 | "70","Video Analyst" 13 | "145","Coordinator of talent management" 14 | "13","Director of Football" 15 | "54","Sporting Director" 16 | "43","Head of Football Operations" 17 | "68","Technical
Director" 18 | "25","Chief Executive Officer" 19 | "102","Adivser of management" 20 | "17","President" 21 | "27","Vice-President" 22 | "28","Chairman" 23 | "113","Vice-Chairman" 24 | "39","Board Member" 25 | "59","Member of administrative board" 26 | "83","Marketing/Management" 27 | "57","Director of Marketing and Sales" 28 | "86","Honorary President" 29 | "90","Head of Scouting" 30 | "7","Scout" 31 | "166","Youth Scout" 32 | "35","Academy manager" 33 | "19","Club Doctor" 34 | "130","Nutritionist" 35 | "44","Marketing Staff" 36 | "9","Team official" 37 | "149","Club representative" 38 | -------------------------------------------------------------------------------- /worldfootballR_data.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | ProjectId: a5dcf981-a6ce-4c66-83c6-2505bda960f8 3 | 4 | RestoreWorkspace: Default 5 | SaveWorkspace: Default 6 | AlwaysSaveHistory: Default 7 | 8 | EnableCodeIndexing: Yes 9 | UseSpacesForTab: Yes 10 | NumSpacesForTab: 2 11 | Encoding: UTF-8 12 | 13 | RnwWeave: Sweave 14 | LaTeX: pdfLaTeX 15 | --------------------------------------------------------------------------------