├── src ├── function │ ├── solver │ │ ├── gcp │ │ │ ├── __init__.py │ │ │ ├── logging.py │ │ │ ├── util.py │ │ │ ├── bigquery.py │ │ │ └── storage.py │ │ ├── solver │ │ │ ├── __init__.py │ │ │ ├── queries.py │ │ │ ├── models.py │ │ │ └── solver.py │ │ ├── requirements.txt │ │ └── main.py │ ├── hkjc_get_odds │ │ ├── gcp │ │ │ ├── __init__.py │ │ │ ├── logging.py │ │ │ └── storage.py │ │ ├── requirements.txt │ │ └── main.py │ ├── footystats_get_data │ │ ├── gcp │ │ │ ├── __init__.py │ │ │ ├── logging.py │ │ │ ├── util.py │ │ │ └── storage.py │ │ ├── requirements.txt │ │ └── main.py │ ├── hkjc_get_results │ │ ├── gcp │ │ │ ├── __init__.py │ │ │ ├── logging.py │ │ │ └── storage.py │ │ ├── requirements.txt │ │ └── main.py │ ├── hkjc_get_team_list │ │ ├── gcp │ │ │ ├── __init__.py │ │ │ ├── logging.py │ │ │ └── storage.py │ │ ├── requirements.txt │ │ └── main.py │ ├── simulate_tournament │ │ ├── gcp │ │ │ ├── __init__.py │ │ │ ├── logging.py │ │ │ ├── util.py │ │ │ ├── bigquery.py │ │ │ └── storage.py │ │ ├── simulation │ │ │ ├── __init__.py │ │ │ ├── models │ │ │ │ ├── __init__.py │ │ │ │ ├── results.py │ │ │ │ ├── tiebreaker.py │ │ │ │ ├── team.py │ │ │ │ ├── table.py │ │ │ │ └── match.py │ │ │ ├── tournaments │ │ │ │ ├── __init__.py │ │ │ │ ├── rounds.py │ │ │ │ ├── winner.py │ │ │ │ ├── groups.py │ │ │ │ ├── season.py │ │ │ │ ├── knockout.py │ │ │ │ └── tournament.py │ │ │ └── queries.py │ │ ├── requirements.txt │ │ └── main.py │ ├── solver_publish_messages │ │ ├── gcp │ │ │ ├── __init__.py │ │ │ ├── pubsub.py │ │ │ ├── util.py │ │ │ └── bigquery.py │ │ ├── requirements.txt │ │ └── main.py │ ├── footystats_get_league_list │ │ ├── gcp │ │ │ ├── __init__.py │ │ │ ├── logging.py │ │ │ └── storage.py │ │ ├── requirements.txt │ │ └── main.py │ ├── footystats_transform_matches │ │ ├── gcp │ │ │ ├── __init__.py │ │ │ ├── logging.py │ │ │ └── storage.py │ │ ├── requirements.txt │ │ └── main.py │ ├── simulation_publish_messages │ │ ├── gcp │ │ │ ├── __init__.py │ │ │ ├── 
pubsub.py │ │ │ ├── util.py │ │ │ └── bigquery.py │ │ ├── requirements.txt │ │ └── main.py │ ├── footystats_publish_season_ids_delta │ │ ├── gcp │ │ │ ├── __init__.py │ │ │ ├── pubsub.py │ │ │ ├── util.py │ │ │ └── bigquery.py │ │ ├── requirements.txt │ │ └── main.py │ └── footystats_publish_season_ids_initial │ │ ├── gcp │ │ ├── __init__.py │ │ ├── pubsub.py │ │ ├── util.py │ │ └── bigquery.py │ │ ├── requirements.txt │ │ └── main.py └── bigquery │ ├── sql │ ├── hkjc │ │ ├── odds_latest.sql │ │ ├── scores.sql │ │ ├── odds_today.sql │ │ └── odds_clean.sql │ ├── solver │ │ ├── teams_latest.sql │ │ ├── leagues_latest.sql │ │ ├── teams_7d.sql │ │ ├── team_ratings_7d.sql │ │ ├── team_ratings.sql │ │ ├── get_messages.sql │ │ └── get_matches.sql │ ├── simulation │ │ ├── get_avg_goal_home_adv.sql │ │ ├── leagues_latest.sql │ │ ├── get_groups.sql │ │ ├── get_teams.sql │ │ ├── get_messages.sql │ │ └── get_matches.sql │ ├── footystats │ │ ├── get_season_id_initial.sql │ │ └── get_season_id_delta.sql │ ├── operations │ │ ├── map_hkjc_tournaments.sql │ │ ├── map_hkjc_teams.sql │ │ ├── map_hkjc_team_list.sql │ │ ├── get_daily_suggestions.sql │ │ └── get_kelly_ratio.sql │ ├── functions │ │ ├── accent_to_latin.sql │ │ └── matchProbs.js │ ├── outputs │ │ ├── team_ratings_international.sql │ │ ├── simulation_copa_ko.sql │ │ ├── simulation_wcq_r2.sql │ │ ├── simulation_acq.sql │ │ ├── simulation_asc_ko.sql │ │ ├── simulation_cwc_ko.sql │ │ ├── simulation_aco_ko.sql │ │ ├── simulation_euro_ko.sql │ │ ├── team_ratings_club.sql │ │ ├── simulation_wcq_r3.sql │ │ ├── simulation_acle_ko.sql │ │ ├── simulation_cl1.sql │ │ ├── simulation_ucl_ko.sql │ │ ├── simulation_j1.sql │ │ ├── simulation_copa_gs.sql │ │ ├── simulation_csl.sql │ │ ├── simulation_hkpl.sql │ │ ├── simulation_ll.sql │ │ ├── simulation_sea.sql │ │ ├── simulation_bun.sql │ │ ├── simulation_epl.sql │ │ ├── simulation_cwc_gs.sql │ │ ├── simulation_li1.sql │ │ ├── simulation_acl2_gs.sql │ │ ├── simulation_aco_gs.sql │ │ ├── 
simulation_euro_gs.sql │ │ ├── simulation_asc_gs.sql │ │ ├── simulation_acle_gs.sql │ │ ├── simulation_ucl_gs.sql │ │ └── results.sql │ └── master │ │ ├── leagues.sql │ │ └── teams.sql │ └── schema │ ├── solver │ ├── leagues.json │ └── teams.json │ ├── manual │ ├── teams.json │ └── leagues.json │ ├── hkjc │ ├── teams.json │ └── results.json │ ├── footystats │ ├── matches_transformed.json │ ├── league_list.json │ └── teams.json │ └── master │ ├── teams.json │ └── leagues.json ├── .gitignore ├── infrastructure ├── modules │ ├── pubsub │ │ ├── main.tf │ │ ├── variables.tf │ │ └── output.tf │ ├── scheduled-function │ │ ├── output.tf │ │ ├── main.tf │ │ └── variables.tf │ ├── secret │ │ ├── output.tf │ │ ├── variables.tf │ │ └── main.tf │ ├── service-accounts │ │ ├── output.tf │ │ ├── variables.tf │ │ └── main.tf │ ├── storage │ │ ├── output.tf │ │ ├── variables.tf │ │ └── main.tf │ ├── project │ │ ├── main.tf │ │ ├── output.tf │ │ └── variables.tf │ ├── services │ │ ├── output.tf │ │ ├── variables.tf │ │ └── main.tf │ ├── bigquery │ │ ├── output.tf │ │ ├── variables.tf │ │ └── main.tf │ └── event-function │ │ ├── main.tf │ │ └── variables.tf ├── development │ └── variables.tf └── production │ └── variables.tf ├── assets ├── simulation │ ├── France Ligue 1.json │ ├── Italy Serie A.json │ ├── Japan J1 League.json │ ├── Spain La Liga.json │ ├── China China League One.json │ ├── Germany Bundesliga.json │ ├── China Chinese Super League.json │ ├── England Premier League.json │ ├── Hong Kong Hong Kong Premier League.json │ ├── International WC Qualification Asia.json │ ├── International FIFA Club World Cup.json │ ├── International Asian Cup Qualification.json │ ├── Asia AFC Champions League.json │ ├── Europe UEFA Champions League.json │ └── Asia AFC Cup.json └── leagues.csv └── README.md /src/function/solver/gcp/__init__.py: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /src/function/solver/solver/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/function/hkjc_get_odds/gcp/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/function/footystats_get_data/gcp/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/function/hkjc_get_results/gcp/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/function/hkjc_get_team_list/gcp/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/function/simulate_tournament/gcp/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/function/solver_publish_messages/gcp/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/function/footystats_get_league_list/gcp/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/function/footystats_transform_matches/gcp/__init__.py: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /src/function/simulate_tournament/simulation/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/function/simulation_publish_messages/gcp/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/function/footystats_publish_season_ids_delta/gcp/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/function/footystats_publish_season_ids_initial/gcp/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/function/solver_publish_messages/requirements.txt: -------------------------------------------------------------------------------- 1 | functions-framework 2 | google-cloud-bigquery 3 | google-cloud-pubsub -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | .pytest_cache 3 | .vscode 4 | *.zip 5 | *terraform* 6 | credentials 7 | .venv 8 | tmp 9 | __pycache__ -------------------------------------------------------------------------------- /src/function/footystats_transform_matches/requirements.txt: -------------------------------------------------------------------------------- 1 | functions-framework 2 | google-cloud-logging 3 | google-cloud-storage -------------------------------------------------------------------------------- /src/function/hkjc_get_odds/requirements.txt: 
-------------------------------------------------------------------------------- 1 | functions-framework 2 | google-cloud-logging 3 | google-cloud-storage 4 | requests -------------------------------------------------------------------------------- /src/function/simulation_publish_messages/requirements.txt: -------------------------------------------------------------------------------- 1 | functions-framework 2 | google-cloud-bigquery 3 | google-cloud-pubsub -------------------------------------------------------------------------------- /src/function/solver/requirements.txt: -------------------------------------------------------------------------------- 1 | google-cloud-bigquery 2 | google-cloud-logging 3 | google-cloud-storage 4 | pandas 5 | pulp -------------------------------------------------------------------------------- /src/function/footystats_get_data/requirements.txt: -------------------------------------------------------------------------------- 1 | functions-framework 2 | google-cloud-logging 3 | google-cloud-storage 4 | requests -------------------------------------------------------------------------------- /src/function/hkjc_get_team_list/requirements.txt: -------------------------------------------------------------------------------- 1 | functions-framework 2 | google-cloud-logging 3 | google-cloud-storage 4 | requests -------------------------------------------------------------------------------- /src/function/footystats_get_league_list/requirements.txt: -------------------------------------------------------------------------------- 1 | functions-framework 2 | google-cloud-logging 3 | google-cloud-storage 4 | requests -------------------------------------------------------------------------------- /src/function/footystats_publish_season_ids_delta/requirements.txt: -------------------------------------------------------------------------------- 1 | functions-framework 2 | google-cloud-bigquery 3 | google-cloud-pubsub 
-------------------------------------------------------------------------------- /src/function/footystats_publish_season_ids_initial/requirements.txt: -------------------------------------------------------------------------------- 1 | functions-framework 2 | google-cloud-bigquery 3 | google-cloud-pubsub -------------------------------------------------------------------------------- /src/function/hkjc_get_results/requirements.txt: -------------------------------------------------------------------------------- 1 | functions-framework 2 | google-cloud-logging 3 | google-cloud-storage 4 | pytz 5 | requests -------------------------------------------------------------------------------- /infrastructure/modules/pubsub/main.tf: -------------------------------------------------------------------------------- 1 | resource "google_pubsub_topic" "topic" { 2 | name = var.topic 3 | project = var.project_id 4 | } 5 | -------------------------------------------------------------------------------- /src/bigquery/sql/hkjc/odds_latest.sql: -------------------------------------------------------------------------------- 1 | SELECT * 2 | FROM `hkjc.odds_clean` 3 | QUALIFY ROW_NUMBER() OVER (PARTITION BY id ORDER BY _TIMESTAMP DESC) = 1 -------------------------------------------------------------------------------- /src/function/simulate_tournament/requirements.txt: -------------------------------------------------------------------------------- 1 | functions-framework 2 | numpy 3 | google-cloud-bigquery 4 | google-cloud-logging 5 | google-cloud-storage -------------------------------------------------------------------------------- /src/function/simulate_tournament/simulation/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .match import Match 2 | from .tiebreaker import TieBreaker 3 | from .team import Team 4 | -------------------------------------------------------------------------------- 
import google.cloud.logging


def setup_logging():
    """Create a Cloud Logging client and invoke its ``setup_logging()`` hook.

    Takes no arguments and returns ``None``; the client instance is not kept.
    """
    logging_client = google.cloud.logging.Client()
    logging_client.setup_logging()
"format": "Season", 4 | "h2h": true, 5 | "leg": 2 6 | } 7 | } -------------------------------------------------------------------------------- /assets/simulation/Germany Bundesliga.json: -------------------------------------------------------------------------------- 1 | { 2 | "Regular Season": { 3 | "format": "Season", 4 | "h2h": false, 5 | "leg": 2 6 | } 7 | } -------------------------------------------------------------------------------- /src/function/footystats_get_data/gcp/logging.py: -------------------------------------------------------------------------------- 1 | import google.cloud.logging 2 | 3 | 4 | def setup_logging(): 5 | google.cloud.logging.Client().setup_logging() 6 | -------------------------------------------------------------------------------- /src/function/hkjc_get_results/gcp/logging.py: -------------------------------------------------------------------------------- 1 | import google.cloud.logging 2 | 3 | 4 | def setup_logging(): 5 | google.cloud.logging.Client().setup_logging() 6 | -------------------------------------------------------------------------------- /src/function/hkjc_get_team_list/gcp/logging.py: -------------------------------------------------------------------------------- 1 | import google.cloud.logging 2 | 3 | 4 | def setup_logging(): 5 | google.cloud.logging.Client().setup_logging() 6 | -------------------------------------------------------------------------------- /src/function/simulate_tournament/gcp/logging.py: -------------------------------------------------------------------------------- 1 | import google.cloud.logging 2 | 3 | 4 | def setup_logging(): 5 | google.cloud.logging.Client().setup_logging() 6 | -------------------------------------------------------------------------------- /assets/simulation/China Chinese Super League.json: -------------------------------------------------------------------------------- 1 | { 2 | "Regular Season": { 3 | "format": "Season", 4 | "h2h": true, 5 | "leg": 2 6 | } 7 | } 
import google.cloud.logging


def setup_logging():
    """Create a Cloud Logging client and invoke its ``setup_logging()`` hook.

    Takes no arguments and returns ``None``; the client instance is not kept.
    """
    logging_client = google.cloud.logging.Client()
    logging_client.setup_logging()
4 | } 5 | -------------------------------------------------------------------------------- /src/bigquery/sql/hkjc/scores.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | id, 3 | homeResult AS home_score, 4 | awayResult AS away_score 5 | FROM hkjc.results, 6 | UNNEST(results.results) AS _results 7 | WHERE stageID = 5 8 | AND resultType = 1 -------------------------------------------------------------------------------- /src/bigquery/sql/solver/teams_latest.sql: -------------------------------------------------------------------------------- 1 | SELECT teams.* 2 | FROM solver.teams 3 | JOIN ( 4 | SELECT _TYPE, MAX(_DATE_UNIX) AS _DATE_UNIX 5 | FROM `solver.teams` 6 | GROUP BY _TYPE 7 | ) latest 8 | USING (_TYPE, _DATE_UNIX) -------------------------------------------------------------------------------- /infrastructure/modules/service-accounts/output.tf: -------------------------------------------------------------------------------- 1 | output "emails" { 2 | description = "Service account emails by name." 
3 | value = { for name, account in google_service_account.service_accounts : name => account.email } 4 | } 5 | -------------------------------------------------------------------------------- /src/bigquery/sql/simulation/get_avg_goal_home_adv.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | avg_goal, 3 | home_adv 4 | FROM `${project_id}.solver.leagues` solver 5 | JOIN `${project_id}.master.leagues` master USING (division) 6 | WHERE footystats_name = league -------------------------------------------------------------------------------- /src/function/simulate_tournament/simulation/tournaments/__init__.py: -------------------------------------------------------------------------------- 1 | from .groups import Groups 2 | from .knockout import Knockout 3 | from .rounds import Round 4 | from .season import Season 5 | from .tournament import Tournament 6 | from .winner import Winner 7 | -------------------------------------------------------------------------------- /infrastructure/modules/pubsub/variables.tf: -------------------------------------------------------------------------------- 1 | variable "topic" { 2 | description = "The Pub/Sub topic name." 3 | type = string 4 | } 5 | 6 | variable "project_id" { 7 | description = "The project ID to manage the Pub/Sub resources." 
8 | type = string 9 | } 10 | -------------------------------------------------------------------------------- /src/bigquery/sql/solver/leagues_latest.sql: -------------------------------------------------------------------------------- 1 | SELECT leagues.* 2 | FROM solver.leagues 3 | JOIN ( 4 | SELECT _TYPE, MAX(_DATE_UNIX) AS max_date_unix 5 | FROM `solver.leagues` 6 | GROUP BY _TYPE 7 | ) latest 8 | ON leagues._TYPE = latest._TYPE AND leagues._DATE_UNIX = latest.max_date_unix -------------------------------------------------------------------------------- /src/bigquery/sql/footystats/get_season_id_initial.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | name, 3 | season.id AS season_id, 4 | season.country, 5 | season.year 6 | FROM ${project_id}.footystats.league_list, 7 | league_list.season 8 | WHERE CAST(RIGHT(season.year, 4) AS INT) >= EXTRACT(YEAR FROM CURRENT_DATE()) - 5 -------------------------------------------------------------------------------- /infrastructure/modules/pubsub/output.tf: -------------------------------------------------------------------------------- 1 | output "topic" { 2 | description = "The name of the Pub/Sub topic." 3 | value = google_pubsub_topic.topic.name 4 | } 5 | 6 | output "id" { 7 | description = "The ID of the Pub/Sub topic." 
from collections import defaultdict


class Results(defaultdict):
    """Counter-like mapping whose missing keys default to ``0``.

    Supports ``results / number`` to scale every stored value.
    """

    def __init__(self, default_factory=int, *args, **kwargs):
        """Initialise the mapping.

        ``Results()`` behaves as before (missing keys start at ``int()``).
        The optional arguments exist because ``defaultdict`` re-creates
        instances by calling ``type(self)(default_factory, ...)`` when an
        object is copied, deep-copied or unpickled; a zero-argument
        ``__init__`` makes those operations raise ``TypeError``.

        Args:
            default_factory: factory for missing keys, ``int`` by default.
            *args: forwarded to ``defaultdict`` (e.g. initial items).
            **kwargs: forwarded to ``defaultdict``.
        """
        super().__init__(default_factory, *args, **kwargs)

    def __truediv__(self, other):
        """Divide every stored value by ``other`` and return ``self``.

        NOTE: the division happens in place, so ``a / n`` modifies ``a``
        and returns that same object rather than a new mapping.

        Raises:
            ZeroDivisionError: if ``other`` is zero and the mapping is
                not empty.
        """
        # Only values are reassigned, so the key set is stable while iterating.
        for key in self:
            self[key] /= other
        return self
-------------------------------------------------------------------------------- /infrastructure/modules/service-accounts/variables.tf: -------------------------------------------------------------------------------- 1 | variable "roles" { 2 | description = "Roles to apply to service accounts by name." 3 | type = map(list(string)) 4 | default = {} 5 | } 6 | 7 | variable "project_id" { 8 | description = "Project id where service account will be created." 9 | type = string 10 | } 11 | -------------------------------------------------------------------------------- /infrastructure/modules/secret/variables.tf: -------------------------------------------------------------------------------- 1 | variable "secrets" { 2 | description = "A map of objects, which include the name and the secret." 3 | type = map(string) 4 | default = {} 5 | } 6 | 7 | variable "project_id" { 8 | description = "The project ID to manage the Secret Manager resources." 9 | type = string 10 | } 11 | -------------------------------------------------------------------------------- /src/bigquery/schema/solver/leagues.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "name": "division", 4 | "type": "STRING", 5 | "mode": "REQUIRED" 6 | }, 7 | { 8 | "name": "avg_goal", 9 | "type": "FLOAT", 10 | "mode": "REQUIRED" 11 | }, 12 | { 13 | "name": "home_adv", 14 | "type": "FLOAT" 15 | } 16 | ] -------------------------------------------------------------------------------- /infrastructure/modules/storage/output.tf: -------------------------------------------------------------------------------- 1 | output "names" { 2 | description = "Bucket names." 3 | value = { for name, bucket in google_storage_bucket.buckets : 4 | name => bucket.name 5 | } 6 | } 7 | 8 | output "urls" { 9 | description = "Bucket URLs." 
from typing import Protocol

from simulation.models import Team


class Round(Protocol):
    """Structural interface for one round of a tournament simulation.

    Only signatures are declared here; every method body is an empty stub.
    NOTE(review): presumably satisfied by the concrete round classes in this
    package -- confirm against them.
    """

    def add_teams(self, teams: list[Team]):
        """Accept the list of ``teams`` taking part in this round."""

    def simulate(self):
        """Run the round (no arguments, no declared return value)."""

    def get_advanced(self, end: int, start: int) -> list[Team]:
        """Return a list of teams selected by ``end`` and ``start``.

        NOTE(review): the two integers presumably bound a range of finishing
        positions -- confirm against the implementations. Note the parameter
        order is ``end`` first, then ``start``.
        """

    def reset(self):
        """Reset the round (no arguments, no declared return value)."""
import os

import functions_framework

from gcp import bigquery, pubsub


@functions_framework.cloud_event
def main(_):
    """Publish one Pub/Sub message per row of ``solver.get_messages``.

    The CloudEvent argument is ignored. Every row yielded by the query is
    published to the topic given by the ``TOPIC_NAME`` environment variable.

    Raises:
        KeyError: if ``TOPIC_NAME`` is unset when a row is about to be
            published.
        Exception: whatever a failed publish raises from ``result()``.
    """
    pending = [
        pubsub.publish_json_message(
            topic=os.environ["TOPIC_NAME"],
            data=message,
        )
        for message in bigquery.query_dict(query="SELECT * FROM `solver.get_messages`();")
    ]
    # publish_json_message returns the publisher client's future. Wait for
    # each one so the function does not return while sends are still in
    # flight, and so publish failures surface instead of being dropped.
    for future in pending:
        future.result()
import json
import os

from google.cloud import pubsub_v1

CLIENT = pubsub_v1.PublisherClient()


def get_topic_path(topic: str) -> str:
    """Return the client-built path for ``topic`` in the ``GCP_PROJECT`` project.

    Raises:
        KeyError: if the ``GCP_PROJECT`` environment variable is unset.
    """
    project_id = os.environ["GCP_PROJECT"]
    return CLIENT.topic_path(project=project_id, topic=topic)


def publish_json_message(topic: str, data: dict):
    """Encode ``data`` as JSON bytes and publish it on ``topic``.

    ``topic`` is handed to the client unchanged; it is not routed through
    ``get_topic_path``. The return value is whatever ``CLIENT.publish``
    returns.
    """
    payload = json.dumps(data).encode()
    return CLIENT.publish(topic, data=payload)
def get_topic_path(topic: str) -> str:
    """Return the fully-qualified path of *topic* in the ``GCP_PROJECT`` project.

    Raises:
        KeyError: if the ``GCP_PROJECT`` environment variable is unset.
    """
    return CLIENT.topic_path(project=os.environ["GCP_PROJECT"], topic=topic)


def publish_json_message(topic: str, data: dict):
    """Publish *data* as a UTF-8 encoded JSON message to *topic*.

    Args:
        topic: either a full topic path (``projects/<p>/topics/<t>``) or a
            bare topic ID, which is resolved with :func:`get_topic_path`.
        data: JSON-serialisable payload.

    Returns:
        The future returned by ``CLIENT.publish``.
    """
    # A bare topic ID cannot contain "/", so a slash means the caller already
    # supplied a full path and it must be passed through untouched.
    topic_path = topic if "/" in topic else get_topic_path(topic)
    return CLIENT.publish(topic_path, data=json.dumps(data).encode())
-------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | 4 | from google.cloud import pubsub_v1 5 | 6 | CLIENT = pubsub_v1.PublisherClient() 7 | 8 | 9 | def get_topic_path(topic: str) -> str: 10 | return CLIENT.topic_path(project=os.environ["GCP_PROJECT"], topic=topic) 11 | 12 | 13 | def publish_json_message(topic: str, data: dict): 14 | return CLIENT.publish(topic, data=json.dumps(data).encode()) 15 | -------------------------------------------------------------------------------- /infrastructure/modules/services/output.tf: -------------------------------------------------------------------------------- 1 | output "enabled_apis" { 2 | description = "Enabled APIs in the project." 3 | value = [for api in google_project_service.project_services : api.service] 4 | 5 | depends_on = [time_sleep.wait_activate_api] 6 | } 7 | 8 | output "enabled_api_identities" { 9 | description = "Enabled API identities in the project." 10 | value = { for i in google_project_service_identity.project_service_identities : i.service => i.email } 11 | } 12 | -------------------------------------------------------------------------------- /src/bigquery/schema/hkjc/teams.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "name": "id", 4 | "type": "INTEGER", 5 | "mode": "REQUIRED" 6 | }, 7 | { 8 | "name": "code", 9 | "type": "STRING", 10 | "mode": "REQUIRED" 11 | }, 12 | { 13 | "name": "name_ch", 14 | "type": "STRING", 15 | "mode": "REQUIRED" 16 | }, 17 | { 18 | "name": "name_en", 19 | "type": "STRING", 20 | "mode": "REQUIRED" 21 | } 22 | ] -------------------------------------------------------------------------------- /src/function/solver/gcp/util.py: -------------------------------------------------------------------------------- 1 | import base64 2 | import json 3 | 4 | from cloudevents.http.event import CloudEvent 5 | 6 | 7 | def safe_load_json(s: str) -> dict | str: 8 | try: 9 | 
return json.loads(s) 10 | except (json.decoder.JSONDecodeError, TypeError): 11 | return s 12 | 13 | 14 | def decode_message(cloud_event: CloudEvent) -> dict | str: 15 | data = base64.b64decode(cloud_event.data["message"]["data"]).decode("utf-8") 16 | return safe_load_json(data) 17 | -------------------------------------------------------------------------------- /infrastructure/development/variables.tf: -------------------------------------------------------------------------------- 1 | variable "environment" { 2 | default = "dev" 3 | type = string 4 | description = "The environment to prepare (e.g., 'dev', 'prod')." 5 | } 6 | 7 | variable "billing_account" { 8 | type = string 9 | description = "The ID of the billing account to associate this project with" 10 | } 11 | 12 | variable "region" { 13 | default = "asia-east2" 14 | type = string 15 | description = "Region where resources are created." 16 | } 17 | -------------------------------------------------------------------------------- /infrastructure/production/variables.tf: -------------------------------------------------------------------------------- 1 | variable "environment" { 2 | description = "The environment to prepare (e.g., 'dev', 'prod')." 3 | type = string 4 | default = "dev" 5 | } 6 | 7 | variable "billing_account" { 8 | description = "The ID of the billing account to associate this project with" 9 | type = string 10 | } 11 | 12 | variable "region" { 13 | description = "Region where resources are created." 
14 | type = string 15 | default = "asia-east2" 16 | } 17 | -------------------------------------------------------------------------------- /src/bigquery/sql/simulation/get_teams.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | footystats.id AS name, 3 | offence, 4 | defence 5 | FROM `${project_id}.master.teams` master 6 | JOIN `${project_id}.footystats.teams` footystats ON master.footystats_id = footystats.id 7 | JOIN `${project_id}.master.leagues` leagues ON footystats._SEASON_ID = leagues.latest_season_id 8 | JOIN `${project_id}.solver.teams_latest` solver ON master.solver_id = solver.id 9 | AND leagues.type = solver._TYPE 10 | WHERE leagues.footystats_name = league -------------------------------------------------------------------------------- /src/function/footystats_get_data/gcp/util.py: -------------------------------------------------------------------------------- 1 | import base64 2 | import json 3 | 4 | from cloudevents.http.event import CloudEvent 5 | 6 | 7 | def safe_load_json(s: str) -> dict | str: 8 | try: 9 | return json.loads(s) 10 | except (json.decoder.JSONDecodeError, TypeError): 11 | return s 12 | 13 | 14 | def decode_message(cloud_event: CloudEvent) -> dict | str: 15 | data = base64.b64decode(cloud_event.data["message"]["data"]).decode("utf-8") 16 | return safe_load_json(data) 17 | -------------------------------------------------------------------------------- /src/function/simulate_tournament/gcp/util.py: -------------------------------------------------------------------------------- 1 | import base64 2 | import json 3 | 4 | from cloudevents.http.event import CloudEvent 5 | 6 | 7 | def safe_load_json(s: str) -> dict | str: 8 | try: 9 | return json.loads(s) 10 | except (json.decoder.JSONDecodeError, TypeError): 11 | return s 12 | 13 | 14 | def decode_message(cloud_event: CloudEvent) -> dict | str: 15 | data = base64.b64decode(cloud_event.data["message"]["data"]).decode("utf-8") 16 | return 
@functions_framework.cloud_event
def main(_):
    """Publish a ``matches`` refresh request for every season needing an update.

    The triggering CloudEvent is ignored. Each row returned by
    ``footystats.get_season_id_delta()`` is merged into a message whose
    ``endpoint`` key is ``"matches"`` and published to the topic named by the
    ``TOPIC_NAME`` environment variable.

    Raises:
        KeyError: if ``TOPIC_NAME`` is unset while there are rows to publish.
        Exception: whatever the publish future raises when delivery fails.
    """
    seasons = bigquery.query_dict(
        query="SELECT * FROM footystats.get_season_id_delta();"
    )
    futures = [
        pubsub.publish_json_message(
            topic=os.environ["TOPIC_NAME"],
            data={"endpoint": "matches", **season},
        )
        for season in seasons
    ]
    # publish_json_message returns the client's publish future. Resolve every
    # future before returning so a failed publish raises here instead of being
    # lost once the function invocation has ended.
    for future in futures:
        future.result()
def decode_message(cloud_event: CloudEvent) -> dict | str:
    """Extract the Pub/Sub payload carried by *cloud_event*.

    Reads ``cloud_event.data["message"]["data"]``, base64-decodes it, decodes
    the bytes as UTF-8 and passes the text to ``safe_load_json``, whose result
    is returned.
    """
    encoded = cloud_event.data["message"]["data"]
    text = base64.b64decode(encoded).decode("utf-8")
    return safe_load_json(text)
-- Per league: the newest completed match date and the newest _DATE_UNIX,
-- keeping only leagues whose newest completed match is later than that
-- last run (or that have no run at all, via COALESCE(..., 0)).
SELECT
footystats_name, MAX(date_unix) AS latest_match_date, MAX(_DATE_UNIX) AS last_run
FROM ${project_id}.master.leagues master
JOIN ${project_id}.footystats.matches ON master.latest_season_id = matches._SEASON_ID
-- LEFT JOIN so leagues that have never been simulated are still returned.
LEFT JOIN ${project_id}.simulation.leagues simulation ON master.footystats_name = simulation._LEAGUE
WHERE status = 'complete'
AND is_simulate
-- NOTE(review): both sides of this comparison are the same unqualified name.
-- If the routine argument is also called `type`, BigQuery is expected to
-- resolve both to the column, which would make this filter a no-op for
-- non-NULL values. Confirm against the routine definition; renaming the
-- argument (e.g. `league_type`) would remove the ambiguity.
AND type = type
GROUP BY footystats_name
HAVING latest_match_date > COALESCE(last_run, 0)
@dataclass
class Winner:
    """Round that records the single team reaching it as the winner.

    The round never forwards teams anywhere (``advance_to`` is ``None``) and
    has nothing to simulate or reset.
    """

    def __post_init__(self):
        # No round follows this one.
        self.advance_to = None

    def add_teams(self, teams: list[Team]):
        """Log the ``"winner"`` round on the only team in *teams*.

        Raises:
            ValueError: if *teams* does not contain exactly one team.
        """
        if len(teams) == 1:
            champion = teams[0]
            champion.log_sim_rounds("winner")
            return

        raise ValueError("Winner round must have exactly one team.")

    def simulate(self):
        """Do nothing; this round has no matches to play."""

    def reset(self):
        """Do nothing; this round keeps no per-simulation state."""
@functions_framework.cloud_event
def main(cloud_event: CloudEvent):
    """Publish one request per season for the endpoint named in the event.

    The decoded Pub/Sub payload of *cloud_event* is used as the ``endpoint``
    value. Each row returned by ``footystats.get_season_id_initial()`` is
    merged with that endpoint and published to the topic named by the
    ``TOPIC_NAME`` environment variable.

    Raises:
        KeyError: if ``TOPIC_NAME`` is unset while there are rows to publish.
        Exception: whatever the publish future raises when delivery fails.
    """
    endpoint = decode_message(cloud_event)

    seasons = bigquery.query_dict(
        query="SELECT * FROM footystats.get_season_id_initial();"
    )
    futures = [
        pubsub.publish_json_message(
            topic=os.environ["TOPIC_NAME"],
            data={"endpoint": endpoint, **season},
        )
        for season in seasons
    ]
    # publish_json_message returns the client's publish future. Resolve every
    # future before returning so a failed publish raises here instead of being
    # lost once the function invocation has ended.
    for future in futures:
        future.result()
# Python scalar type -> BigQuery standard SQL type used for query parameters.
# Lookup is by exact type, so ``bool`` needs its own entry even though it is a
# subclass of ``int``.
SQL_TYPES = {str: "STRING", int: "INT64", float: "FLOAT64", bool: "BOOL"}


def query_dict(query: str, params: dict | None = None) -> list[dict]:
    """Run *query* and return its rows as dictionaries.

    Args:
        query: SQL text; may reference named parameters such as ``@name``.
        params: optional mapping of parameter name to scalar value. Each value
            is bound as a scalar query parameter whose SQL type is looked up
            in ``SQL_TYPES``.

    Returns:
        One ``dict`` per result row, passed through ``safe_load_json``.

    Raises:
        KeyError: if a parameter value has a type missing from ``SQL_TYPES``.
    """
    job_config = None
    if params:
        job_config = bigquery.QueryJobConfig(
            query_parameters=[
                bigquery.ScalarQueryParameter(name, SQL_TYPES[type(value)], value)
                for name, value in params.items()
            ]
        )
    query_job = CLIENT.query(query, job_config=job_config)
    return [safe_load_json(dict(row)) for row in query_job.result()]
= bigquery.Client() 6 | SQL_TYPES = {str: "STRING", int: "INT64"} 7 | 8 | 9 | def query_dict(query: str, params: dict | None = None) -> list[dict]: 10 | if params: 11 | job_config = bigquery.QueryJobConfig( 12 | query_parameters=[ 13 | bigquery.ScalarQueryParameter(key, SQL_TYPES[type(value)], value) 14 | for key, value in params.items() 15 | ] 16 | ) 17 | else: 18 | job_config = None 19 | query_job = CLIENT.query(query, job_config) 20 | return [safe_load_json(dict(row)) for row in query_job.result()] 21 | -------------------------------------------------------------------------------- /src/bigquery/sql/operations/map_hkjc_teams.sql: -------------------------------------------------------------------------------- 1 | WITH 2 | hkjc AS ( 3 | SELECT kick_off_time, tournament_name, home_id AS id, home_name AS name, home_name_en AS name_en 4 | FROM hkjc.odds_latest 5 | UNION ALL 6 | SELECT kick_off_time, tournament_name, away_id, away_name, away_name_en 7 | FROM hkjc.odds_latest) 8 | 9 | SELECT MAX(kick_off_time) AS kick_off_time, tournament_name, id, hkjc.name, name_en, footystats_id 10 | FROM hkjc 11 | LEFT JOIN master.teams ON hkjc.id = teams.hkjc_id 12 | WHERE transfermarkt_id IS NULL 13 | AND hkjc.name NOT LIKE '%U2_' 14 | AND hkjc.name NOT LIKE '%女足' 15 | AND hkjc.name NOT LIKE '%奧足' 16 | GROUP BY tournament_name, id, hkjc.name, name_en, footystats_id 17 | ORDER BY tournament_name, kick_off_time -------------------------------------------------------------------------------- /src/function/simulation_publish_messages/gcp/bigquery.py: -------------------------------------------------------------------------------- 1 | from google.cloud import bigquery 2 | 3 | from gcp.util import safe_load_json 4 | 5 | CLIENT = bigquery.Client() 6 | SQL_TYPES = {str: "STRING", int: "INT64"} 7 | 8 | 9 | def query_dict(query: str, params: dict | None = None) -> list[dict]: 10 | if params: 11 | job_config = bigquery.QueryJobConfig( 12 | query_parameters=[ 13 | 
bigquery.ScalarQueryParameter(key, SQL_TYPES[type(value)], value) 14 | for key, value in params.items() 15 | ] 16 | ) 17 | else: 18 | job_config = None 19 | query_job = CLIENT.query(query, job_config) 20 | return [safe_load_json(dict(row)) for row in query_job.result()] 21 | -------------------------------------------------------------------------------- /src/function/footystats_publish_season_ids_delta/gcp/bigquery.py: -------------------------------------------------------------------------------- 1 | from google.cloud import bigquery 2 | 3 | from .util import safe_load_json 4 | 5 | CLIENT = bigquery.Client() 6 | SQL_TYPES = {str: "STRING", int: "INT64"} 7 | 8 | 9 | def query_dict(query: str, params: dict | None = None) -> list[dict]: 10 | if params: 11 | job_config = bigquery.QueryJobConfig( 12 | query_parameters=[ 13 | bigquery.ScalarQueryParameter(key, SQL_TYPES[type(value)], value) 14 | for key, value in params.items() 15 | ] 16 | ) 17 | else: 18 | job_config = None 19 | query_job = CLIENT.query(query, job_config) 20 | return [safe_load_json(dict(row)) for row in query_job.result()] 21 | -------------------------------------------------------------------------------- /src/function/footystats_publish_season_ids_initial/gcp/bigquery.py: -------------------------------------------------------------------------------- 1 | from google.cloud import bigquery 2 | 3 | from gcp.util import safe_load_json 4 | 5 | CLIENT = bigquery.Client() 6 | SQL_TYPES = {str: "STRING", int: "INT64"} 7 | 8 | 9 | def query_dict(query: str, params: dict | None = None) -> list[dict]: 10 | if params: 11 | job_config = bigquery.QueryJobConfig( 12 | query_parameters=[ 13 | bigquery.ScalarQueryParameter(key, SQL_TYPES[type(value)], value) 14 | for key, value in params.items() 15 | ] 16 | ) 17 | else: 18 | job_config = None 19 | query_job = CLIENT.query(query, job_config) 20 | return [safe_load_json(dict(row)) for row in query_job.result()] 21 | 
@functions_framework.cloud_event
def main(cloud_event: CloudEvent):
    """Queue simulations after a solver ``teams`` file is written.

    The object name in ``cloud_event.data["name"]`` is expected to look like
    ``_TYPE=<type>/_DATE_UNIX=<digits>/<table>.json``. Only the ``teams``
    table triggers work: every row of ``simulation.get_messages(<type>)`` is
    published as a JSON message to the topic named by ``TOPIC_NAME``.

    Raises:
        KeyError: if ``TOPIC_NAME`` is unset while there are rows to publish.
        Exception: whatever the publish future raises when delivery fails.
    """
    match = re.match(
        r"_TYPE=(\w+)\/_DATE_UNIX=\d+\/(\w+)\.json", cloud_event.data["name"]
    )
    if match is None:
        # An object outside the expected layout is not ours to handle. Without
        # this guard the call below would crash with AttributeError on None.
        return
    _type, table = match.groups()

    if table != "teams":
        return

    leagues = bigquery.query_dict(
        query="SELECT * FROM `simulation.get_messages`(@type);", params={"type": _type}
    )
    futures = [
        pubsub.publish_json_message(
            topic=os.environ["TOPIC_NAME"],
            data=league,
        )
        for league in leagues
    ]
    # publish_json_message returns the client's publish future. Resolve every
    # future before returning so a failed publish raises here instead of being
    # lost once the function invocation has ended.
    for future in futures:
        future.result()
# One service account per key of var.roles.
resource "google_service_account" "service_accounts" {
  for_each   = var.roles
  account_id = each.key
  project    = var.project_id
}

locals {
  # Flatten { sa => [roles] } into one entry per (sa, role) pair, keyed
  # "<sa> <role>", so each grant can be managed as its own resource.
  project_roles = { for pair in flatten([
    for sa, roles in var.roles : [
      for role in roles : {
        key = "${sa} ${role}"
        value = {
          sa   = sa
          role = role
        }
      }
    ]
  ]) : pair.key => pair.value }
}

# google_project_iam_member is additive. The previous google_project_iam_binding
# is authoritative per role, so one binding per (sa, role) pair made grants of
# a shared role overwrite each other and evicted members set outside this module.
# NOTE(review): the resource type changes, so the first apply replaces the
# existing bindings; confirm nothing references the old resource address.
resource "google_project_iam_member" "service_account_roles" {
  for_each = local.project_roles
  role     = each.value.role
  member   = "serviceAccount:${google_service_account.service_accounts[each.value.sa].email}"
  project  = var.project_id
}
Asia.json: -------------------------------------------------------------------------------- 1 | { 2 | "3rd Round": { 3 | "format": "Groups", 4 | "h2h": false, 5 | "leg": 2, 6 | "groups": { 7 | "Group A": [ 8 | 8607, 9 | 8589, 10 | 8615, 11 | 8611, 12 | 8621, 13 | 8597 14 | ], 15 | "Group B": [ 16 | 8619, 17 | 8606, 18 | 8580, 19 | 8595, 20 | 8594, 21 | 8620 22 | ], 23 | "Group C": [ 24 | 8579, 25 | 8582, 26 | 8588, 27 | 8584, 28 | 8585, 29 | 8737 30 | ] 31 | } 32 | } 33 | } -------------------------------------------------------------------------------- /src/bigquery/sql/operations/map_hkjc_team_list.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | DISTINCT fs_teams.id AS footystats_id, 3 | hkjc_teams.id AS hkjc_id, 4 | hkjc_teams.name_en, 5 | fs_teams.name, 6 | fs_teams.cleanName, 7 | fs_teams.english_name, 8 | hkjc_teams.name_ch, 9 | fs_teams.country 10 | FROM hkjc.teams hkjc_teams 11 | JOIN footystats.teams fs_teams ON REPLACE(hkjc_teams.name_en, 'Utd', 'United') = functions.accent_to_latin(fs_teams.cleanName) 12 | OR REPLACE(hkjc_teams.name_en, 'Utd', 'United') = functions.accent_to_latin(fs_teams.english_name) 13 | WHERE 14 | NOT EXISTS ( 15 | SELECT 1 16 | FROM manual.teams manual_teams 17 | WHERE hkjc_teams.id = manual_teams.hkjc_id 18 | ) 19 | AND NOT EXISTS ( 20 | SELECT 1 21 | FROM manual.teams manual_teams 22 | WHERE fs_teams.id = manual_teams.footystats_id) 23 | AND hkjc_teams.name_ch NOT LIKE '%女足' -------------------------------------------------------------------------------- /assets/simulation/International FIFA Club World Cup.json: -------------------------------------------------------------------------------- 1 | { 2 | "Group Stage": { 3 | "format": "Groups", 4 | "h2h": true, 5 | "leg": 1, 6 | "advance_to": { 7 | "Round of 16": { 8 | "start": 1, 9 | "end": 16 10 | } 11 | } 12 | }, 13 | "Round of 16": { 14 | "format": "Knockout", 15 | "leg": 1, 16 | "advance_to": "Quarter-finals" 17 | }, 18 | 
"Quarter-finals": { 19 | "format": "Knockout", 20 | "leg": 1, 21 | "advance_to": "Semi-finals" 22 | }, 23 | "Semi-finals": { 24 | "format": "Knockout", 25 | "leg": 1, 26 | "advance_to": "Final" 27 | }, 28 | "Final": { 29 | "format": "Knockout", 30 | "leg": 1, 31 | "advance_to": "Winner" 32 | }, 33 | "Winner": { 34 | "format": "Winner" 35 | } 36 | } -------------------------------------------------------------------------------- /src/bigquery/sql/outputs/team_ratings_international.sql: -------------------------------------------------------------------------------- 1 | WITH latest AS ( 2 | SELECT 3 | RANK() OVER(ORDER BY rating DESC) AS rank, 4 | transfermarkt_id, 5 | id, 6 | name, 7 | ROUND(offence, 2) AS offence, 8 | ROUND(defence, 2) AS defence, 9 | ROUND(rating, 1) AS rating, 10 | _TYPE, 11 | _DATE_UNIX 12 | FROM solver.team_ratings 13 | JOIN master.teams ON team_ratings.id = teams.solver_id 14 | AND _TYPE = type 15 | WHERE team_ratings._TYPE = 'International' 16 | AND teams.in_team_rating 17 | ) 18 | 19 | SELECT 20 | rank, 21 | RANK() OVER(ORDER BY team_ratings_7d.rating DESC) - rank AS rank_7d_diff, 22 | transfermarkt_id, 23 | name, 24 | offence, 25 | defence, 26 | latest.rating, 27 | FORMAT_TIMESTAMP('%F %H:%M', TIMESTAMP_ADD(TIMESTAMP_SECONDS(_DATE_UNIX), INTERVAL 2 HOUR), 'Asia/Hong_Kong') AS date_unix 28 | FROM latest 29 | JOIN solver.team_ratings_7d USING(id, _TYPE) 30 | ORDER BY rank; -------------------------------------------------------------------------------- /infrastructure/modules/services/variables.tf: -------------------------------------------------------------------------------- 1 | variable "project_id" { 2 | description = "The GCP project you want to enable APIs on." 3 | type = string 4 | } 5 | 6 | variable "enable_apis" { 7 | description = "Whether to actually enable the APIs." 8 | type = bool 9 | default = true 10 | } 11 | 12 | variable "activate_apis" { 13 | description = "The list of apis to activate within the project." 
@functions_framework.cloud_event
def main(cloud_event: CloudEvent):
    """Solve team ratings for one message and upload every result as JSON.

    The decoded event must carry ``_TYPE`` and ``latest_match_date``. Both are
    forwarded to the match query and reused as the hive-partition values of
    the uploaded blobs.
    """
    message = decode_message(cloud_event)
    _type, latest_match_date = message["_TYPE"], message["latest_match_date"]

    inputs = queries.get_matches_and_teams(_type, latest_match_date)
    results = solver(inputs["matches"], inputs["teams"], inputs["leagues"])

    # One blob per entry of the solver output, named after its key.
    for name, payload in results.items():
        storage.upload_json_to_bucket(
            payload,
            blob_name=f"{name}.json",
            bucket_name=os.environ["BUCKET_NAME"],
            hive_partitioning={"_TYPE": _type, "_DATE_UNIX": latest_match_date},
        )
"season", 14 | "type": "STRUCT", 15 | "mode": "REPEATED", 16 | "fields": [ 17 | { 18 | "name": "id", 19 | "type": "INTEGER", 20 | "mode": "REQUIRED" 21 | }, 22 | { 23 | "name": "year", 24 | "type": "STRING", 25 | "mode": "REQUIRED" 26 | }, 27 | { 28 | "name": "country", 29 | "type": "STRING", 30 | "mode": "REQUIRED" 31 | } 32 | ] 33 | }, 34 | { 35 | "name": "country", 36 | "type": "STRING", 37 | "mode": "REQUIRED" 38 | } 39 | ] -------------------------------------------------------------------------------- /infrastructure/modules/project/variables.tf: -------------------------------------------------------------------------------- 1 | variable "name" { 2 | description = "The name for the project." 3 | type = string 4 | } 5 | 6 | variable "project_id" { 7 | description = "The ID to give the project." 8 | type = string 9 | } 10 | 11 | variable "billing_account" { 12 | description = "The ID of the billing account to associate this project with." 13 | type = string 14 | } 15 | 16 | variable "activate_apis" { 17 | description = "The list of apis to activate within the project" 18 | type = list(string) 19 | default = [] 20 | } 21 | 22 | variable "activate_api_identities" { 23 | description = "The list of service identities (Google Managed service account for the API) to force-create for the project (e.g. in order to grant additional roles)." 24 | type = map(list(string)) 25 | default = {} 26 | } 27 | 28 | variable "activate_api_sleep_duration" { 29 | description = "The duration to sleep after activating the apis." 
@functions_framework.cloud_event
def main(_):
    """Fetch the FootyStats league list and store it as ``league_list.json``."""
    league_list = get_footystats(
        "list",
        os.environ["FOOTYSTATS_API_KEY"],
        chosen_leagues_only="true",
    )
    storage.upload_json_to_bucket(
        league_list,
        blob_name="league_list.json",
        bucket_name=os.environ["BUCKET_NAME"],
    )


def get_footystats(endpoint: str, key: str, **kwargs) -> dict:
    """Call a FootyStats ``league-<endpoint>`` API and return its ``data`` field.

    Args:
        endpoint: Suffix appended to ``league-`` to form the URL path.
        key: API key, sent as the ``key`` query parameter (never logged).
        **kwargs: Additional query parameters, passed through unchanged.

    Raises:
        requests.HTTPError: If the response status indicates an error.
    """
    logging.info(f"Getting footystats data: {endpoint=}, {kwargs=}")

    url = f"https://api.football-data-api.com/league-{endpoint}"
    query = {"key": key, **kwargs}
    response = requests.get(url, params=query, timeout=5)
    response.raise_for_status()

    data = response.json()["data"]
    logging.info(f"Got footystats data: {endpoint=}, {kwargs=}")
    return data
-- Knockout-stage simulation output for 'International Copa America': one row
-- per team with its current rating and the simulated value for each round.
WITH result AS (
  SELECT
    teams.transfermarkt_id,
    teams.name,
    rating,
    offence,
    defence,
    -- A missing round value is reported as 0.
    COALESCE(rounds.QUARTER_FINALS, 0) AS qf,
    COALESCE(rounds.SEMI_FINALS, 0) AS sf,
    COALESCE(rounds.FINAL, 0) AS f,
    COALESCE(rounds.CHAMPS, 0) AS champ,
    leagues._DATE_UNIX
  FROM `simulation.leagues_latest` leagues
  JOIN master.teams ON leagues.team = teams.footystats_id
  JOIN solver.team_ratings ON teams.solver_id = team_ratings.id
  WHERE _LEAGUE = 'International Copa America'
    AND team_ratings._TYPE = 'International'
)

SELECT
  transfermarkt_id,
  name,
  ROUND(rating, 1) AS rating,
  ROUND(offence, 2) AS offence,
  ROUND(defence, 2) AS defence,
  ROUND(qf, 3) AS qf,
  ROUND(sf, 3) AS sf,
  ROUND(f, 3) AS f,
  ROUND(champ, 3) AS champ,
  -- _DATE_UNIX (seconds) is shifted by 3 hours and rendered in Hong Kong time.
  -- NOTE(review): the reason for the 3-hour offset is not visible here - confirm.
  FORMAT_TIMESTAMP('%F %H:%M', TIMESTAMP_ADD(TIMESTAMP_SECONDS(_DATE_UNIX), INTERVAL 3 HOUR), 'Asia/Hong_Kong') AS date_unix
FROM result
-- Deepest round first; rating breaks remaining ties.
ORDER BY champ DESC, f DESC, sf DESC, qf DESC, rating DESC
@functions_framework.cloud_event
def main(_):
    """Download the HKJC team list and store it as ``teamList.json``."""
    storage.upload_json_to_bucket(
        data=get_hkjc_team_list(),
        blob_name="teamList.json",
        bucket_name=os.environ["BUCKET_NAME"],
    )


def get_hkjc_team_list() -> dict:
    """Query the HKJC GraphQL endpoint for the ``teamList`` and return it.

    Returns:
        The ``data.teamList`` member of the JSON response.
        NOTE(review): the GraphQL selection suggests this is a list of team
        records rather than a dict - confirm and adjust the annotation.

    Raises:
        requests.HTTPError: If the response status indicates an error.
    """
    logging.info("Getting HKJC team list")
    body = """query teamList {
  teamList {
    id
    code
    name_ch
    name_en
  }
}"""
    response = requests.post(
        url="https://info.cld.hkjc.com/graphql/base/",
        headers={"content-type": "application/json"},
        json={
            "query": body,
            "operationName": "teamList",
        },
        timeout=5,
    )
    response.raise_for_status()
    team_list = response.json()["data"]["teamList"]
    # Use the logging pipeline like the start message instead of print().
    logging.info("Got HKJC team list")
    return team_list
teams.name, 5 | RIGHT(leagues.group, 1) AS _group, 6 | rating, 7 | offence, 8 | defence, 9 | table.scored - table.conceded AS goal_diff, 10 | table.wins * 3 + table.draws + COALESCE(table.correction, 0) AS points, 11 | COALESCE(positions._1, 0) AS ac, 12 | leagues._DATE_UNIX 13 | FROM `simulation.leagues_latest` leagues 14 | JOIN master.teams ON leagues.team = teams.footystats_id 15 | JOIN solver.team_ratings ON teams.solver_id = team_ratings.id 16 | WHERE _LEAGUE = 'International Asian Cup Qualification' 17 | AND team_ratings._TYPE = 'International' 18 | ) 19 | 20 | SELECT 21 | transfermarkt_id, 22 | name, 23 | _group, 24 | ROUND(rating, 1) AS rating, 25 | ROUND(offence, 2) AS offence, 26 | ROUND(defence, 2) AS defence, 27 | ROUND(goal_diff, 1) AS goal_diff, 28 | ROUND(points, 1) AS points, 29 | ROUND(ac, 3) AS ac, 30 | FORMAT_TIMESTAMP('%F %H:%M', TIMESTAMP_ADD(TIMESTAMP_SECONDS(_DATE_UNIX), INTERVAL 2 HOUR), 'Asia/Hong_Kong') AS date_unix 31 | FROM result 32 | ORDER BY _group <> 'C', _group, points DESC -------------------------------------------------------------------------------- /assets/simulation/Asia AFC Champions League.json: -------------------------------------------------------------------------------- 1 | { 2 | "East": { 3 | "format": "Season", 4 | "h2h": true, 5 | "leg": "Swiss", 6 | "advance_to": { 7 | "Round of 16": { 8 | "start": 1, 9 | "end": 8 10 | } 11 | } 12 | }, 13 | "West": { 14 | "format": "Season", 15 | "h2h": true, 16 | "leg": "Swiss", 17 | "advance_to": { 18 | "Round of 16": { 19 | "start": 1, 20 | "end": 8 21 | } 22 | } 23 | }, 24 | "Round of 16": { 25 | "format": "Knockout", 26 | "leg": 2, 27 | "advance_to": "Quarter-finals" 28 | }, 29 | "Quarter-finals": { 30 | "format": "Knockout", 31 | "leg": 1, 32 | "advance_to": "Semi-finals" 33 | }, 34 | "Semi-finals": { 35 | "format": "Knockout", 36 | "leg": 1, 37 | "advance_to": "Final" 38 | }, 39 | "Final": { 40 | "format": "Knockout", 41 | "leg": 1, 42 | "advance_to": "Winner" 43 | }, 44 | 
"Winner": { 45 | "format": "Winner" 46 | } 47 | } -------------------------------------------------------------------------------- /assets/simulation/Europe UEFA Champions League.json: -------------------------------------------------------------------------------- 1 | { 2 | "League Stage": { 3 | "format": "Season", 4 | "h2h": false, 5 | "leg": "Swiss", 6 | "advance_to": { 7 | "Round of 16": { 8 | "start": 1, 9 | "end": 8 10 | }, 11 | "Knockout Round Play-offs": { 12 | "start": 9, 13 | "end": 24 14 | } 15 | } 16 | }, 17 | "Knockout Round Play-offs": { 18 | "format": "Knockout", 19 | "leg": 2, 20 | "advance_to": "Round of 16" 21 | }, 22 | "Round of 16": { 23 | "format": "Knockout", 24 | "leg": 2, 25 | "advance_to": "Quarter-finals" 26 | }, 27 | "Quarter-finals": { 28 | "format": "Knockout", 29 | "leg": 2, 30 | "advance_to": "Semi-finals" 31 | }, 32 | "Semi-finals": { 33 | "format": "Knockout", 34 | "leg": 2, 35 | "advance_to": "Final" 36 | }, 37 | "Final": { 38 | "format": "Knockout", 39 | "leg": 1, 40 | "advance_to": "Winner" 41 | }, 42 | "Winner": { 43 | "format": "Winner" 44 | } 45 | } -------------------------------------------------------------------------------- /src/bigquery/sql/outputs/simulation_asc_ko.sql: -------------------------------------------------------------------------------- 1 | WITH result AS ( 2 | SELECT 3 | teams.transfermarkt_id, 4 | teams.name, 5 | rating, 6 | offence, 7 | defence, 8 | COALESCE(rounds.ROUND_OF_16, 0) AS r16, 9 | COALESCE(rounds.QUARTER_FINALS, 0) AS qf, 10 | COALESCE(rounds.SEMI_FINALS, 0) AS sf, 11 | COALESCE(rounds.FINAL, 0) AS f, 12 | COALESCE(rounds.CHAMPS, 0) AS champ, 13 | leagues._DATE_UNIX 14 | FROM `simulation.leagues_latest` leagues 15 | JOIN master.teams ON leagues.team = teams.footystats_id 16 | JOIN solver.team_ratings ON teams.solver_id = team_ratings.id 17 | WHERE _LEAGUE = 'International AFC Asian Cup' 18 | AND team_ratings._TYPE = 'International' 19 | ) 20 | 21 | SELECT 22 | transfermarkt_id, 23 | name, 24 
| ROUND(rating, 1) AS rating, 25 | ROUND(offence, 2) AS offence, 26 | ROUND(defence, 2) AS defence, 27 | ROUND(r16, 3) AS r16, 28 | ROUND(qf, 3) AS qf, 29 | ROUND(sf, 3) AS sf, 30 | ROUND(f, 3) AS f, 31 | ROUND(champ, 3) AS champ, 32 | FORMAT_TIMESTAMP('%F %H:%M', TIMESTAMP_ADD(TIMESTAMP_SECONDS(_DATE_UNIX), INTERVAL 3 HOUR), 'Asia/Hong_Kong') AS date_unix 33 | FROM result 34 | ORDER BY champ DESC, f DESC, sf DESC, qf DESC, r16 DESC, rating DESC -------------------------------------------------------------------------------- /src/bigquery/sql/outputs/simulation_cwc_ko.sql: -------------------------------------------------------------------------------- 1 | WITH result AS ( 2 | SELECT 3 | teams.transfermarkt_id, 4 | teams.name, 5 | rating, 6 | offence, 7 | defence, 8 | COALESCE(rounds.ROUND_OF_16, 0) AS r16, 9 | COALESCE(rounds.QUARTER_FINALS, 0) AS qf, 10 | COALESCE(rounds.SEMI_FINALS, 0) AS sf, 11 | COALESCE(rounds.FINAL, 0) AS f, 12 | COALESCE(rounds.WINNER, 0) AS winner, 13 | leagues._DATE_UNIX 14 | FROM `simulation.leagues_latest` leagues 15 | JOIN master.teams ON leagues.team = teams.footystats_id 16 | JOIN solver.team_ratings ON teams.solver_id = team_ratings.id 17 | WHERE _LEAGUE = 'International FIFA Club World Cup' 18 | AND team_ratings._TYPE = 'Club' 19 | ) 20 | 21 | SELECT 22 | transfermarkt_id, 23 | name, 24 | ROUND(rating, 1) AS rating, 25 | ROUND(offence, 2) AS offence, 26 | ROUND(defence, 2) AS defence, 27 | ROUND(r16, 3) AS r16, 28 | ROUND(qf, 3) AS qf, 29 | ROUND(sf, 3) AS sf, 30 | ROUND(f, 3) AS f, 31 | ROUND(winner, 3) AS winner, 32 | FORMAT_TIMESTAMP('%F %H:%M', TIMESTAMP_ADD(TIMESTAMP_SECONDS(_DATE_UNIX), INTERVAL 3 HOUR), 'Asia/Hong_Kong') AS date_unix 33 | FROM result 34 | ORDER BY winner DESC, f DESC, sf DESC, qf DESC, r16 DESC, rating DESC -------------------------------------------------------------------------------- /src/bigquery/sql/outputs/simulation_aco_ko.sql: 
-------------------------------------------------------------------------------- 1 | WITH result AS ( 2 | SELECT 3 | teams.transfermarkt_id, 4 | teams.name, 5 | rating, 6 | offence, 7 | defence, 8 | COALESCE(rounds.ROUND_OF_16, 0) AS r16, 9 | COALESCE(rounds.QUARTER_FINALS, 0) AS qf, 10 | COALESCE(rounds.SEMI_FINALS, 0) AS sf, 11 | COALESCE(rounds.FINAL, 0) AS f, 12 | COALESCE(rounds.CHAMPS, 0) AS champ, 13 | leagues._DATE_UNIX 14 | FROM `simulation.leagues_latest` leagues 15 | JOIN master.teams ON leagues.team = teams.footystats_id 16 | JOIN solver.team_ratings ON teams.solver_id = team_ratings.id 17 | WHERE _LEAGUE = 'International Africa Cup of Nations' 18 | AND team_ratings._TYPE = 'International' 19 | ) 20 | 21 | SELECT 22 | transfermarkt_id, 23 | name, 24 | ROUND(rating, 1) AS rating, 25 | ROUND(offence, 2) AS offence, 26 | ROUND(defence, 2) AS defence, 27 | ROUND(r16, 3) AS r16, 28 | ROUND(qf, 3) AS qf, 29 | ROUND(sf, 3) AS sf, 30 | ROUND(f, 3) AS f, 31 | ROUND(champ, 3) AS champ, 32 | FORMAT_TIMESTAMP('%F %H:%M', TIMESTAMP_ADD(TIMESTAMP_SECONDS(_DATE_UNIX), INTERVAL 3 HOUR), 'Asia/Hong_Kong') AS date_unix 33 | FROM result 34 | ORDER BY champ DESC, f DESC, sf DESC, qf DESC, r16 DESC, rating DESC -------------------------------------------------------------------------------- /src/bigquery/sql/outputs/simulation_euro_ko.sql: -------------------------------------------------------------------------------- 1 | WITH result AS ( 2 | SELECT 3 | teams.transfermarkt_id, 4 | teams.name, 5 | rating, 6 | offence, 7 | defence, 8 | COALESCE(rounds.ROUND_OF_16, 0) AS r16, 9 | COALESCE(rounds.QUARTER_FINALS, 0) AS qf, 10 | COALESCE(rounds.SEMI_FINALS, 0) AS sf, 11 | COALESCE(rounds.FINAL, 0) AS f, 12 | COALESCE(rounds.CHAMPS, 0) AS champ, 13 | leagues._DATE_UNIX 14 | FROM `simulation.leagues_latest` leagues 15 | JOIN master.teams ON leagues.team = teams.footystats_id 16 | JOIN solver.team_ratings ON teams.solver_id = team_ratings.id 17 | WHERE _LEAGUE = 'International UEFA 
@dataclass
class Team:
    """A team taking part in a simulation, identified by ``name``.

    Equality, ordering and hashing all use ``name`` only. Besides the three
    dataclass fields, every instance carries tables and result counters that
    are created in ``__post_init__``.
    """

    name: str
    offence: float
    defence: float

    def __post_init__(self):
        # Standings of the simulation run in progress; cleared by reset().
        self.table = Table()
        self.h2h_table = Table()
        # Accumulators that reset() does not touch.
        self.sim_table = Table()
        self.sim_positions = Results()
        self.sim_rounds = Results()

    def __eq__(self, other: object) -> bool:
        """Return True when ``other`` is a Team with the same name.

        Non-Team operands (including ``None``) yield ``NotImplemented`` so
        Python falls back to its default comparison and ``==`` evaluates to a
        real ``False`` instead of leaking the operand or raising.
        """
        if not isinstance(other, Team):
            return NotImplemented
        return self.name == other.name

    def __gt__(self, other: "Team") -> bool:
        """Order teams alphabetically by name."""
        return self.name > other.name

    def __hash__(self):
        # Kept consistent with __eq__: teams with equal names hash equally.
        return hash(self.name)

    def set_correction(self, value: int):
        """Set the points correction on the current table."""
        self.table.correction = value

    def log_sim_table(self):
        """Add the current table to the accumulated simulation table."""
        self.sim_table += self.table

    def log_sim_positions(self, position: int):
        """Count one finish at ``position`` under the key ``_<position>``."""
        self.sim_positions[f"_{position}"] += 1

    def log_sim_rounds(self, _round: str):
        """Count one appearance in the round named ``_round``."""
        self.sim_rounds[_round] += 1

    def reset(self):
        """Clear the current and head-to-head tables."""
        self.table.reset()
        self.h2h_table.reset()
-- Club team ratings: current rank, rank change against the 7-day ratings, and
-- the team's league, for teams flagged in_team_rating.
WITH latest AS (
  SELECT
    RANK() OVER(ORDER BY team_ratings.rating DESC) AS rank,
    teams.transfermarkt_id AS team_transfermarkt_id,
    teams.name AS team_name,
    id,
    leagues.transfermarkt_id As league_transfermarkt_id,
    leagues.name AS league_name,
    offence,
    defence,
    team_ratings.rating,
    _TYPE,
    _DATE_UNIX
  FROM solver.team_ratings
  -- A rating row pairs with the master team of the same solver id and type.
  JOIN master.teams ON team_ratings.id = teams.solver_id
    AND team_ratings._TYPE = teams.type
  JOIN master.leagues ON teams.league_name = leagues.footystats_name
  WHERE team_ratings._TYPE = 'Club'
    AND teams.in_team_rating
)

SELECT
  rank,
  -- Rank by the 7-day rating minus the current rank.
  RANK() OVER(ORDER BY team_ratings_7d.rating DESC) - rank AS rank_7d_diff,
  team_transfermarkt_id,
  team_name,
  league_transfermarkt_id,
  league_name,
  ROUND(offence, 2) AS offence,
  ROUND(defence, 2) AS defence,
  ROUND(latest.rating, 1) AS rating,
  -- _DATE_UNIX (seconds) is shifted by 2 hours and rendered in Hong Kong time.
  -- NOTE(review): the reason for the 2-hour offset is not visible here - confirm.
  FORMAT_TIMESTAMP('%F %H:%M', TIMESTAMP_ADD(TIMESTAMP_SECONDS(_DATE_UNIX), INTERVAL 2 HOUR), 'Asia/Hong_Kong') AS date_unix
FROM latest
-- LEFT JOIN keeps teams that have no row in team_ratings_7d.
LEFT JOIN solver.team_ratings_7d USING(id, _TYPE)
ORDER BY rank;
JOIN solver.team_ratings ON teams.solver_id = team_ratings.id 17 | WHERE _LEAGUE = 'International WC Qualification Asia' 18 | AND team_ratings._TYPE = 'International' 19 | ) 20 | 21 | SELECT 22 | transfermarkt_id, 23 | name, 24 | _group, 25 | ROUND(rating, 1) AS rating, 26 | ROUND(offence, 2) AS offence, 27 | ROUND(defence, 2) AS defence, 28 | ROUND(goal_diff, 1) AS goal_diff, 29 | ROUND(points, 1) AS points, 30 | ROUND(wc, 3) AS wc, 31 | ROUND(r4, 3) AS r4, 32 | FORMAT_TIMESTAMP('%F %H:%M', TIMESTAMP_ADD(TIMESTAMP_SECONDS(_DATE_UNIX), INTERVAL 2 HOUR), 'Asia/Hong_Kong') AS date_unix 33 | FROM result 34 | ORDER BY _group, points DESC -------------------------------------------------------------------------------- /src/bigquery/sql/master/leagues.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | COALESCE(tournament_name, name_ch, _NAME) AS name, 3 | _COUNTRY AS country, 4 | CASE 5 | WHEN _COUNTRY = 'International' AND EXISTS ( 6 | SELECT 1 7 | FROM footystats.teams 8 | WHERE seasons._SEASON_ID = teams._SEASON_ID 9 | AND teams.name LIKE '% National Team' 10 | ) 11 | THEN 'International' 12 | ELSE 'Club' 13 | END AS type, 14 | CASE 15 | WHEN MAX(division) OVER (PARTITION BY _COUNTRY) > 1 AND division > 1 16 | THEN CONCAT(_COUNTRY, division) 17 | ELSE _COUNTRY 18 | END AS division, 19 | COALESCE(REGEXP_EXTRACT(sequence, r'\.\d+\.(\d+)\.'), display_order) AS display_order, 20 | format = 'Domestic League' AND division > 0 AS is_league, 21 | display_order IS NOT NULL AS is_manual, 22 | COALESCE(is_simulate, FALSE) AS is_simulate, 23 | seasons.id AS latest_season_id, 24 | _YEAR AS latest_season_year, 25 | _NAME AS footystats_name, 26 | hkjc_id, 27 | transfermarkt_id 28 | FROM footystats.seasons 29 | LEFT JOIN manual.leagues ON seasons._NAME = leagues.footystats_id 30 | LEFT JOIN hkjc.odds_latest ON leagues.hkjc_id = odds_latest.tournament_id 31 | QUALIFY ROW_NUMBER() OVER (PARTITION BY _NAME ORDER BY RIGHT(_YEAR, 4) DESC, 
LEFT(_YEAR, 4) DESC, odds_latest.update_at DESC) = 1 -------------------------------------------------------------------------------- /src/bigquery/sql/outputs/simulation_acle_ko.sql: -------------------------------------------------------------------------------- 1 | WITH result AS ( 2 | SELECT 3 | teams.transfermarkt_id, 4 | teams.name, 5 | rating, 6 | offence, 7 | defence, 8 | table.wins * 3 + table.draws + COALESCE(table.correction, 0) AS points, 9 | COALESCE(rounds.round_of_16, 0) AS r16, 10 | COALESCE(rounds.quarter_finals, 0) AS qf, 11 | COALESCE(rounds.semi_finals, 0) AS sf, 12 | COALESCE(rounds.final, 0) AS f, 13 | COALESCE(rounds.winner, 0) AS winner, 14 | leagues._DATE_UNIX 15 | FROM `simulation.leagues_latest` leagues 16 | JOIN master.teams ON leagues.team = teams.footystats_id 17 | JOIN solver.team_ratings ON teams.solver_id = team_ratings.id 18 | WHERE _LEAGUE = 'Asia AFC Champions League' 19 | AND team_ratings._TYPE = 'Club' 20 | ) 21 | 22 | SELECT 23 | transfermarkt_id, 24 | name, 25 | ROUND(rating, 1) AS rating, 26 | ROUND(offence, 2) AS offence, 27 | ROUND(defence, 2) AS defence, 28 | ROUND(r16, 3) AS r16, 29 | ROUND(qf, 3) AS qf, 30 | ROUND(sf, 3) AS sf, 31 | ROUND(f, 3) AS f, 32 | ROUND(winner, 3) AS winner, 33 | FORMAT_TIMESTAMP('%F %H:%M', TIMESTAMP_ADD(TIMESTAMP_SECONDS(_DATE_UNIX), INTERVAL 2 HOUR), 'Asia/Hong_Kong') AS date_unix 34 | FROM result 35 | ORDER BY winner DESC, f DESC, sf DESC, qf DESC, r16 DESC, rating DESC -------------------------------------------------------------------------------- /src/bigquery/sql/outputs/simulation_cl1.sql: -------------------------------------------------------------------------------- 1 | WITH result AS ( 2 | SELECT 3 | teams.transfermarkt_id, 4 | teams.name, 5 | rating, 6 | offence, 7 | defence, 8 | table.scored - table.conceded AS goal_diff, 9 | table.wins * 3 + table.draws + COALESCE(table.correction, 0) AS points, 10 | COALESCE(positions._1, 0) AS champ, 11 | COALESCE(positions._1, 0) + 
COALESCE(positions._2, 0) AS promo, 12 | COALESCE(positions._15, 0) + COALESCE(positions._16, 0) AS relegation, 13 | leagues._DATE_UNIX 14 | FROM `simulation.leagues_latest` leagues 15 | JOIN master.teams ON leagues.team = teams.footystats_id 16 | JOIN solver.team_ratings ON teams.solver_id = team_ratings.id 17 | WHERE _LEAGUE = 'China China League One' 18 | AND team_ratings._TYPE = 'Club' 19 | ) 20 | 21 | SELECT 22 | transfermarkt_id, 23 | name, 24 | ROUND(rating, 1) AS rating, 25 | ROUND(offence, 2) AS offence, 26 | ROUND(defence, 2) AS defence, 27 | ROUND(goal_diff, 1) AS goal_diff, 28 | ROUND(points, 1) AS points, 29 | ROUND(champ, 3) AS champ, 30 | ROUND(promo, 3) AS promo, 31 | ROUND(relegation, 3) AS relegation, 32 | FORMAT_TIMESTAMP('%F %H:%M', TIMESTAMP_ADD(TIMESTAMP_SECONDS(_DATE_UNIX), INTERVAL 2 HOUR), 'Asia/Hong_Kong') AS date_unix 33 | FROM result 34 | ORDER BY points DESC -------------------------------------------------------------------------------- /src/bigquery/sql/master/teams.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | COALESCE(hkjc.name_ch, manual.name_ch, footystats.name) AS name, 3 | footystats.country, 4 | CASE 5 | WHEN _NAME LIKE 'International WC Qualification %' THEN 'International' 6 | ELSE 'Club' 7 | END AS type, 8 | footystats.id AS footystats_id, 9 | hkjc.id AS hkjc_id, 10 | manual.transfermarkt_id, 11 | CASE 12 | WHEN latest_season_id IS NOT NULL THEN CAST(footystats.id AS STRING) 13 | ELSE footystats.country 14 | END AS solver_id, 15 | manual.name_ch IS NOT NULL AS is_manual, 16 | COALESCE(is_simulate, FALSE) AS is_simulate, 17 | CASE 18 | WHEN _NAME LIKE 'International WC Qualification %' THEN (hkjc.id IS NOT NULL OR footystats.country = 'Hong Kong') 19 | ELSE (leagues.hkjc_id IS NOT NULL OR leagues.is_manual IS TRUE) AND leagues.footystats_name <> 'Mexico Ascenso MX' 20 | END AS in_team_rating, 21 | footystats_name AS league_name 22 | FROM `footystats.teams` footystats 23 | 
@dataclass
class Table:
    """Win/draw/loss and goal tallies plus a points correction."""

    wins: int = 0
    draws: int = 0
    losses: int = 0
    scored: int = 0
    conceded: int = 0
    correction: int = 0

    # Field names in constructor order, shared by the arithmetic operators.
    _COLUMNS = ("wins", "draws", "losses", "scored", "conceded", "correction")

    def __add__(self, other: "Table"):
        """Return a new Table holding the field-wise sum of both tables."""
        return Table(
            *(getattr(self, col) + getattr(other, col) for col in Table._COLUMNS)
        )

    def __truediv__(self, other: "Table"):
        """Return a new Table with every field divided by ``other``.

        ``other`` is used directly as the divisor of each field.
        """
        return Table(*(getattr(self, col) / other for col in Table._COLUMNS))

    @property
    def points(self) -> int:
        """Three points per win, one per draw, plus the correction."""
        total = 3 * self.wins
        total += self.draws
        return total + self.correction

    @property
    def goal_diff(self) -> int:
        """Goals scored minus goals conceded."""
        return self.scored - self.conceded

    def reset(self):
        """Zero the result and goal tallies.

        ``correction`` is left untouched (presumably so a points adjustment
        outlives a reset - verify against callers).
        """
        self.wins = self.draws = self.losses = 0
        self.scored = self.conceded = 0
-- League simulation output for 'Japan J1 League': one row per team with its
-- rating, table figures, and the simulated value for each outcome of interest.
WITH result AS (
  SELECT
    teams.transfermarkt_id,
    teams.name,
    rating,
    offence,
    defence,
    table.scored - table.conceded AS goal_diff,
    -- COALESCE keeps points non-NULL when no correction is recorded, matching
    -- the other simulation output queries.
    table.wins * 3 + table.draws + COALESCE(table.correction, 0) AS points,
    COALESCE(positions._1, 0) AS champ,
    COALESCE(positions._1, 0) + COALESCE(positions._2, 0) AS acle,
    COALESCE(positions._3, 0) AS acl2,
    COALESCE(positions._18, 0) + COALESCE(positions._19, 0) + COALESCE(positions._20, 0) AS relegation,
    leagues._DATE_UNIX
  FROM `simulation.leagues_latest` leagues
  JOIN master.teams ON leagues.team = teams.footystats_id
  JOIN solver.team_ratings ON teams.solver_id = team_ratings.id
  WHERE _LEAGUE = 'Japan J1 League'
    AND team_ratings._TYPE = 'Club'
)

SELECT
  transfermarkt_id,
  name,
  ROUND(rating, 1) AS rating,
  ROUND(offence, 2) AS offence,
  ROUND(defence, 2) AS defence,
  ROUND(goal_diff, 1) AS goal_diff,
  ROUND(points, 1) AS points,
  ROUND(champ, 3) AS champ,
  ROUND(acle, 3) AS acle,
  ROUND(acl2, 3) AS acl2,
  ROUND(relegation, 3) AS relegation,
  -- _DATE_UNIX (seconds) is shifted by 2 hours and rendered in Hong Kong time.
  FORMAT_TIMESTAMP('%F %H:%M', TIMESTAMP_ADD(TIMESTAMP_SECONDS(_DATE_UNIX), INTERVAL 2 HOUR), 'Asia/Hong_Kong') AS date_unix
FROM result
ORDER BY points DESC
def get_matches_and_teams(_type: str, max_time: int) -> dict:
    """Load solver input rows from BigQuery and wrap them in model objects.

    Runs the ``solver.get_matches`` table function with ``_type`` and
    ``max_time`` as query parameters. From the returned rows it builds one
    ``League`` per distinct league name, one ``Team`` per team id seen on
    either side of a match, and one ``Match`` per row. Matches reference the
    shared ``League`` / ``Team`` instances rather than copies.

    Returns:
        A dict with keys ``"leagues"`` and ``"teams"`` (dict value views) and
        ``"matches"`` (a list of ``Match``).
    """
    rows = bigquery.query_dict(
        query="SELECT * FROM `solver.get_matches`(@type, @max_time);",
        params={"type": _type, "max_time": max_time},
    )

    # One League object per distinct league name.
    leagues = {}
    for league_name in {row["league_name"] for row in rows}:
        leagues[league_name] = League(league_name)

    # Collect (team id, in-rating flag) pairs from both sides of every match.
    home_pairs = {(row["home_id"], row["home_team_in_rating"]) for row in rows}
    away_pairs = {(row["away_id"], row["away_team_in_rating"]) for row in rows}
    teams = {}
    for team_id, in_rating in home_pairs | away_pairs:
        teams[team_id] = Team(team_id, in_solver_constraints=in_rating)

    matches = []
    for row in rows:
        matches.append(
            Match(
                id=row["id"],
                league=leagues[row["league_name"]],
                home_team=teams[row["home_id"]],
                away_team=teams[row["away_id"]],
                home_score=row["home_avg"],
                away_score=row["away_avg"],
                recent=row["recent"],
            )
        )

    return {
        "leagues": leagues.values(),
        "teams": teams.values(),
        "matches": matches,
    }
teams.footystats_id 19 | JOIN solver.team_ratings ON teams.solver_id = team_ratings.id 20 | WHERE _LEAGUE = 'International Copa America' 21 | AND team_ratings._TYPE = 'International' 22 | ) 23 | 24 | SELECT 25 | transfermarkt_id, 26 | name, 27 | _group, 28 | ROUND(rating, 1) AS rating, 29 | ROUND(offence, 2) AS offence, 30 | ROUND(defence, 2) AS defence, 31 | ROUND(_1st, 3) AS _1st, 32 | ROUND(_2nd, 3) AS _2nd, 33 | ROUND(qf, 3) AS qf, 34 | ROUND(sf, 3) AS sf, 35 | ROUND(f, 3) AS f, 36 | ROUND(champ, 3) AS champ, 37 | FORMAT_TIMESTAMP('%F %H:%M', TIMESTAMP_ADD(TIMESTAMP_SECONDS(_DATE_UNIX), INTERVAL 2 HOUR), 'Asia/Hong_Kong') AS date_unix 38 | FROM result 39 | ORDER BY _group, points DESC -------------------------------------------------------------------------------- /src/bigquery/sql/outputs/simulation_csl.sql: -------------------------------------------------------------------------------- 1 | WITH result AS ( 2 | SELECT 3 | teams.transfermarkt_id, 4 | teams.name, 5 | rating, 6 | offence, 7 | defence, 8 | table.scored - table.conceded AS goal_diff, 9 | table.wins * 3 + table.draws + COALESCE(table.correction, 0) AS points, 10 | COALESCE(positions._1, 0) AS champ, 11 | COALESCE(positions._1, 0) + COALESCE(positions._2, 0) * 0.5 AS acle, 12 | COALESCE(positions._2, 0) * 0.5 + COALESCE(positions._3, 0) AS acl2, 13 | COALESCE(positions._15, 0) + COALESCE(positions._16, 0) AS relegation, 14 | leagues._DATE_UNIX 15 | FROM `simulation.leagues_latest` leagues 16 | JOIN master.teams ON leagues.team = teams.footystats_id 17 | JOIN solver.team_ratings ON teams.solver_id = team_ratings.id 18 | WHERE _LEAGUE = 'China Chinese Super League' 19 | AND team_ratings._TYPE = 'Club' 20 | ) 21 | 22 | SELECT 23 | transfermarkt_id, 24 | name, 25 | ROUND(rating, 1) AS rating, 26 | ROUND(offence, 2) AS offence, 27 | ROUND(defence, 2) AS defence, 28 | ROUND(goal_diff, 1) AS goal_diff, 29 | ROUND(points, 1) AS points, 30 | ROUND(champ, 3) AS champ, 31 | ROUND(acle, 3) AS acle, 32 | 
-- Output query for the 'Hong Kong Hong Kong Premier League' simulation.
-- Sums the per-position columns into two buckets: positions 1-5
-- (champ_group) and positions 6-10 (challenge_group).
WITH result AS (
  SELECT
    teams.transfermarkt_id,
    teams.name,
    rating,
    offence,
    defence,
    table.scored - table.conceded AS goal_diff,
    -- correction is NULL-guarded (as in the other output queries) so a missing
    -- correction does not turn the whole points expression into NULL.
    table.wins * 3 + table.draws + COALESCE(table.correction, 0) AS points,
    COALESCE(positions._1, 0) + COALESCE(positions._2, 0) + COALESCE(positions._3, 0) + COALESCE(positions._4, 0) + COALESCE(positions._5, 0) AS champ_group,
    COALESCE(positions._6, 0) + COALESCE(positions._7, 0) + COALESCE(positions._8, 0) + COALESCE(positions._9, 0) + COALESCE(positions._10, 0) AS challenge_group,
    leagues._DATE_UNIX
  FROM `simulation.leagues_latest` leagues
  JOIN master.teams ON leagues.team = teams.footystats_id
  JOIN solver.team_ratings ON teams.solver_id = team_ratings.id
  WHERE _LEAGUE = 'Hong Kong Hong Kong Premier League'
    AND team_ratings._TYPE = 'Club'
)

SELECT
  transfermarkt_id,
  name,
  ROUND(rating, 1) AS rating,
  ROUND(offence, 2) AS offence,
  ROUND(defence, 2) AS defence,
  ROUND(goal_diff, 1) AS goal_diff,
  ROUND(points, 1) AS points,
  ROUND(champ_group, 3) AS champ_group,
  ROUND(challenge_group, 3) AS challenge_group,
  -- NOTE(review): 2 hours are added to the stored unix time before formatting
  -- in Asia/Hong_Kong; the reason for the offset is not visible here.
  FORMAT_TIMESTAMP('%F %H:%M', TIMESTAMP_ADD(TIMESTAMP_SECONDS(_DATE_UNIX), INTERVAL 2 HOUR), 'Asia/Hong_Kong') AS date_unix
FROM result
ORDER BY points DESC
-- Output query for the 'Italy Serie A' simulation.
-- Sums the per-position columns into champion (1), ucl (1-4), uel (5) and
-- relegation (18-20) buckets.
WITH result AS (
  SELECT
    teams.transfermarkt_id,
    teams.name,
    rating,
    offence,
    defence,
    table.scored - table.conceded AS goal_diff,
    -- correction is NULL-guarded (as in the other output queries) so a missing
    -- correction does not turn the whole points expression into NULL.
    table.wins * 3 + table.draws + COALESCE(table.correction, 0) AS points,
    COALESCE(positions._1, 0) AS champ,
    COALESCE(positions._1, 0) + COALESCE(positions._2, 0) + COALESCE(positions._3, 0) + COALESCE(positions._4, 0) AS ucl,
    COALESCE(positions._5, 0) AS uel,
    COALESCE(positions._18, 0) + COALESCE(positions._19, 0) + COALESCE(positions._20, 0) AS relegation,
    leagues._DATE_UNIX
  FROM `simulation.leagues_latest` leagues
  JOIN master.teams ON leagues.team = teams.footystats_id
  JOIN solver.team_ratings ON teams.solver_id = team_ratings.id
  WHERE _LEAGUE = 'Italy Serie A'
    AND team_ratings._TYPE = 'Club'
)

SELECT
  transfermarkt_id,
  name,
  ROUND(rating, 1) AS rating,
  ROUND(offence, 2) AS offence,
  ROUND(defence, 2) AS defence,
  ROUND(goal_diff, 1) AS goal_diff,
  ROUND(points, 1) AS points,
  ROUND(champ, 3) AS champ,
  ROUND(ucl, 3) AS ucl,
  ROUND(uel, 3) AS uel,
  ROUND(relegation, 3) AS relegation,
  -- NOTE(review): 2 hours are added to the stored unix time before formatting
  -- in Asia/Hong_Kong; the reason for the offset is not visible here.
  FORMAT_TIMESTAMP('%F %H:%M', TIMESTAMP_ADD(TIMESTAMP_SECONDS(_DATE_UNIX), INTERVAL 2 HOUR), 'Asia/Hong_Kong') AS date_unix
FROM result
ORDER BY points DESC
-- Output query for the 'England Premier League' simulation.
-- Sums the per-position columns into champion (1), ucl (1-4), uel (5) and
-- relegation (18-20) buckets.
WITH result AS (
  SELECT
    teams.transfermarkt_id,
    teams.name,
    rating,
    offence,
    defence,
    table.scored - table.conceded AS goal_diff,
    -- correction is NULL-guarded (as in the other output queries) so a missing
    -- correction does not turn the whole points expression into NULL.
    table.wins * 3 + table.draws + COALESCE(table.correction, 0) AS points,
    COALESCE(positions._1, 0) AS champ,
    COALESCE(positions._1, 0) + COALESCE(positions._2, 0) + COALESCE(positions._3, 0) + COALESCE(positions._4, 0) AS ucl,
    COALESCE(positions._5, 0) AS uel,
    COALESCE(positions._18, 0) + COALESCE(positions._19, 0) + COALESCE(positions._20, 0) AS relegation,
    leagues._DATE_UNIX
  FROM `simulation.leagues_latest` leagues
  JOIN master.teams ON leagues.team = teams.footystats_id
  JOIN solver.team_ratings ON teams.solver_id = team_ratings.id
  WHERE _LEAGUE = 'England Premier League'
    AND team_ratings._TYPE = 'Club'
)

SELECT
  transfermarkt_id,
  name,
  ROUND(rating, 1) AS rating,
  ROUND(offence, 2) AS offence,
  ROUND(defence, 2) AS defence,
  ROUND(goal_diff, 1) AS goal_diff,
  ROUND(points, 1) AS points,
  ROUND(champ, 3) AS champ,
  ROUND(ucl, 3) AS ucl,
  ROUND(uel, 3) AS uel,
  ROUND(relegation, 3) AS relegation,
  -- NOTE(review): 2 hours are added to the stored unix time before formatting
  -- in Asia/Hong_Kong; the reason for the offset is not visible here.
  FORMAT_TIMESTAMP('%F %H:%M', TIMESTAMP_ADD(TIMESTAMP_SECONDS(_DATE_UNIX), INTERVAL 2 HOUR), 'Asia/Hong_Kong') AS date_unix
FROM result
ORDER BY points DESC
-- Group-stage output query for the 'International FIFA Club World Cup'
-- simulation. Joins the latest simulation rows with team master data and
-- club ratings, then lists teams per group ordered by points.
WITH result AS (
  SELECT
    teams.transfermarkt_id,
    teams.name,
    -- Only the last character of the group value is kept as the group label.
    RIGHT(leagues.group, 1) AS _group,
    rating,
    offence,
    defence,
    table.scored - table.conceded AS goal_diff,
    -- A NULL correction counts as 0 so points never become NULL because of it.
    table.wins * 3 + table.draws + COALESCE(table.correction, 0) AS points,
    -- Each round column falls back to 0 when the value is NULL.
    COALESCE(rounds.round_of_16, 0) AS r16,
    COALESCE(rounds.quarter_finals, 0) AS qf,
    COALESCE(rounds.semi_finals, 0) AS sf,
    COALESCE(rounds.final, 0) AS f,
    COALESCE(rounds.winner, 0) AS winner,
    leagues._DATE_UNIX
  FROM `simulation.leagues_latest` leagues
  JOIN master.teams ON leagues.team = teams.footystats_id
  JOIN solver.team_ratings ON teams.solver_id = team_ratings.id
  WHERE _LEAGUE = 'International FIFA Club World Cup'
    AND team_ratings._TYPE = 'Club'
)

SELECT
  transfermarkt_id,
  name,
  _group,
  ROUND(rating, 1) AS rating,
  ROUND(offence, 2) AS offence,
  ROUND(defence, 2) AS defence,
  ROUND(goal_diff, 1) AS goal_diff,
  ROUND(points, 1) AS points,
  ROUND(r16, 3) AS r16,
  ROUND(qf, 3) AS qf,
  ROUND(sf, 3) AS sf,
  ROUND(f, 3) AS f,
  ROUND(winner, 3) AS winner,
  -- NOTE(review): 2 hours are added to the stored unix time before formatting
  -- in Asia/Hong_Kong; the reason for the offset is not visible here.
  FORMAT_TIMESTAMP('%F %H:%M', TIMESTAMP_ADD(TIMESTAMP_SECONDS(_DATE_UNIX), INTERVAL 2 HOUR), 'Asia/Hong_Kong') AS date_unix
FROM result
ORDER BY _group, points DESC
-- Group-stage output query for the simulation stored under the league name
-- 'Asia AFC Cup'. Joins the latest simulation rows with team master data and
-- club ratings.
WITH result AS (
  SELECT
    teams.transfermarkt_id,
    teams.name,
    -- Only the last character of the group value is kept as the group label.
    RIGHT(leagues.group, 1) AS _group,
    rating,
    offence,
    defence,
    table.scored - table.conceded AS goal_diff,
    -- A NULL correction counts as 0 so points never become NULL because of it.
    table.wins * 3 + table.draws + COALESCE(table.correction, 0) AS points,
    -- Each round column falls back to 0 when the value is NULL.
    COALESCE(rounds.round_of_16, 0) AS r16,
    COALESCE(rounds.quarter_finals, 0) AS qf,
    COALESCE(rounds.semi_finals, 0) AS sf,
    COALESCE(rounds.final, 0) AS f,
    COALESCE(rounds.winner, 0) AS winner,
    leagues._DATE_UNIX
  FROM `simulation.leagues_latest` leagues
  JOIN master.teams ON leagues.team = teams.footystats_id
  JOIN solver.team_ratings ON teams.solver_id = team_ratings.id
  WHERE _LEAGUE = 'Asia AFC Cup'
    AND team_ratings._TYPE = 'Club'
)

SELECT
  transfermarkt_id,
  name,
  _group,
  ROUND(rating, 1) AS rating,
  ROUND(offence, 2) AS offence,
  ROUND(defence, 2) AS defence,
  ROUND(goal_diff, 1) AS goal_diff,
  ROUND(points, 1) AS points,
  ROUND(r16, 3) AS r16,
  ROUND(qf, 3) AS qf,
  ROUND(sf, 3) AS sf,
  ROUND(f, 3) AS f,
  ROUND(winner, 3) AS winner,
  -- NOTE(review): 2 hours are added to the stored unix time before formatting
  -- in Asia/Hong_Kong; the reason for the offset is not visible here.
  FORMAT_TIMESTAMP('%F %H:%M', TIMESTAMP_ADD(TIMESTAMP_SECONDS(_DATE_UNIX), INTERVAL 2 HOUR), 'Asia/Hong_Kong') AS date_unix
FROM result
-- Boolean sort keys with DESC put TRUE first: groups E and F come first,
-- then G and H, then every other group alphabetically; points break ties
-- inside a group.
ORDER BY _group IN ('E', 'F') DESC, _group IN ('G', 'H') DESC, _group, points DESC
33 | ROUND(_1st, 3) AS _1st, 34 | ROUND(_2nd, 3) AS _2nd, 35 | ROUND(_3rd, 3) AS _3rd, 36 | ROUND(r16, 3) AS r16, 37 | ROUND(qf, 3) AS qf, 38 | ROUND(sf, 3) AS sf, 39 | ROUND(f, 3) AS f, 40 | ROUND(champ, 3) AS champ, 41 | FORMAT_TIMESTAMP('%F %H:%M', TIMESTAMP_ADD(TIMESTAMP_SECONDS(_DATE_UNIX), INTERVAL 2 HOUR), 'Asia/Hong_Kong') AS date_unix 42 | FROM result 43 | ORDER BY _group, points DESC -------------------------------------------------------------------------------- /src/bigquery/sql/outputs/simulation_euro_gs.sql: -------------------------------------------------------------------------------- 1 | WITH result AS ( 2 | SELECT 3 | teams.transfermarkt_id, 4 | teams.name, 5 | RIGHT(leagues.group, 1) AS _group, 6 | rating, 7 | offence, 8 | defence, 9 | table.wins * 3 + table.draws + COALESCE(table.correction, 0) AS points, 10 | COALESCE(positions._1, 0) AS _1st, 11 | COALESCE(positions._2, 0) AS _2nd, 12 | COALESCE(positions._3, 0) AS _3rd, 13 | COALESCE(rounds.ROUND_OF_16, 0) AS r16, 14 | COALESCE(rounds.QUARTER_FINALS, 0) AS qf, 15 | COALESCE(rounds.SEMI_FINALS, 0) AS sf, 16 | COALESCE(rounds.FINAL, 0) AS f, 17 | COALESCE(rounds.CHAMPS, 0) AS champ, 18 | leagues._DATE_UNIX 19 | FROM `simulation.leagues_latest` leagues 20 | JOIN master.teams ON leagues.team = teams.footystats_id 21 | JOIN solver.team_ratings ON teams.solver_id = team_ratings.id 22 | WHERE _LEAGUE = 'International UEFA Euro Championship' 23 | AND team_ratings._TYPE = 'International' 24 | ) 25 | 26 | SELECT 27 | transfermarkt_id, 28 | name, 29 | _group, 30 | ROUND(rating, 1) AS rating, 31 | ROUND(offence, 2) AS offence, 32 | ROUND(defence, 2) AS defence, 33 | ROUND(_1st, 3) AS _1st, 34 | ROUND(_2nd, 3) AS _2nd, 35 | ROUND(_3rd, 3) AS _3rd, 36 | ROUND(r16, 3) AS r16, 37 | ROUND(qf, 3) AS qf, 38 | ROUND(sf, 3) AS sf, 39 | ROUND(f, 3) AS f, 40 | ROUND(champ, 3) AS champ, 41 | FORMAT_TIMESTAMP('%F %H:%M', TIMESTAMP_ADD(TIMESTAMP_SECONDS(_DATE_UNIX), INTERVAL 2 HOUR), 'Asia/Hong_Kong') AS date_unix 42 
-- Group-stage output query for the 'International AFC Asian Cup' simulation.
-- Joins the latest simulation rows with team master data and international
-- ratings.
WITH result AS (
  SELECT
    teams.transfermarkt_id,
    teams.name,
    -- Only the last character of the group value is kept as the group label.
    RIGHT(leagues.group, 1) AS _group,
    rating,
    offence,
    defence,
    -- A NULL correction counts as 0 so points never become NULL because of it.
    table.wins * 3 + table.draws + COALESCE(table.correction, 0) AS points,
    -- Position and round columns fall back to 0 when the value is NULL.
    COALESCE(positions._1, 0) AS _1st,
    COALESCE(positions._2, 0) AS _2nd,
    COALESCE(positions._3, 0) AS _3rd,
    COALESCE(rounds.ROUND_OF_16, 0) AS r16,
    COALESCE(rounds.QUARTER_FINALS, 0) AS qf,
    COALESCE(rounds.SEMI_FINALS, 0) AS sf,
    COALESCE(rounds.FINAL, 0) AS f,
    COALESCE(rounds.CHAMPS, 0) AS champ,
    leagues._DATE_UNIX
  FROM `simulation.leagues_latest` leagues
  JOIN master.teams ON leagues.team = teams.footystats_id
  JOIN solver.team_ratings ON teams.solver_id = team_ratings.id
  WHERE _LEAGUE = 'International AFC Asian Cup'
    AND team_ratings._TYPE = 'International'
)

SELECT
  transfermarkt_id,
  name,
  _group,
  ROUND(rating, 1) AS rating,
  ROUND(offence, 2) AS offence,
  ROUND(defence, 2) AS defence,
  ROUND(_1st, 3) AS _1st,
  ROUND(_2nd, 3) AS _2nd,
  ROUND(_3rd, 3) AS _3rd,
  ROUND(r16, 3) AS r16,
  ROUND(qf, 3) AS qf,
  ROUND(sf, 3) AS sf,
  ROUND(f, 3) AS f,
  ROUND(champ, 3) AS champ,
  -- NOTE(review): 2 hours are added to the stored unix time before formatting
  -- in Asia/Hong_Kong; the reason for the offset is not visible here.
  FORMAT_TIMESTAMP('%F %H:%M', TIMESTAMP_ADD(TIMESTAMP_SECONDS(_DATE_UNIX), INTERVAL 2 HOUR), 'Asia/Hong_Kong') AS date_unix
FROM result
-- FALSE sorts before TRUE, so group C is listed first, followed by the other
-- groups alphabetically. points is not projected above, so it is taken from
-- the result CTE (unrounded).
ORDER BY _group <> 'C', _group, points DESC
@dataclass
class League:
    """A league together with its two LP decision variables."""

    name: str

    def __post_init__(self):
        # Both league-level variables are bounded below by zero.
        league_name = self.name
        self.avg_goal = LpVariable(f"avg_goal_{league_name}", lowBound=0)
        self.home_adv = LpVariable(f"home_adv_{league_name}", lowBound=0)


@dataclass
class Team:
    """A team with unbounded offence and defence LP variables."""

    id: str
    # Whether this team takes part in the solver's sum-to-zero constraints.
    in_solver_constraints: bool

    def __post_init__(self):
        team_id = self.id
        self.offence = LpVariable(f"offence_{team_id}")
        self.defence = LpVariable(f"defence_{team_id}")


@dataclass
class Match:
    """One match row: its league, both teams, scores and a weight."""

    id: int
    league: League
    home_team: Team
    away_team: Team
    home_score: float
    away_score: float
    # Multiplier applied to both error expressions of this match.
    recent: float

    def __post_init__(self):
        match_id = self.id
        self.home_error = LpVariable(f"home_error_{match_id}")
        self.away_error = LpVariable(f"away_error_{match_id}")

    @property
    def home_error_val(self):
        """Weighted difference between modelled and given home score."""
        modelled = (
            self.league.avg_goal
            + self.league.home_adv
            + self.home_team.offence
            + self.away_team.defence
        )
        return (modelled - self.home_score) * self.recent

    @property
    def away_error_val(self):
        """Weighted difference between modelled and given away score.

        The league home advantage is subtracted for the away side.
        """
        modelled = (
            self.league.avg_goal
            - self.league.home_adv
            + self.away_team.offence
            + self.home_team.defence
        )
        return (modelled - self.away_score) * self.recent
def solver(
    matches: list[Match], teams: list[Team], leagues: list[League]
) -> dict[str, list[dict[str, float]]]:
    """Fit league and team parameters by minimising total absolute error.

    Builds a PuLP minimisation problem whose objective is the sum of every
    match's ``home_error`` and ``away_error`` variables. Each error variable
    is constrained to be at least the signed error expression and at least
    its negation, which makes it an upper bound on the absolute value. The
    offence and defence variables of teams flagged ``in_solver_constraints``
    are each constrained to sum to zero.

    Returns:
        A dict with ``"leagues"`` (division, avg_goal, home_adv per league)
        and ``"teams"`` (id, offence, defence per team), read from the
        variables' ``varValue`` after solving.
    """
    problem = LpProblem(sense=LpMinimize)

    # Absolute-value linearisation: error >= value and error >= -value.
    for game in matches:
        problem += game.home_error >= game.home_error_val
        problem += game.home_error >= -game.home_error_val
        problem += game.away_error >= game.away_error_val
        problem += game.away_error >= -game.away_error_val

    # Objective: total home error plus total away error.
    total_home_error = lpSum(game.home_error for game in matches)
    total_away_error = lpSum(game.away_error for game in matches)
    problem += total_home_error + total_away_error

    # Sum-to-zero constraints over the teams flagged for them.
    constrained = [team for team in teams if team.in_solver_constraints]
    problem += lpSum(team.offence for team in constrained) == 0
    problem += lpSum(team.defence for team in constrained) == 0

    problem.solve()

    league_rows = []
    for league in leagues:
        league_rows.append(
            {
                "division": league.name,
                "avg_goal": league.avg_goal.varValue,
                "home_adv": league.home_adv.varValue,
            }
        )

    team_rows = []
    for team in teams:
        team_rows.append(
            {
                "id": team.id,
                "offence": team.offence.varValue,
                "defence": team.defence.varValue,
            }
        )

    return {"leagues": league_rows, "teams": team_rows}
-- League-stage output query for the 'Asia AFC Champions League' simulation.
-- Joins the latest simulation rows with team master data and club ratings
-- and labels every team with one of two region groups.
WITH result AS (
  SELECT
    teams.transfermarkt_id,
    teams.name,
    -- Teams from the listed countries get the first label, every other team
    -- gets the second (the labels are Chinese for East Asia / West Asia).
    CASE
      WHEN teams.country IN ('Japan', 'South Korea', 'Thailand', 'China', 'Malaysia', 'Australia', 'Vietnam') THEN '東亞'
      ELSE '西亞'
    END AS _group,
    rating,
    offence,
    defence,
    table.scored - table.conceded AS goal_diff,
    -- A NULL correction counts as 0 so points never become NULL because of it.
    table.wins * 3 + table.draws + COALESCE(table.correction, 0) AS points,
    -- Each round column falls back to 0 when the value is NULL.
    COALESCE(rounds.round_of_16, 0) AS r16,
    COALESCE(rounds.quarter_finals, 0) AS qf,
    COALESCE(rounds.semi_finals, 0) AS sf,
    COALESCE(rounds.final, 0) AS f,
    COALESCE(rounds.winner, 0) AS winner,
    leagues._DATE_UNIX
  FROM `simulation.leagues_latest` leagues
  JOIN master.teams ON leagues.team = teams.footystats_id
  JOIN solver.team_ratings ON teams.solver_id = team_ratings.id
  WHERE _LEAGUE = 'Asia AFC Champions League'
    AND team_ratings._TYPE = 'Club'
    -- Keep only rows whose rounded wins + draws + losses total is 8.
    -- NOTE(review): presumably this selects the 8-game league-phase table;
    -- confirm against the simulation output.
    AND ROUND(table.wins + table.draws + table.losses) = 8
)

SELECT
  transfermarkt_id,
  name,
  _group,
  ROUND(rating, 1) AS rating,
  ROUND(offence, 2) AS offence,
  ROUND(defence, 2) AS defence,
  ROUND(goal_diff, 1) AS goal_diff,
  ROUND(points, 1) AS points,
  ROUND(r16, 3) AS r16,
  ROUND(qf, 3) AS qf,
  ROUND(sf, 3) AS sf,
  ROUND(f, 3) AS f,
  ROUND(winner, 3) AS winner,
  -- NOTE(review): 2 hours are added to the stored unix time before formatting
  -- in Asia/Hong_Kong; the reason for the offset is not visible here.
  FORMAT_TIMESTAMP('%F %H:%M', TIMESTAMP_ADD(TIMESTAMP_SECONDS(_DATE_UNIX), INTERVAL 2 HOUR), 'Asia/Hong_Kong') AS date_unix
FROM result
ORDER BY _group, points DESC
import setup_logging 11 | 12 | from gcp.util import decode_message 13 | from simulation import queries 14 | from simulation.tournaments import Tournament 15 | 16 | 17 | setup_logging() 18 | 19 | 20 | @functions_framework.cloud_event 21 | def main(cloud_event: CloudEvent): 22 | message = decode_message(cloud_event) 23 | league = message["footystats_name"] 24 | blob = storage.download_blob( 25 | blob_name=f"{league}.json", bucket_name="manual-340977255134-asia-east2" 26 | ) 27 | rounds = json.loads(blob) 28 | 29 | factors = queries.get_avg_goal_home_adv(league) 30 | avg_goal, home_adv = factors["avg_goal"], factors["home_adv"] 31 | teams = queries.get_teams(league) 32 | matches = queries.get_matches(league, teams) 33 | groups = queries.get_groups(league, teams) 34 | 35 | logging.info("Simulating: %s", league) 36 | tournament = Tournament( 37 | avg_goal, 38 | home_adv, 39 | teams, 40 | matches, 41 | groups, 42 | ) 43 | tournament.set_rounds(rounds) 44 | tournament.simulate() 45 | logging.info("Simulated: %s", league) 46 | 47 | storage.upload_json_to_bucket( 48 | tournament.result, 49 | blob_name="league.json", 50 | bucket_name=os.environ["RESULT_BUCKET_NAME"], 51 | hive_partitioning={ 52 | "_LEAGUE": league, 53 | "_DATE_UNIX": message["latest_match_date"], 54 | }, 55 | ) 56 | -------------------------------------------------------------------------------- /src/function/solver/gcp/storage.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | import ssl 4 | import urllib3 5 | 6 | import requests 7 | from google.cloud import storage 8 | 9 | CLIENT = storage.Client() 10 | 11 | 12 | class GCSUploadError(Exception): 13 | pass 14 | 15 | 16 | def convert_to_newline_delimited_json(data: dict | list) -> str: 17 | if isinstance(data, list): 18 | return "\n".join([json.dumps(d) for d in data]) 19 | return json.dumps(data) 20 | 21 | 22 | def download_blob(blob_name: str, bucket_name: str) -> str: 23 | return 
CLIENT.bucket(bucket_name).blob(blob_name).download_as_text() 24 | 25 | 26 | def upload_json_to_bucket( 27 | data: list[dict], 28 | blob_name: str, 29 | bucket_name: str, 30 | hive_partitioning: dict | None = None, 31 | ): 32 | blob_name = get_directory(blob_name, hive_partitioning) 33 | blob = CLIENT.bucket(bucket_name).blob(blob_name) 34 | data = convert_to_newline_delimited_json(data) 35 | 36 | try: 37 | blob.upload_from_string(data) 38 | logging.info(f"Uploaded blob: {blob_name=}") 39 | except ( 40 | urllib3.exceptions.MaxRetryError, 41 | requests.exceptions.ReadTimeout, 42 | requests.exceptions.SSLError, 43 | ssl.SSLEOFError, 44 | ) as error: 45 | logging.warning(f"Upload failed: {blob_name=} {error=}") 46 | raise GCSUploadError() 47 | 48 | 49 | def get_directory(blob_name: str, hive_partitioning: dict | None = None): 50 | if hive_partitioning: 51 | hive_dir = "/".join( 52 | f"{k}={str(v).replace('/', ' ')}" for k, v in hive_partitioning.items() 53 | ) 54 | return "/".join([hive_dir, blob_name]) 55 | return blob_name 56 | -------------------------------------------------------------------------------- /src/function/hkjc_get_odds/gcp/storage.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | import ssl 4 | import urllib3 5 | 6 | import requests 7 | from google.cloud import storage 8 | 9 | CLIENT = storage.Client() 10 | 11 | 12 | class GCSUploadError(Exception): 13 | pass 14 | 15 | 16 | def convert_to_newline_delimited_json(data: dict | list) -> str: 17 | if isinstance(data, list): 18 | return "\n".join([json.dumps(d) for d in data]) 19 | return json.dumps(data) 20 | 21 | 22 | def download_blob(blob_name: str, bucket_name: str) -> str: 23 | return CLIENT.bucket(bucket_name).blob(blob_name).download_as_text() 24 | 25 | 26 | def upload_json_to_bucket( 27 | data: list[dict], 28 | blob_name: str, 29 | bucket_name: str, 30 | hive_partitioning: dict | None = None, 31 | ): 32 | blob_name = 
get_directory(blob_name, hive_partitioning) 33 | blob = CLIENT.bucket(bucket_name).blob(blob_name) 34 | data = convert_to_newline_delimited_json(data) 35 | 36 | try: 37 | blob.upload_from_string(data) 38 | logging.info(f"Uploaded blob: {blob_name=}") 39 | except ( 40 | urllib3.exceptions.MaxRetryError, 41 | requests.exceptions.ReadTimeout, 42 | requests.exceptions.SSLError, 43 | ssl.SSLEOFError, 44 | ) as error: 45 | logging.warning(f"Upload failed: {blob_name=} {error=}") 46 | raise GCSUploadError() 47 | 48 | 49 | def get_directory(blob_name: str, hive_partitioning: dict | None = None): 50 | if hive_partitioning: 51 | hive_dir = "/".join( 52 | f"{k}={str(v).replace('/', ' ')}" for k, v in hive_partitioning.items() 53 | ) 54 | return "/".join([hive_dir, blob_name]) 55 | return blob_name 56 | -------------------------------------------------------------------------------- /src/bigquery/sql/functions/matchProbs.js: -------------------------------------------------------------------------------- 1 | const goalDiff = 5; 2 | 3 | function factorial(n) { 4 | if (n === 0) { 5 | return 1; 6 | } else { 7 | return n * factorial(n - 1); 8 | } 9 | } 10 | 11 | if (projScore1 === null || projScore2 === null || handicap === null) { 12 | return [null, null, null]; 13 | } 14 | 15 | let handicaps = String(handicap).split('/').map(function(x) { 16 | return parseFloat(x); 17 | }); 18 | 19 | let prob1 = new Array(goalDiff + 1).fill(0); 20 | let prob2 = new Array(goalDiff + 1).fill(0); 21 | 22 | for (let i = 0; i < goalDiff; i++) { 23 | prob1[i] = Math.exp(-projScore1) * Math.pow(projScore1, i) / factorial(i); 24 | prob2[i] = Math.exp(-projScore2) * Math.pow(projScore2, i) / factorial(i); 25 | } 26 | 27 | prob1[goalDiff] = 1 - prob1.reduce(function(a, b) { 28 | return a + b; 29 | }, 0); 30 | prob2[goalDiff] = 1 - prob2.reduce(function(a, b) { 31 | return a + b; 32 | }, 0); 33 | 34 | let probWin1 = 0; 35 | let probDraw = 0; 36 | let probWin2 = 0; 37 | 38 | for (let h of handicaps) { 39 | 
for (let i = 0; i <= goalDiff; i++) { 40 | for (let j = 0; j <= goalDiff; j++) { 41 | let totalScore1 = i + h; 42 | if (totalScore1 > j) { 43 | probWin1 += prob1[i] * prob2[j]; 44 | } else if (totalScore1 < j) { 45 | probWin2 += prob1[i] * prob2[j]; 46 | } else { 47 | probDraw += prob1[i] * prob2[j]; 48 | } 49 | } 50 | } 51 | } 52 | 53 | let handicapCount = handicaps.length; 54 | probWin1 = probWin1 / handicapCount; 55 | probDraw = probDraw / handicapCount; 56 | probWin2 = probWin2 / handicapCount; 57 | 58 | return [probWin1, probDraw, probWin2]; -------------------------------------------------------------------------------- /src/function/hkjc_get_results/gcp/storage.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | import ssl 4 | import urllib3 5 | 6 | import requests 7 | from google.cloud import storage 8 | 9 | CLIENT = storage.Client() 10 | 11 | 12 | class GCSUploadError(Exception): 13 | pass 14 | 15 | 16 | def convert_to_newline_delimited_json(data: dict | list) -> str: 17 | if isinstance(data, list): 18 | return "\n".join([json.dumps(d) for d in data]) 19 | return json.dumps(data) 20 | 21 | 22 | def download_blob(blob_name: str, bucket_name: str) -> str: 23 | return CLIENT.bucket(bucket_name).blob(blob_name).download_as_text() 24 | 25 | 26 | def upload_json_to_bucket( 27 | data: list[dict], 28 | blob_name: str, 29 | bucket_name: str, 30 | hive_partitioning: dict | None = None, 31 | ): 32 | blob_name = get_directory(blob_name, hive_partitioning) 33 | blob = CLIENT.bucket(bucket_name).blob(blob_name) 34 | data = convert_to_newline_delimited_json(data) 35 | 36 | try: 37 | blob.upload_from_string(data) 38 | logging.info(f"Uploaded blob: {blob_name=}") 39 | except ( 40 | urllib3.exceptions.MaxRetryError, 41 | requests.exceptions.ReadTimeout, 42 | requests.exceptions.SSLError, 43 | ssl.SSLEOFError, 44 | ) as error: 45 | logging.warning(f"Upload failed: {blob_name=} {error=}") 46 | raise 
GCSUploadError() 47 | 48 | 49 | def get_directory(blob_name: str, hive_partitioning: dict | None = None): 50 | if hive_partitioning: 51 | hive_dir = "/".join( 52 | f"{k}={str(v).replace('/', ' ')}" for k, v in hive_partitioning.items() 53 | ) 54 | return "/".join([hive_dir, blob_name]) 55 | return blob_name 56 | -------------------------------------------------------------------------------- /src/function/hkjc_get_team_list/gcp/storage.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | import ssl 4 | import urllib3 5 | 6 | import requests 7 | from google.cloud import storage 8 | 9 | CLIENT = storage.Client() 10 | 11 | 12 | class GCSUploadError(Exception): 13 | pass 14 | 15 | 16 | def convert_to_newline_delimited_json(data: dict | list) -> str: 17 | if isinstance(data, list): 18 | return "\n".join([json.dumps(d) for d in data]) 19 | return json.dumps(data) 20 | 21 | 22 | def download_blob(blob_name: str, bucket_name: str) -> str: 23 | return CLIENT.bucket(bucket_name).blob(blob_name).download_as_text() 24 | 25 | 26 | def upload_json_to_bucket( 27 | data: list[dict], 28 | blob_name: str, 29 | bucket_name: str, 30 | hive_partitioning: dict | None = None, 31 | ): 32 | blob_name = get_directory(blob_name, hive_partitioning) 33 | blob = CLIENT.bucket(bucket_name).blob(blob_name) 34 | data = convert_to_newline_delimited_json(data) 35 | 36 | try: 37 | blob.upload_from_string(data) 38 | logging.info(f"Uploaded blob: {blob_name=}") 39 | except ( 40 | urllib3.exceptions.MaxRetryError, 41 | requests.exceptions.ReadTimeout, 42 | requests.exceptions.SSLError, 43 | ssl.SSLEOFError, 44 | ) as error: 45 | logging.warning(f"Upload failed: {blob_name=} {error=}") 46 | raise GCSUploadError() 47 | 48 | 49 | def get_directory(blob_name: str, hive_partitioning: dict | None = None): 50 | if hive_partitioning: 51 | hive_dir = "/".join( 52 | f"{k}={str(v).replace('/', ' ')}" for k, v in hive_partitioning.items() 53 | ) 
54 | return "/".join([hive_dir, blob_name]) 55 | return blob_name 56 | -------------------------------------------------------------------------------- /src/function/simulate_tournament/gcp/storage.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | import ssl 4 | import urllib3 5 | 6 | import requests 7 | from google.cloud import storage 8 | 9 | CLIENT = storage.Client() 10 | 11 | 12 | class GCSUploadError(Exception): 13 | pass 14 | 15 | 16 | def convert_to_newline_delimited_json(data: dict | list) -> str: 17 | if isinstance(data, list): 18 | return "\n".join([json.dumps(d) for d in data]) 19 | return json.dumps(data) 20 | 21 | 22 | def download_blob(blob_name: str, bucket_name: str) -> str: 23 | return CLIENT.bucket(bucket_name).blob(blob_name).download_as_text() 24 | 25 | 26 | def upload_json_to_bucket( 27 | data: list[dict], 28 | blob_name: str, 29 | bucket_name: str, 30 | hive_partitioning: dict | None = None, 31 | ): 32 | blob_name = get_directory(blob_name, hive_partitioning) 33 | blob = CLIENT.bucket(bucket_name).blob(blob_name) 34 | data = convert_to_newline_delimited_json(data) 35 | 36 | try: 37 | blob.upload_from_string(data) 38 | logging.info(f"Uploaded blob: {blob_name=}") 39 | except ( 40 | urllib3.exceptions.MaxRetryError, 41 | requests.exceptions.ReadTimeout, 42 | requests.exceptions.SSLError, 43 | ssl.SSLEOFError, 44 | ) as error: 45 | logging.warning(f"Upload failed: {blob_name=} {error=}") 46 | raise GCSUploadError() 47 | 48 | 49 | def get_directory(blob_name: str, hive_partitioning: dict | None = None): 50 | if hive_partitioning: 51 | hive_dir = "/".join( 52 | f"{k}={str(v).replace('/', ' ')}" for k, v in hive_partitioning.items() 53 | ) 54 | return "/".join([hive_dir, blob_name]) 55 | return blob_name 56 | -------------------------------------------------------------------------------- /src/function/footystats_get_league_list/gcp/storage.py: 
-------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | import ssl 4 | import urllib3 5 | 6 | import requests 7 | from google.cloud import storage 8 | 9 | CLIENT = storage.Client() 10 | 11 | 12 | class GCSUploadError(Exception): 13 | pass 14 | 15 | 16 | def convert_to_newline_delimited_json(data: dict | list) -> str: 17 | if isinstance(data, list): 18 | return "\n".join([json.dumps(d) for d in data]) 19 | return json.dumps(data) 20 | 21 | 22 | def download_blob(blob_name: str, bucket_name: str) -> str: 23 | return CLIENT.bucket(bucket_name).blob(blob_name).download_as_text() 24 | 25 | 26 | def upload_json_to_bucket( 27 | data: list[dict], 28 | blob_name: str, 29 | bucket_name: str, 30 | hive_partitioning: dict | None = None, 31 | ): 32 | blob_name = get_directory(blob_name, hive_partitioning) 33 | blob = CLIENT.bucket(bucket_name).blob(blob_name) 34 | data = convert_to_newline_delimited_json(data) 35 | 36 | try: 37 | blob.upload_from_string(data) 38 | logging.info(f"Uploaded blob: {blob_name=}") 39 | except ( 40 | urllib3.exceptions.MaxRetryError, 41 | requests.exceptions.ReadTimeout, 42 | requests.exceptions.SSLError, 43 | ssl.SSLEOFError, 44 | ) as error: 45 | logging.warning(f"Upload failed: {blob_name=} {error=}") 46 | raise GCSUploadError() 47 | 48 | 49 | def get_directory(blob_name: str, hive_partitioning: dict | None = None): 50 | if hive_partitioning: 51 | hive_dir = "/".join( 52 | f"{k}={str(v).replace('/', ' ')}" for k, v in hive_partitioning.items() 53 | ) 54 | return "/".join([hive_dir, blob_name]) 55 | return blob_name 56 | -------------------------------------------------------------------------------- /src/function/footystats_transform_matches/gcp/storage.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | import ssl 4 | import urllib3 5 | 6 | import requests 7 | from google.cloud import storage 8 | 9 | CLIENT = 
# Scheduled function: a Pub/Sub topic, a Cloud Scheduler job that publishes
# to it on a cron schedule, and an event-driven function subscribed to it.

# Topic that connects the scheduler job to the function.
module "pubsub" {
  source = "../pubsub"

  topic      = var.topic_name
  project_id = var.project_id
}

# Publishes var.message_data to the topic according to var.job_schedule.
resource "google_cloud_scheduler_job" "job" {
  name     = var.job_name
  schedule = var.job_schedule
  paused   = var.job_paused
  region   = var.region
  project  = var.project_id

  pubsub_target {
    topic_name = "projects/${var.project_id}/topics/${module.pubsub.topic}"
    # The message payload is passed base64-encoded.
    data = base64encode(var.message_data)
  }
}

# Function triggered by messages published on the topic above; all sizing
# and runtime settings are passed through from this module's variables.
module "function" {
  source = "../event-function"

  name                         = var.function_name
  runtime                      = var.function_runtime
  entry_point                  = var.function_entry_point
  docker_repository            = var.docker_repository
  bucket_name                  = var.bucket_name
  timeout_s                    = var.function_timeout_s
  available_memory             = var.function_available_memory
  available_cpu                = var.function_available_cpu
  environment_variables        = var.function_environment_variables
  max_instances                = var.function_max_instances
  secret_environment_variables = var.function_secret_environment_variables
  event_type                   = "google.cloud.pubsub.topic.v1.messagePublished"
  event_filters                = var.function_event_filters
  topic_name                   = module.pubsub.id
  event_trigger_failure_policy = var.function_event_trigger_failure_policy
  source_directory             = var.function_source_directory
  region                       = var.region
  project_id                   = var.project_id
}
32 | blob_name = get_directory(blob_name, hive_partitioning) 33 | blob = CLIENT.bucket(bucket_name).blob(blob_name) 34 | data = convert_to_newline_delimited_json(data) 35 | 36 | try: 37 | blob.upload_from_string(data) 38 | logging.info(f"Uploaded blob: {blob_name=}") 39 | except ( 40 | urllib3.exceptions.MaxRetryError, 41 | requests.exceptions.HTTPError, 42 | requests.exceptions.ReadTimeout, 43 | requests.exceptions.SSLError, 44 | ssl.SSLEOFError, 45 | ) as error: 46 | logging.warning(f"Upload failed: {blob_name=} {error=}") 47 | raise GCSUploadError() 48 | 49 | 50 | def get_directory(blob_name: str, hive_partitioning: dict | None = None): 51 | if hive_partitioning: 52 | hive_dir = "/".join( 53 | f"{k}={str(v).replace('/', ' ')}" for k, v in hive_partitioning.items() 54 | ) 55 | return "/".join([hive_dir, blob_name]) 56 | return blob_name 57 | -------------------------------------------------------------------------------- /src/bigquery/sql/solver/get_matches.sql: -------------------------------------------------------------------------------- 1 | WITH matches AS ( 2 | SELECT 3 | matches.id, 4 | home_teams.solver_id AS home_id, 5 | home_teams.in_team_rating AS home_team_in_rating, 6 | away_teams.solver_id AS away_id, 7 | away_teams.in_team_rating AS away_team_in_rating, 8 | division, 9 | CASE 10 | WHEN (is_league OR home_teams.country = away_teams.country)AND league_type = 'Club' THEN 1 11 | ELSE 5 12 | END 13 | AS cut_off_year, 14 | date_unix, 15 | home_avg, 16 | away_avg 17 | FROM ${project_id}.footystats.matches 18 | JOIN `${project_id}.master.teams` home_teams ON matches.homeID = home_teams.footystats_id 19 | JOIN `${project_id}.master.teams` away_teams ON matches.awayID = away_teams.footystats_id 20 | JOIN ${project_id}.master.leagues ON matches._NAME = leagues.footystats_name 21 | JOIN ${project_id}.footystats.matches_transformed USING (id) 22 | WHERE matches.status = 'complete' 23 | AND date_unix <= max_time 24 | AND home_teams.solver_id <> 
def get_avg_goal_home_adv(league: str) -> dict[str, float]:
    """Return the first row of ``simulation.get_avg_goal_home_adv``.

    Args:
        league: League name passed to the query as ``@league``.

    Returns:
        The first result row as a dict.
        NOTE(review): callers are expected to read ``avg_goal`` and
        ``home_adv`` from it; confirm against the table function.

    Raises:
        IndexError: If the query returns no row for ``league``.
    """
    rows = bigquery.query_dict(
        query="SELECT * FROM `simulation.get_avg_goal_home_adv`(@league);",
        params={"league": league},
    )
    try:
        return rows[0]
    except IndexError as error:
        # Same exception type as before, but with the league in the message.
        raise IndexError(
            f"No avg_goal/home_adv row returned for league {league!r}"
        ) from error


def get_teams(league: str) -> dict[str, Team]:
    """Return the league's teams keyed by the ``name`` column of each row.

    NOTE(review): ``get_matches`` and ``get_groups`` look teams up by
    ``homeId`` / ``awayId`` / ``id``, so ``name`` presumably carries the
    same identifier; confirm against ``simulation.get_teams``.
    """
    rows = bigquery.query_dict(
        query="SELECT * FROM `simulation.get_teams`(@league);",
        params={"league": league},
    )
    return {row["name"]: Team(**row) for row in rows}


def get_matches(league: str, teams: dict[str, Team]) -> dict[str, list[Match]]:
    """Return the league's matches grouped by round.

    Args:
        league: League name passed to the query as ``@league``.
        teams: Lookup used to resolve ``homeId`` / ``awayId`` to ``Team``.

    Returns:
        A ``defaultdict`` mapping each round to its list of matches.

    Raises:
        KeyError: If a match references a team missing from ``teams``.
    """
    rounds = defaultdict(list)
    rows = bigquery.query_dict(
        query="SELECT * FROM `simulation.get_matches`(@league);",
        params={"league": league},
    )
    for row in rows:
        rounds[row["round"]].append(
            Match(
                home_team=teams[row["homeId"]],
                away_team=teams[row["awayId"]],
                status=row["status"],
                home_score=row["homeGoalCount"],
                away_score=row["awayGoalCount"],
            )
        )
    return rounds


def get_groups(
    league: str, teams: dict[str, Team]
) -> dict[str, dict[str, list[Team]]]:
    """Return the league's groups as ``{round: {group name: [teams]}}``.

    Args:
        league: League name passed to the query as ``@league``.
        teams: Lookup used to resolve each row's ``id`` to a ``Team``.

    Returns:
        A nested ``defaultdict`` keyed by round, then by group name.

    Raises:
        KeyError: If a row references a team missing from ``teams``.
    """
    rounds = defaultdict(lambda: defaultdict(list))
    rows = bigquery.query_dict(
        query="SELECT * FROM `simulation.get_groups`(@league);",
        params={"league": league},
    )
    for row in rows:
        rounds[row["round"]][row["name"]].append(teams[row["id"]])
    return rounds
secret_environment_variables.value 43 | version = "latest" 44 | } 45 | } 46 | } 47 | 48 | event_trigger { 49 | trigger_region = var.region 50 | event_type = var.event_type 51 | dynamic "event_filters" { 52 | for_each = length(var.event_filters) > 0 ? [1] : [] 53 | content { 54 | attribute = var.event_filters["attribute"] 55 | value = var.event_filters["value"] 56 | } 57 | } 58 | pubsub_topic = var.topic_name 59 | retry_policy = var.event_trigger_failure_policy 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /assets/simulation/Asia AFC Cup.json: -------------------------------------------------------------------------------- 1 | { 2 | "Group Stage": { 3 | "format": "Groups", 4 | "h2h": true, 5 | "leg": 2, 6 | "advance_to": { 7 | "Round of 16": { 8 | "start": 1, 9 | "end": 16 10 | } 11 | }, 12 | "groups": { 13 | "Group A": [ 14 | 4408, 15 | 7764, 16 | 9044, 17 | 673909 18 | ], 19 | "Group B": [ 20 | 4104, 21 | 671152, 22 | 689022, 23 | 973710 24 | ], 25 | "Group C": [ 26 | 7761, 27 | 11481, 28 | 673800, 29 | 688913 30 | ], 31 | "Group D": [ 32 | 5069, 33 | 5421, 34 | 11509, 35 | 11511 36 | ], 37 | "Group E": [ 38 | 840, 39 | 1333, 40 | 4025, 41 | 689390 42 | ], 43 | "Group F": [ 44 | 1005, 45 | 1171, 46 | 1330, 47 | 4019 48 | ], 49 | "Group G": [ 50 | 140, 51 | 1173, 52 | 1514, 53 | 1532 54 | ], 55 | "Group H": [ 56 | 1182, 57 | 1513, 58 | 2102, 59 | 3835 60 | ] 61 | } 62 | }, 63 | "Round of 16": { 64 | "format": "Knockout", 65 | "leg": 2, 66 | "advance_to": "Quarter-finals" 67 | }, 68 | "Quarter-finals": { 69 | "format": "Knockout", 70 | "leg": 2, 71 | "advance_to": "Semi-finals" 72 | }, 73 | "Semi-finals": { 74 | "format": "Knockout", 75 | "leg": 2, 76 | "advance_to": "Final" 77 | }, 78 | "Final": { 79 | "format": "Knockout", 80 | "leg": 1, 81 | "advance_to": "Winner" 82 | }, 83 | "Winner": { 84 | "format": "Winner" 85 | } 86 | } -------------------------------------------------------------------------------- 
/infrastructure/modules/bigquery/variables.tf: -------------------------------------------------------------------------------- 1 | variable "dataset_id" { 2 | description = "Unique ID for the dataset being provisioned." 3 | type = string 4 | } 5 | 6 | variable "tables" { 7 | description = "A map of objects which include table_id and schema." 8 | type = map(string) 9 | default = {} 10 | } 11 | 12 | variable "external_tables" { 13 | description = "A map of objects which include table_id and external_data_configuration." 14 | type = map(object({ 15 | schema = string, 16 | source_format = string, 17 | source_uris = optional(list(string)) 18 | hive_partitioning_options = optional(object({ 19 | source_uri_prefix = string, 20 | })) 21 | })) 22 | default = {} 23 | } 24 | 25 | variable "views" { 26 | description = "A map of objects which include view_id and view query." 27 | type = map(string) 28 | default = {} 29 | } 30 | 31 | variable "routines" { 32 | description = "A map of objects which include routine_id, routine_type, routine_language, definition_body, return_type and arguments." 33 | type = map(object({ 34 | definition_body = string, 35 | routine_type = string, 36 | language = string, 37 | return_type = optional(string), 38 | arguments = optional(list(object({ 39 | name = string, 40 | data_type = string 41 | }))), 42 | })) 43 | default = {} 44 | } 45 | 46 | variable "scheduled_queries" { 47 | description = "Data transfer configuration for creating scheduled queries." 48 | type = map(any) 49 | default = {} 50 | } 51 | 52 | variable "service_account_name" { 53 | description = "Default service account to apply to the scheduled queries." 54 | type = string 55 | default = null 56 | } 57 | 58 | variable "location" { 59 | description = "The regional location for the dataset." 60 | type = string 61 | } 62 | 63 | variable "project_id" { 64 | description = "Project where the dataset and table are created." 
-- Daily handicap suggestions: for every match in hkjc.odds_today, compare the
-- model's handicap probabilities with the offered odds and list each side
-- whose computed stake fraction is positive.
WITH matches AS (
  SELECT
    hkjc.id,
    kick_off_time,
    tournament_name,
    home_name,
    away_name,
    -- Projected goals per side: league average, plus/minus the home advantage
    -- (scaled by hkjc.home_adv), plus own offence and the opponent's defence.
    -- NOTE(review): hdc_probs is read below as [home win, draw, away win];
    -- confirm against the functions.matchProbs definition.
    functions.matchProbs(
      avg_goal + league_solver.home_adv * hkjc.home_adv + home_solver.offence + away_solver.defence,
      avg_goal - league_solver.home_adv * hkjc.home_adv + away_solver.offence + home_solver.defence,
      handicap
    ) AS hdc_probs,
    HDC_H,
    HDC_A,
    handicap,
    -- The two parts of a split handicap ("a/b"); HG2 is NULL for a single
    -- value. Neither column is referenced by the later steps of this query.
    CAST(SPLIT(handicap, '/')[0] AS FLOAT64) AS HG1,
    CAST(SPLIT(handicap, '/')[SAFE_OFFSET(1)] AS FLOAT64) AS HG2
  FROM hkjc.odds_today hkjc
  JOIN `master.teams` home_teams ON hkjc.home_id = home_teams.hkjc_id
  JOIN `solver.teams_latest` home_solver ON home_solver.id = home_teams.solver_id
    AND home_solver._TYPE = home_teams.type
  JOIN `master.teams` away_teams ON hkjc.away_id = away_teams.hkjc_id
  JOIN `solver.teams_latest` away_solver ON away_solver.id = away_teams.solver_id
    AND away_solver._TYPE = away_teams.type
  JOIN master.leagues ON hkjc.tournament_id = leagues.hkjc_id
  JOIN `solver.leagues_latest` league_solver ON leagues.division = league_solver.division
    AND league_solver._TYPE = leagues.type
  WHERE
    -- Keep teams whose solver_id casts to an integer, or any international team.
    (SAFE_CAST(home_teams.solver_id AS INT64) IS NOT NULL OR home_teams.type = 'International')
    AND (SAFE_CAST(away_teams.solver_id AS INT64) IS NOT NULL OR away_teams.type = 'International')
),

kelly AS (
  SELECT
    id,
    kick_off_time,
    tournament_name,
    home_name,
    away_name,
    hdc_H,
    hdc_A,
    handicap,
    -- Kelly-style fraction: p(win) - p(lose) / (decimal odds - 1).
    -- NOTE(review): odds of exactly 1 would divide by zero; confirm the feed
    -- never delivers that value.
    hdc_probs[0] - hdc_probs[2] / (hdc_H - 1) AS kelly_hdc_home,
    hdc_probs[2] - hdc_probs[0] / (hdc_A - 1) AS kelly_hdc_away,
  FROM matches
)

-- Home-side suggestions ('主' = home).
SELECT
  id,
  kick_off_time,
  tournament_name,
  home_name,
  away_name,
  '主' AS team,
  handicap,
  hdc_H AS odds,
  kelly_hdc_home AS amount,
FROM kelly
WHERE kelly_hdc_home > 0
UNION ALL
-- Away-side suggestions ('客' = away); column names come from the first SELECT.
SELECT
  id,
  kick_off_time,
  tournament_name,
  home_name,
  away_name,
  '客' AS team,
  handicap,
  hdc_A,
  kelly_hdc_away,
FROM kelly
WHERE kelly_hdc_away > 0
ORDER BY kick_off_time, id
-- Output query for the 'Europe UEFA Champions League' rows of
-- `simulation.leagues_latest`, joined to master team data and club ratings.
-- The `result` CTE derives per-team aggregates; the final SELECT rounds them
-- for presentation and orders teams by points.
WITH result AS (
  SELECT
    teams.transfermarkt_id,
    teams.name,
    rating,
    offence,
    defence,
    table.scored - table.conceded AS goal_diff,
    -- 3 per win, 1 per draw, plus an optional correction (NULL treated as 0).
    table.wins * 3 + table.draws + COALESCE(table.correction, 0) AS points,
    -- Sum of the position fields _1 .. _24; a NULL field counts as 0.
    COALESCE(positions._1, 0) + COALESCE(positions._2, 0) + COALESCE(positions._3, 0)
      + COALESCE(positions._4, 0) + COALESCE(positions._5, 0) + COALESCE(positions._6, 0)
      + COALESCE(positions._7, 0) + COALESCE(positions._8, 0) + COALESCE(positions._9, 0)
      + COALESCE(positions._10, 0) + COALESCE(positions._11, 0) + COALESCE(positions._12, 0)
      + COALESCE(positions._13, 0) + COALESCE(positions._14, 0) + COALESCE(positions._15, 0)
      + COALESCE(positions._16, 0) + COALESCE(positions._17, 0) + COALESCE(positions._18, 0)
      + COALESCE(positions._19, 0) + COALESCE(positions._20, 0) + COALESCE(positions._21, 0)
      + COALESCE(positions._22, 0) + COALESCE(positions._23, 0) + COALESCE(positions._24, 0) AS top24,
    -- Sum of the position fields _1 .. _8. Every term needs the explicit 0
    -- fallback: a single-argument COALESCE returns NULL for a NULL input,
    -- which would turn the whole sum into NULL.
    COALESCE(positions._1, 0) + COALESCE(positions._2, 0) + COALESCE(positions._3, 0)
      + COALESCE(positions._4, 0) + COALESCE(positions._5, 0) + COALESCE(positions._6, 0)
      + COALESCE(positions._7, 0) + COALESCE(positions._8, 0) AS top8,
    COALESCE(rounds.knockout_round_play_offs, 0) AS po,
    COALESCE(rounds.round_of_16, 0) AS r16,
    COALESCE(rounds.quarter_finals, 0) AS qf,
    COALESCE(rounds.semi_finals, 0) AS sf,
    COALESCE(rounds.final, 0) AS f,
    COALESCE(rounds.winner, 0) AS winner,
    leagues._DATE_UNIX
  FROM `simulation.leagues_latest` leagues
  JOIN master.teams ON leagues.team = teams.footystats_id
  JOIN solver.team_ratings ON teams.solver_id = team_ratings.id
  WHERE _LEAGUE = 'Europe UEFA Champions League'
    AND team_ratings._TYPE = 'Club'
    -- Keep rows whose rounded wins + draws + losses equals 8
    -- (presumably the 8-match league phase -- confirm).
    AND ROUND(table.wins + table.draws + table.losses) = 8
)

-- NOTE(review): top24 and top8 are computed above but not selected below.
SELECT
  transfermarkt_id,
  name,
  ROUND(rating, 1) AS rating,
  ROUND(offence, 2) AS offence,
  ROUND(defence, 2) AS defence,
  ROUND(goal_diff, 1) AS goal_diff,
  ROUND(points, 1) AS points,
  ROUND(po, 3) AS po,
  ROUND(r16, 3) AS r16,
  ROUND(qf, 3) AS qf,
  ROUND(sf, 3) AS sf,
  ROUND(f, 3) AS f,
  ROUND(winner, 3) AS winner,
  -- _DATE_UNIX shifted by +2 hours, rendered in Hong Kong time.
  FORMAT_TIMESTAMP('%F %H:%M', TIMESTAMP_ADD(TIMESTAMP_SECONDS(_DATE_UNIX), INTERVAL 2 HOUR), 'Asia/Hong_Kong') AS date_unix
FROM result
ORDER BY points DESC
@dataclass
class Season:
    """Round-robin competition played between ``teams``.

    Fixtures passed in ``matches`` are used as given. When none are supplied,
    a full schedule is generated from ``teams``: one fixture per pair when
    ``leg == 1``, home and away fixtures when ``leg == 2``.
    """

    teams: list[Team]
    avg_goal: float  # forwarded to Match.simulate
    home_adv: float  # forwarded to Match.simulate, except when leg == 1
    matches: list[Match]
    h2h: bool = False  # rank with TieBreaker.h2h instead of TieBreaker.goal_diff
    leg: int = 2  # 1 or 2; anything else raises ValueError when scheduling
    advance_to: str | dict[str, int] | None = None  # not read by this class

    def __post_init__(self):
        """Generate the fixture list when the caller did not provide one."""
        if not self.matches:
            # ``scheduling`` yields plain (home, away) team pairs from a
            # one-shot iterator. The rest of the class calls Match methods and
            # walks ``matches`` several times, so build real Match objects and
            # keep them in a list.
            self.matches = [
                Match(home_team, away_team)
                for home_team, away_team in self.scheduling(self.teams)
            ]

    @property
    def scheduling(self):
        """Return a callable mapping a team list to an iterator of team pairs.

        Raises:
            ValueError: if ``leg`` is neither 1 nor 2.
        """
        if self.leg == 1:
            return partial(combinations, r=2)
        if self.leg == 2:
            return partial(permutations, r=2)
        raise ValueError

    @property
    def _home_adv(self):
        """Home advantage used in simulation: 0 for single-leg seasons."""
        if self.leg == 1:
            return 0
        return self.home_adv

    @property
    def tiebreaker(self):
        """Sort key used to rank teams, selected by the ``h2h`` flag."""
        return TieBreaker.h2h if self.h2h else TieBreaker.goal_diff

    @property
    def positions(self) -> list[Team]:
        """Return the teams sorted by the active tiebreaker, best first."""
        points = defaultdict(list)
        for team in self.teams:
            points[team.table.points].append(team)

        if self.h2h:
            # For every set of teams level on points, log the matches played
            # between them with h2h=True before sorting.
            for teams in points.values():
                if len(teams) < 2:
                    continue
                for match in self.matches:
                    if match.home_team in teams and match.away_team in teams:
                        match.log_teams_table(h2h=True)

        return sorted(
            self.teams,
            key=self.tiebreaker,
            reverse=True,
        )

    def simulate(self):
        """Simulate unfinished matches, then log each team's table and position."""
        for match in self.matches:
            if not match.is_complete:
                match.simulate(self.avg_goal, self._home_adv)
            match.log_teams_table()

        for position, team in enumerate(self.positions, 1):
            team.log_sim_table()
            team.log_sim_positions(position)

    def get_advanced(self, end: int, start: int = 1) -> list[Team]:
        """Return the teams ranked ``start`` to ``end`` (1-based, inclusive)."""
        return self.positions[start - 1 : end]

    def reset(self):
        """Reset every match and every team."""
        for match in self.matches:
            match.reset()
        for team in self.teams:
            team.reset()
def get_footystats(endpoint: str, key: str, **kwargs) -> dict | list[dict]:
    """Download all pages of a FootyStats ``league-<endpoint>`` resource.

    Args:
        endpoint: Suffix appended to ``league-`` in the request URL.
        key: API key, sent as the ``key`` query parameter (never logged).
        **kwargs: Extra query parameters, forwarded as-is.

    Returns:
        The ``data`` payload itself when it is a dict (returned from the first
        page that yields one); otherwise the concatenation of the ``data``
        lists of every page up to ``pager["max_page"]``.

    Raises:
        TooManyRequestsError: on any HTTP error status or read timeout. The
            underlying ``requests`` exception is attached as ``__cause__``.
    """
    page = 1
    results = []

    while True:
        logging.info(f"Getting footystats data: {endpoint=}, {page=}, {kwargs=}")

        try:
            response = requests.get(
                f"https://api.football-data-api.com/league-{endpoint}",
                params={"key": key, "page": page, **kwargs},
                timeout=5,
            )
            response.raise_for_status()
        except (
            requests.exceptions.HTTPError,
            requests.exceptions.ReadTimeout,
        ) as err:
            logging.warning(
                f"Get footystats data failed: {endpoint=}, {page=}, {kwargs=}"
            )
            # Chain explicitly so the original failure stays visible in logs.
            raise TooManyRequestsError() from err

        payload = response.json()
        data = payload["data"]

        if isinstance(data, dict):
            return data
        results.extend(data)

        logging.info(f"Got footystats data: {endpoint=}, {page=}, {kwargs=}")

        pager = payload["pager"]
        if pager["current_page"] >= pager["max_page"]:
            return results
        page += 1
-- Recently completed matches with the model's home/draw/away probabilities.
-- The `matches` CTE pairs each match with one solver snapshot and computes the
-- probabilities; the final SELECT formats and rounds the columns for output.
WITH matches AS (
  SELECT
    matches.id,
    leagues.display_order,
    matches.date_unix,
    leagues.transfermarkt_id AS league_transfermarkt_id,
    home_teams.transfermarkt_id AS home_team_transfermarkt_id,
    home_teams.name AS home_team_name,
    away_teams.transfermarkt_id AS away_team_transfermarkt_id,
    away_teams.name AS away_team_name,
    homeGoalCount,
    awayGoalCount,
    home_adj,
    away_adj,
    -- Non-positive xG values become NULL (the CASE has no ELSE branch).
    CASE
      WHEN team_a_xg > 0 THEN team_a_xg
    END AS team_a_xg,
    CASE
      WHEN team_b_xg > 0 THEN team_b_xg
    END AS team_b_xg,
    -- Arguments 1 and 2 are the forecast goals for the home and away side:
    -- avg_goal +/- home_adv + own offence + opponent defence.
    -- The third argument '0' is presumably a zero handicap -- confirm against
    -- functions.matchProbs.
    functions.matchProbs(
      league_solver.avg_goal + league_solver.home_adv + home_solver.offence + away_solver.defence,
      league_solver.avg_goal - league_solver.home_adv + away_solver.offence + home_solver.defence,
      '0') AS had_probs
  FROM footystats.matches
  JOIN footystats.matches_transformed USING (id)
  JOIN `master.teams` home_teams ON matches.homeID = home_teams.footystats_id
  JOIN `master.teams` away_teams ON matches.awayID = away_teams.footystats_id
  JOIN master.leagues ON matches._NAME = leagues.footystats_name
  -- Only solver snapshots dated strictly before the match are candidates.
  JOIN `solver.leagues` league_solver ON league_solver._DATE_UNIX < date_unix
    AND leagues.type = league_solver._TYPE
    AND leagues.division = league_solver.division
  -- Team ratings are taken from the same snapshot (_DATE_UNIX, _TYPE) as the league.
  JOIN `solver.teams` home_solver ON league_solver._DATE_UNIX = home_solver._DATE_UNIX
    AND league_solver._TYPE = home_solver._TYPE
    AND home_teams.solver_id = home_solver.id
  JOIN `solver.teams` away_solver ON league_solver._DATE_UNIX = away_solver._DATE_UNIX
    AND league_solver._TYPE = away_solver._TYPE
    AND away_teams.solver_id = away_solver.id
  WHERE matches.status = 'complete'
    -- Matches from the last 5 days only.
    AND date_unix >= UNIX_SECONDS(TIMESTAMP_ADD(CURRENT_TIMESTAMP(), INTERVAL -5 DAY))
    AND (home_teams.is_simulate
      OR away_teams.is_simulate
      OR leagues.is_simulate
      OR home_teams.country = 'Hong Kong'
      OR away_teams.country = 'Hong Kong'
      OR leagues.is_manual)
  -- Per match, keep the row with the most recent qualifying solver snapshot.
  QUALIFY ROW_NUMBER() OVER (PARTITION BY matches.id ORDER BY league_solver._DATE_UNIX DESC) = 1
  -- Newest matches first, capped at 100 rows.
  ORDER BY date_unix DESC, display_order, matches.id
  LIMIT 100
)

SELECT
  FORMAT_TIMESTAMP('%F %H:%M', TIMESTAMP_SECONDS(date_unix), 'Asia/Hong_Kong') AS matchDate,
  league_transfermarkt_id,
  home_team_transfermarkt_id,
  home_team_name,
  away_team_transfermarkt_id,
  away_team_name,
  -- had_probs elements 0 / 1 / 2 are exposed as home / draw / away.
  ROUND(had_probs[0], 2) AS had_home,
  ROUND(had_probs[1], 2) AS had_draw,
  ROUND(had_probs[2], 2) AS had_away,
  homeGoalCount,
  awayGoalCount,
  ROUND(home_adj, 2) AS home_adj,
  ROUND(away_adj, 2) AS away_adj,
  ROUND(team_a_xg, 2) AS team_a_xg,
  ROUND(team_b_xg, 2) AS team_b_xg
FROM matches
-- Output is grouped by league (display order first), newest match first within a league.
ORDER BY display_order, league_transfermarkt_id, date_unix DESC, matches.id
def get_hkjc_result(date: str) -> list[dict]:
    """Fetch all HKJC match results for ``date`` from the GraphQL endpoint.

    Results are requested in pages of 20 using 1-based, inclusive
    ``startIndex``/``endIndex`` variables, and accumulated until the number
    collected reaches ``matchNumByDate.total``.

    Args:
        date: Date string sent as both ``startDate`` and ``endDate``.

    Returns:
        The concatenated ``matches`` entries of every page.

    Raises:
        requests.HTTPError: if a page request returns an error status.
    """
    page_size = 20
    page = 1
    results = []

    body = """
    query matchResults($startDate: String, $endDate: String, $startIndex: Int,$endIndex: Int,$teamId: String) {
      timeOffset {
        fb
      }
      matchNumByDate(startDate: $startDate, endDate: $endDate, teamId: $teamId) {
        total
      }
      matches: matchResult(startDate: $startDate, endDate: $endDate, startIndex: $startIndex,endIndex: $endIndex, teamId: $teamId) {
        id
        status
        frontEndId
        matchDayOfWeek
        matchNumber
        matchDate
        kickOffTime
        sequence
        homeTeam {
          id
          name_en
          name_ch
        }
        awayTeam {
          id
          name_en
          name_ch
        }
        tournament {
          code
          name_en
          name_ch
        }
        results {
          homeResult
          awayResult
          resultConfirmType
          payoutConfirmed
          stageId
          resultType
          sequence
        }
        poolInfo {
          payoutRefundPools
          refundPools
          ntsInfo
          entInfo
          definedPools
        }
      }
    }
    """
    while True:
        logging.info(f"Getting HKJC result: {date=}, {page=}")

        response = requests.post(
            url="https://info.cld.hkjc.com/graphql/base/",
            headers={"content-type": "application/json"},
            json={
                "query": body,
                "variables": {
                    "startDate": date,
                    "endDate": date,
                    "startIndex": (page - 1) * page_size + 1,
                    "endIndex": page * page_size,
                },
            },
            timeout=5,
        )
        response.raise_for_status()
        data = response.json()["data"]
        matches = data["matches"]
        results.extend(matches)

        logging.info(f"Got HKJC result: {date=}, {page=}")

        # Stop on an empty page as well as when the reported total is reached
        # or exceeded. An exact-equality check would loop forever if the
        # reported total and the rows actually served ever disagree.
        if not matches or len(results) >= data["matchNumByDate"]["total"]:
            return results

        page += 1
functions.matchProbs(avg_goal + league_solver.home_adv * odds_latest.home_adv + home_solver.offence + away_solver.defence, avg_goal - league_solver.home_adv * odds_latest.home_adv + away_solver.offence + home_solver.defence, handicap) AS hdc_probs, 4 | HDC_H, 5 | HDC_A, 6 | CAST(SPLIT(handicap, '/')[0] AS FLOAT64) AS HG1, 7 | CAST(SPLIT(handicap, '/')[SAFE_OFFSET(1)] AS FLOAT64) AS HG2, 8 | home_score - away_score AS goal_diff 9 | FROM hkjc.odds_latest 10 | JOIN hkjc.scores USING(id) 11 | JOIN `master.teams` home_teams ON odds_latest.home_id = home_teams.hkjc_id 12 | JOIN `solver.teams` home_solver ON home_solver.id = home_teams.solver_id 13 | AND home_solver._TYPE = home_teams.type 14 | AND odds_latest._TIMESTAMP >= TIMESTAMP_SECONDS(home_solver._DATE_UNIX) 15 | JOIN `master.teams` away_teams ON odds_latest.away_id = away_teams.hkjc_id 16 | JOIN `solver.teams` away_solver ON away_solver.id = away_teams.solver_id 17 | AND away_solver._TYPE = away_teams.type 18 | AND away_solver._DATE_UNIX = home_solver._DATE_UNIX 19 | JOIN master.leagues ON odds_latest.tournament_id = leagues.hkjc_id 20 | JOIN `solver.leagues` league_solver ON leagues.division = league_solver.division 21 | AND league_solver._TYPE = leagues.type 22 | AND league_solver._DATE_UNIX = home_solver._DATE_UNIX 23 | WHERE 24 | (SAFE_CAST(home_teams.solver_id AS INT64) IS NOT NULL OR home_teams.type = 'International') 25 | AND (SAFE_CAST(away_teams.solver_id AS INT64) IS NOT NULL OR away_teams.type = 'International') 26 | AND kick_off_time >= '2024-09-28' 27 | QUALIFY ROW_NUMBER() OVER (PARTITION BY odds_latest.id ORDER BY home_solver._DATE_UNIX DESC) = 1 28 | ), 29 | 30 | kelly AS ( 31 | SELECT 32 | hdc_H, 33 | hdc_A, 34 | hdc_probs[0] - hdc_probs[2] / (hdc_H - 1) AS kelly_hdc_home, 35 | hdc_probs[2] - hdc_probs[0] / (hdc_A - 1) AS kelly_hdc_away, 36 | CASE 37 | WHEN goal_diff + HG1 > 0 THEN 1 38 | WHEN goal_diff + HG1 = 0 THEN 0 39 | ELSE -1 40 | END AS hdc_home_win1, 41 | CASE 42 | WHEN HG2 IS NOT NULL 
THEN 43 | CASE 44 | WHEN goal_diff + HG2 > 0 THEN 1 45 | WHEN goal_diff + HG2 = 0 THEN 0 46 | ELSE -1 47 | END 48 | END AS hdc_home_win2, 49 | kelly_ratio 50 | FROM matches 51 | CROSS JOIN UNNEST(GENERATE_ARRAY(10, 50, 1)) kelly_ratio 52 | ), 53 | 54 | results AS ( 55 | SELECT 56 | hdc_H AS odds, 57 | kelly_hdc_home / kelly_ratio AS amount, 58 | CASE 59 | WHEN hdc_home_win2 IS NULL THEN hdc_home_win1 60 | ELSE (hdc_home_win1 + hdc_home_win2) / 2 61 | END AS result, 62 | kelly_ratio 63 | FROM kelly 64 | WHERE kelly_hdc_home > 0 65 | UNION ALL 66 | SELECT 67 | hdc_A, 68 | kelly_hdc_away / kelly_ratio, 69 | CASE 70 | WHEN hdc_home_win2 IS NULL THEN -hdc_home_win1 71 | ELSE -(hdc_home_win1 + hdc_home_win2) / 2 72 | END, 73 | kelly_ratio 74 | FROM kelly 75 | WHERE kelly_hdc_away > 0 76 | ) 77 | 78 | SELECT 79 | kelly_ratio, 80 | EXP(SUM(LOG(CASE 81 | WHEN result > 0 THEN 1 + amount * (odds - 1) * result 82 | ELSE 1 + amount * result 83 | END))) AS product 84 | FROM results 85 | GROUP BY kelly_ratio 86 | ORDER BY product DESC 87 | LIMIT 1 -------------------------------------------------------------------------------- /infrastructure/modules/event-function/variables.tf: -------------------------------------------------------------------------------- 1 | variable "name" { 2 | description = "The name to apply to any nameable resources." 3 | type = string 4 | } 5 | 6 | variable "runtime" { 7 | description = "The runtime in which the function will be executed." 8 | type = string 9 | default = "python312" 10 | } 11 | 12 | variable "entry_point" { 13 | description = "The name of a method in the function source which will be invoked when the function is executed." 14 | type = string 15 | default = "main" 16 | } 17 | 18 | variable "docker_repository" { 19 | type = string 20 | default = null 21 | description = "User managed repository created in Artifact Registry optionally with a customer managed encryption key." 
22 | } 23 | 24 | variable "bucket_name" { 25 | description = "The name of the Google Cloud Storage bucket used for storing the function's source code." 26 | type = string 27 | } 28 | 29 | variable "timeout_s" { 30 | description = "The amount of time in seconds allotted for the execution of the function." 31 | type = number 32 | default = 60 33 | } 34 | 35 | variable "available_memory" { 36 | description = "The amount of memory allotted for the function to use." 37 | type = string 38 | default = "256Mi" 39 | } 40 | 41 | variable "max_instance_request_concurrency" { 42 | description = "The maximum number of concurrent requests that each instance can receive." 43 | type = number 44 | default = 1 45 | } 46 | 47 | variable "available_cpu" { 48 | description = "The number of CPUs used in a single container instance." 49 | type = string 50 | default = "0.1666" 51 | } 52 | 53 | variable "environment_variables" { 54 | description = "A set of key/value environment variable pairs to assign to the function." 55 | type = map(string) 56 | default = {} 57 | } 58 | 59 | variable "max_instances" { 60 | description = "The maximum number of parallel executions of the function." 61 | type = number 62 | default = 100 63 | } 64 | 65 | variable "secret_environment_variables" { 66 | description = "A list of secret names (not the full secret IDs) to be assigned to the function as secret environment variables." 67 | type = list(string) 68 | default = [] 69 | } 70 | 71 | variable "event_type" { 72 | description = "The type of event to observe." 73 | type = string 74 | } 75 | 76 | variable "event_filters" { 77 | description = "A map of key-value pairs representing filters used to selectively trigger the function based on specific events." 78 | type = map(string) 79 | default = {} 80 | } 81 | 82 | variable "topic_name" { 83 | description = "The name of a Pub/Sub topic that will be used as the transport topic for the event delivery." 
# BigQuery dataset that owns every table, view, routine and scheduled query
# declared in this module.
resource "google_bigquery_dataset" "dataset" {
  dataset_id = var.dataset_id
  location   = var.location
  project    = var.project_id

  # Contents may only be wiped on destroy when deletion protection is OFF.
  # The flag must be the inverse of var.deletion_protection; passing it through
  # unchanged would let a protected dataset be emptied by `terraform destroy`.
  delete_contents_on_destroy = !var.deletion_protection
}
[each.value["hive_partitioning_options"]] : [] 33 | content { 34 | mode = "CUSTOM" 35 | source_uri_prefix = hive_partitioning_options.value["source_uri_prefix"] 36 | } 37 | } 38 | } 39 | } 40 | 41 | resource "google_bigquery_table" "views" { 42 | for_each = var.views 43 | dataset_id = google_bigquery_dataset.dataset.dataset_id 44 | table_id = each.key 45 | project = var.project_id 46 | deletion_protection = false 47 | 48 | view { 49 | query = each.value 50 | use_legacy_sql = false 51 | } 52 | 53 | depends_on = [ 54 | google_bigquery_table.native_tables, 55 | google_bigquery_table.external_tables 56 | ] 57 | } 58 | 59 | resource "google_bigquery_routine" "routines" { 60 | for_each = var.routines 61 | dataset_id = google_bigquery_dataset.dataset.dataset_id 62 | routine_id = each.key 63 | definition_body = each.value["definition_body"] 64 | routine_type = each.value["routine_type"] 65 | language = each.value["language"] 66 | return_type = each.value["return_type"] 67 | project = var.project_id 68 | 69 | dynamic "arguments" { 70 | for_each = each.value["arguments"] == null ? 
[] : each.value["arguments"] 71 | content { 72 | name = arguments.value["name"] 73 | data_type = arguments.value["data_type"] 74 | } 75 | } 76 | 77 | depends_on = [ 78 | google_bigquery_table.native_tables, 79 | google_bigquery_table.external_tables 80 | ] 81 | } 82 | 83 | resource "google_bigquery_data_transfer_config" "scheduled_queries" { 84 | for_each = var.scheduled_queries 85 | display_name = each.key 86 | data_source_id = "scheduled_query" 87 | destination_dataset_id = google_bigquery_dataset.dataset.dataset_id 88 | schedule = each.value.schedule 89 | disabled = !var.deletion_protection 90 | location = var.location 91 | service_account_name = var.service_account_name 92 | project = var.project_id 93 | params = { 94 | destination_table_name_template = each.key 95 | write_disposition = "WRITE_TRUNCATE" 96 | query = each.value.query 97 | } 98 | 99 | depends_on = [ 100 | google_bigquery_table.native_tables, 101 | google_bigquery_table.external_tables 102 | ] 103 | } 104 | -------------------------------------------------------------------------------- /infrastructure/modules/scheduled-function/variables.tf: -------------------------------------------------------------------------------- 1 | variable "function_name" { 2 | description = "The name to apply to the function." 3 | type = string 4 | } 5 | 6 | variable "function_runtime" { 7 | description = "The runtime in which the function will be executed." 8 | type = string 9 | default = "python312" 10 | } 11 | 12 | variable "function_entry_point" { 13 | description = "The name of a method in the function source which will be invoked when the function is executed." 14 | type = string 15 | default = "main" 16 | } 17 | 18 | variable "docker_repository" { 19 | type = string 20 | default = null 21 | description = "User managed repository created in Artifact Registry optionally with a customer managed encryption key." 
22 | } 23 | 24 | variable "bucket_name" { 25 | type = string 26 | description = "The name to apply to the bucket." 27 | } 28 | 29 | variable "function_timeout_s" { 30 | description = "The amount of time in seconds allotted for the execution of the function." 31 | type = number 32 | default = 60 33 | } 34 | 35 | variable "function_available_memory" { 36 | description = "The amount of memory in megabytes allotted for the function to use." 37 | type = string 38 | default = "256Mi" 39 | } 40 | 41 | variable "function_available_cpu" { 42 | description = "The number of CPUs used in a single container instance." 43 | type = string 44 | default = "0.1666" 45 | } 46 | 47 | variable "function_environment_variables" { 48 | description = "A set of key/value environment variable pairs to assign to the function." 49 | type = map(string) 50 | default = {} 51 | } 52 | 53 | variable "function_max_instances" { 54 | description = "The maximum number of parallel executions of the function." 55 | type = number 56 | default = 100 57 | } 58 | 59 | variable "function_secret_environment_variables" { 60 | description = "A list of secret names (not the full secret IDs) to be assigned to the function as secret environment variables." 61 | type = list(string) 62 | default = [] 63 | } 64 | 65 | variable "function_event_filters" { 66 | description = "A map of key-value pairs representing filters used to selectively trigger the function based on specific events." 67 | type = map(string) 68 | default = {} 69 | } 70 | 71 | variable "function_event_trigger_failure_policy" { 72 | description = "The retry policy for the function when a triggered event results in a failure." 73 | type = string 74 | default = "RETRY_POLICY_DO_NOT_RETRY" 75 | } 76 | 77 | variable "function_source_directory" { 78 | description = "The contents of this directory will be archived and used as the function source." 
79 | type = string 80 | } 81 | 82 | variable "job_name" { 83 | description = "The name of the scheduled job to run." 84 | type = string 85 | } 86 | 87 | variable "job_schedule" { 88 | description = "The job frequency, in cron syntax." 89 | type = string 90 | } 91 | 92 | variable "job_paused" { 93 | description = "Sets the job to a paused state." 94 | type = bool 95 | default = false 96 | } 97 | 98 | variable "message_data" { 99 | description = "The data to send in the topic message." 100 | type = string 101 | default = "Hello World" 102 | } 103 | 104 | variable "topic_name" { 105 | description = "Name of pubsub topic connecting the scheduled job and the function." 106 | type = string 107 | } 108 | 109 | variable "region" { 110 | type = string 111 | description = "The region in which resources will be applied." 112 | } 113 | 114 | variable "project_id" { 115 | type = string 116 | description = "The ID of the project where the resources will be created." 117 | } 118 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Multiplicative Rating Model for Football 2 | 3 | ## Introduction 4 | This model makes reference to the [course material](https://www.coursera.org/learn/mathematics-sport/lecture/nR8wd/8-4-multiplicative-rating-models-for-soccer) of Math behind Moneyball instructed by Professor Wayne Winston and FiveThirtyEight's [club soccer predictions](https://projects.fivethirtyeight.com/soccer-predictions). In the lecture, the professor used the Solver add-in in Excel for the calculation, which takes a long time to find solutions. To speed up the process, this Python script uses a solver from `pulp`, which is many times faster in some cases.
5 | 6 | ## Result 7 | [Hong Kong Football Prediction (in Traditional Chinese)](https://docs.google.com/spreadsheets/d/1mlWjjkJEDogGUujwi0ShMBhc36J1-il67fTG8ldaZqg/) 8 | 9 | ## Methodology 10 | The expected goals for home team and away team are calculated as follows: 11 | ``` 12 | home_team_forecasted_goals = average_goals + home_advantage + home_team_offensive_rating + away_team_defensive_rating 13 | away_team_forecasted_goals = average_goals - home_advantage + away_team_offensive_rating + home_team_defensive_rating 14 | ``` 15 | And the solver finds the best values for each rating by minimising the following function: 16 | ``` 17 | objective_function = abs(home_team_forecasted_goals - home_team_adjusted_goals) + abs(away_team_forecasted_goals - away_team_adjusted_goals) 18 | ``` 19 | A 0.35 offensive rating means the team is expected to score 0.35 more goals and a 0.35 defensive rating means the team is expected to concede 0.35 more goals compared to an average team. 20 | 21 | Unlike the 22 | [Elo rating system](https://en.wikipedia.org/wiki/Elo_rating_system), a team rating does not necessarily improve whenever it wins a match. If the team performs worse than the model expected, its ratings can decline. 23 | 24 | In addition, recent matches are given more weight to reflect a team's recent performance. 25 | 26 | ## Adjusting Goals 27 | Soccer is a tricky sport to model because there are so few goals scored in each match. The final result may not reflect the performance of each team well. To mitigate the randomness and estimate team ratings better, two metrics are used in the calculation using in-depth match stats from [Footy Stats API](https://docs.footystats.org/): 28 | 29 | 1. For *adjusted goals*, goals scored late by a leading team may not be important. Using `goal_timings` columns, the value of a goal by a leading team decreases linearly after the 70th minute. A goal in the 90th minute or later is only worth 0.5 goals in the calculation. 30 | 31 | 2.
For [*expected goals*](https://youtu.be/w7zPZsLGK18), `xg` columns (if available) are used. 32 | 33 | The average of the above two metrics is used as `forecasted goals` in the calculation. 34 | 35 | 36 | ## Simulating Matches 37 | Poisson distributions are used here. 38 | 39 | ## Team Rating 40 | To calculate a team rating, the expected goals scored and conceded by each team against an average team in the model are calculated using the same formula above. The percentage of possible points against an average team is the team rating. For example, if a team is forecast to have a 50% probability to win (scoring three points), 25% to draw (scoring one point) and 25% to lose (scoring no points) against an average team, the team rating of the team is: 41 | ``` 42 | (0.50 * 3 + 0.25 * 1 + 0.25 * 0) / 3 * 100 = 58.3 43 | ``` 44 | From the formulae, the distribution of team ratings is not linear. Below is a general guideline from [ESPN](https://www.espn.com/world-cup/story/_/id/4447078/ce/us/guide-espn-spi-ratings): 45 | 46 | Rating | Strength 47 | --- | --- 48 | 85-100 | Elite 49 | 80-84 | Very Strong 50 | 75-79 | Strong 51 | 70-74| Good 52 | 60-69| Competitive 53 | 50-59| Marginal 54 | 25-49| Weak 55 | 0-24| Very Weak 56 | 57 | Theoretically, a team with a rating of 100 would beat every other team, while a team with a rating of 0 would lose to every other team in the model. 58 | 59 | ## Simulating Seasons Using Monte Carlo Method 60 | To be updated.
@dataclass
class Match:
    """A fixture between two teams, either already played or to be simulated.

    Attributes:
        home_team: Team playing at home.
        away_team: Team playing away.
        status: ``"complete"`` once the final score is known, otherwise
            ``"incomplete"``.
        home_score: Goals credited to the home team so far.
        away_score: Goals credited to the away team so far.
    """

    home_team: Team
    away_team: Team
    status: str = "incomplete"
    home_score: int = 0
    away_score: int = 0

    def __post_init__(self):
        # Snapshot the constructor state so reset() can undo a simulation run.
        self._status = self.status
        self._home_score = self.home_score
        self._away_score = self.away_score
        # Only set when a cup tie is still level after extra time.
        self._winning_team = None

    def __add__(self, other: "Match") -> "Match":
        """Return the aggregate of this match and its reverse fixture.

        The aggregate is expressed from the point of view of ``other``:
        ``other``'s home team is the home team of the result. It is complete
        only when both legs are complete.

        Raises:
            ValueError: If ``other`` is not the reverse fixture of this match.
        """
        if (self.home_team, self.away_team) != (other.away_team, other.home_team):
            raise ValueError(
                "Matches can only be added to the reverse fixture of the same teams"
            )
        both_played = self.is_complete and other.is_complete
        return Match(
            home_team=self.away_team,
            away_team=self.home_team,
            status="complete" if both_played else "incomplete",
            home_score=self.away_score + other.home_score,
            away_score=self.home_score + other.away_score,
        )

    @property
    def teams(self) -> tuple[Team, Team]:
        """The two participants as ``(home_team, away_team)``."""
        return (self.home_team, self.away_team)

    @property
    def is_complete(self) -> bool:
        """Whether the match has a final result."""
        return self.status == "complete"

    @property
    def winning_team(self) -> Team | None:
        """The winner of a complete match.

        Returns ``None`` while the match is incomplete. For a complete match
        the team with more goals wins; if the scores are level, the team
        picked by the shoot-out in ``simulate`` is returned, or ``None`` when
        there was no shoot-out (a draw).
        """
        if not self.is_complete:
            return None
        if self.home_score > self.away_score:
            return self.home_team
        if self.away_score > self.home_score:
            return self.away_team
        return self._winning_team

    def _simulate(self, avg_goal: float, home_adv: float, extra_time: bool = False):
        """Add Poisson-distributed goals for one period of play to both scores.

        Expected goals are the average goals, plus or minus the home
        advantage, plus the attacker's offence rating and the opponent's
        defence rating. Extra time uses a third of that expectation. The
        0.2 floor is applied after the extra-time scaling.
        """
        home_exp = avg_goal + home_adv + self.home_team.offence + self.away_team.defence
        away_exp = avg_goal - home_adv + self.away_team.offence + self.home_team.defence
        if extra_time:
            home_exp /= 3
            away_exp /= 3
        home_exp = max(home_exp, 0.2)
        away_exp = max(away_exp, 0.2)
        # Draw order (home first, then away) is kept stable for seeded runs.
        self.home_score += np.random.poisson(home_exp)
        self.away_score += np.random.poisson(away_exp)

    def set_status_complete(self):
        """Mark the match as having a final result."""
        self.status = "complete"

    def simulate(self, avg_goal: float, home_adv: float, is_cup: bool = False):
        """Simulate the match and mark it complete.

        League matches (``is_cup=False``) end after normal time and may be
        drawn. Cup matches that are level after normal time go to extra time,
        and if still level the winner is chosen uniformly at random to model
        a penalty shoot-out.

        The scores are compared directly rather than through
        ``winning_team``: that property is ``None`` until the match is
        complete, so testing it here would send every cup match to extra
        time regardless of the score.
        """
        self._simulate(avg_goal, home_adv)
        if not is_cup or self.home_score != self.away_score:
            self.set_status_complete()
            return

        self._simulate(avg_goal, home_adv, extra_time=True)
        if self.home_score == self.away_score:
            self._winning_team = random.choice([self.home_team, self.away_team])
        self.set_status_complete()

    def log_teams_table(self, h2h=False):
        """Record this match in both teams' tables.

        Args:
            h2h: Update the head-to-head tables instead of the main tables.

        A match without a winner is recorded as a draw for both teams. That
        includes a match that is not complete, because ``winning_team`` is
        ``None`` for it.
        """
        if h2h:
            home_table = self.home_team.h2h_table
            away_table = self.away_team.h2h_table
        else:
            home_table = self.home_team.table
            away_table = self.away_team.table

        winner = self.winning_team
        if winner == self.home_team:
            home_table.wins += 1
            away_table.losses += 1
        elif winner == self.away_team:
            away_table.wins += 1
            home_table.losses += 1
        else:
            home_table.draws += 1
            away_table.draws += 1

        home_table.scored += self.home_score
        away_table.scored += self.away_score
        home_table.conceded += self.away_score
        away_table.conceded += self.home_score

    def reset(self):
        """Restore the status and scores the match was constructed with."""
        self.status = self._status
        self.home_score = self._home_score
        self.away_score = self._away_score
        self._winning_team = None
@dataclass
class Tournament:
    """A competition made of named rounds that is simulated repeatedly.

    Attributes:
        avg_goal: Average goals per team, passed to every round.
        home_adv: Home advantage in goals, passed to every round.
        teams: Participating teams keyed by team name.
        matches: Fixtures keyed by round name.
        groups: Teams keyed by group name. Filled from the first "Groups"
            round definition when not supplied.
    """

    avg_goal: float
    home_adv: float
    teams: dict[str, Team]
    matches: dict[str, list[Match]] | None = None
    groups: dict[str, list[Team]] | None = None

    def __post_init__(self):
        self.rounds: dict[str, Round] = {}

    def create_round(self, name: str, param: dict) -> Round:
        """Build the round object described by ``param``.

        Args:
            name: Round name, used to look up its fixtures in ``matches``.
            param: Round definition. ``param["format"]`` selects the round
                type ("Groups", "Knockout", "Season" or "Winner"). The other
                keys read depend on the format. ``"advance_to"`` is optional
                for every format.

        Raises:
            ValueError: If the format is not one of the known round types.
        """
        _format = param["format"]

        if _format == "Groups":
            # Keep groups that were supplied up front; otherwise resolve the
            # team names in the definition to Team objects.
            self.groups = self.groups or {
                group: [self.teams[team] for team in _teams]
                for group, _teams in param["groups"].items()
            }
            return Groups(
                self.groups,
                self.avg_goal,
                self.home_adv,
                self.matches[name],
                param["h2h"],
                param["leg"],
                param.get("advance_to"),
            )

        if _format == "Knockout":
            advance_to = param.get("advance_to")
            # Teams already listed in the next round's fixtures are passed to
            # the knockout round as its winners. A round with no
            # "advance_to" has no next round, so the set is empty; the
            # original indexed param["advance_to"] and raised KeyError here.
            next_round_matches = (
                self.matches.get(advance_to, []) if advance_to is not None else []
            )
            return Knockout(
                name,
                self.avg_goal,
                self.home_adv,
                self.matches[name],
                param["leg"],
                advance_to,
                winning_teams={
                    team for match in next_round_matches for team in match.teams
                },
            )

        if _format == "Season":
            return Season(
                self.teams.values(),
                self.avg_goal,
                self.home_adv,
                self.matches[name],
                param["h2h"],
                param["leg"],
                param.get("advance_to"),
            )

        if _format == "Winner":
            return Winner()

        raise ValueError(f"Unknown round format: {_format}")

    def set_rounds(self, rounds: dict[str, dict]):
        """Create and register every round in ``rounds``, keeping its order."""
        for name, param in rounds.items():
            self.rounds[name] = self.create_round(name, param)

    def simulate(self, no_of_simulations: int = 1000) -> None:
        """Run the tournament repeatedly and average the per-team statistics.

        Each run simulates the rounds in registration order, forwards the
        advancing teams to the target round(s), and resets the round. After
        all runs, every team's ``sim_table``, ``sim_rounds`` and
        ``sim_positions`` are divided by the number of runs. Read the
        outcome from ``result``; this method returns nothing.

        Args:
            no_of_simulations: Number of runs; must be at least 1.

        Raises:
            ValueError: If ``no_of_simulations`` is less than 1. The averages
                would otherwise divide by zero or by a negative count.
        """
        if no_of_simulations < 1:
            raise ValueError("no_of_simulations must be at least 1")

        for _ in range(no_of_simulations):
            for round_obj in self.rounds.values():
                round_obj.simulate()

                advance_to = round_obj.advance_to
                if isinstance(advance_to, str):
                    self.rounds[advance_to].add_teams(round_obj.get_advanced())
                elif advance_to:
                    # Mapping of target round name to the keyword arguments
                    # selecting which teams go to that round.
                    for target, positions in advance_to.items():
                        self.rounds[target].add_teams(
                            round_obj.get_advanced(**positions)
                        )
                round_obj.reset()

        for team in self.teams.values():
            team.sim_table /= no_of_simulations
            team.sim_rounds /= no_of_simulations
            team.sim_positions /= no_of_simulations

    @property
    def result(self) -> list[dict]:
        """Per-team simulation statistics as plain dictionaries.

        When groups exist, teams are listed group by group and each entry
        carries a ``"group"`` key. Otherwise every team in ``teams`` is
        listed.
        """
        if self.groups:
            return [
                {
                    "team": team.name,
                    "group": group,
                    "positions": dict(team.sim_positions),
                    "rounds": dict(team.sim_rounds),
                    "table": asdict(team.sim_table),
                }
                for group, teams in self.groups.items()
                for team in teams
            ]

        return [
            {
                "team": team.name,
                "positions": dict(team.sim_positions),
                "rounds": dict(team.sim_rounds),
                "table": asdict(team.sim_table),
            }
            for team in self.teams.values()
        ]
"name_ch", 57 | "type": "STRING", 58 | "mode": "REQUIRED" 59 | } 60 | ] 61 | }, 62 | { 63 | "name": "awayTeam", 64 | "type": "RECORD", 65 | "mode": "NULLABLE", 66 | "fields": [ 67 | { 68 | "name": "id", 69 | "type": "INTEGER", 70 | "mode": "REQUIRED" 71 | }, 72 | { 73 | "name": "name_en", 74 | "type": "STRING", 75 | "mode": "REQUIRED" 76 | }, 77 | { 78 | "name": "name_ch", 79 | "type": "STRING", 80 | "mode": "REQUIRED" 81 | } 82 | ] 83 | }, 84 | { 85 | "name": "tournament", 86 | "type": "RECORD", 87 | "mode": "NULLABLE", 88 | "fields": [ 89 | { 90 | "name": "code", 91 | "type": "STRING", 92 | "mode": "REQUIRED" 93 | }, 94 | { 95 | "name": "name_en", 96 | "type": "STRING", 97 | "mode": "REQUIRED" 98 | }, 99 | { 100 | "name": "name_ch", 101 | "type": "STRING", 102 | "mode": "REQUIRED" 103 | } 104 | ] 105 | }, 106 | { 107 | "name": "results", 108 | "type": "RECORD", 109 | "mode": "REPEATED", 110 | "fields": [ 111 | { 112 | "name": "homeResult", 113 | "type": "INTEGER" 114 | }, 115 | { 116 | "name": "awayResult", 117 | "type": "INTEGER" 118 | }, 119 | { 120 | "name": "resultConfirmType", 121 | "type": "INTEGER" 122 | }, 123 | { 124 | "name": "payoutConfirmed", 125 | "type": "BOOLEAN" 126 | }, 127 | { 128 | "name": "stageId", 129 | "type": "INTEGER" 130 | }, 131 | { 132 | "name": "resultType", 133 | "type": "INTEGER" 134 | }, 135 | { 136 | "name": "sequence", 137 | "type": "INTEGER" 138 | } 139 | ] 140 | }, 141 | { 142 | "name": "poolInfo", 143 | "type": "RECORD", 144 | "mode": "NULLABLE", 145 | "fields": [ 146 | { 147 | "name": "payoutRefundPools", 148 | "type": "STRING", 149 | "mode": "REPEATED" 150 | }, 151 | { 152 | "name": "refundPools", 153 | "type": "STRING", 154 | "mode": "REPEATED" 155 | }, 156 | { 157 | "name": "ntsInfo", 158 | "type": "STRING", 159 | "mode": "REPEATED" 160 | }, 161 | { 162 | "name": "entInfo", 163 | "type": "STRING", 164 | "mode": "REPEATED" 165 | } 166 | ] 167 | } 168 | ] 
import json
import logging
import os
from datetime import datetime

import functions_framework
import requests

from gcp import storage
from gcp.logging import setup_logging

setup_logging()


@functions_framework.cloud_event
def main(_) -> None:
    """Fetch the current HKJC odds and upload them to the bucket as ``odds.json``.

    The odds types come from the ``ODDS_TYPES`` environment variable (a JSON
    list) and the target bucket from ``BUCKET_NAME``. The upload is
    partitioned by a ``_TIMESTAMP`` key holding the current timestamp. The
    triggering event is ignored.
    """
    storage.upload_json_to_bucket(
        data=get_hkjc_odds(odds_types=json.loads(os.environ["ODDS_TYPES"])),
        blob_name="odds.json",
        bucket_name=os.environ["BUCKET_NAME"],
        hive_partitioning={"_TIMESTAMP": get_current_timestamp()},
    )


# NOTE(review): the return is annotated as dict, but ``data.matches`` is
# presumably a list of match objects -- confirm against the API response.
def get_hkjc_odds(odds_types: list[str]) -> dict:
    """Query the HKJC GraphQL endpoint for matches and their odds pools.

    Args:
        odds_types: Odds type codes. The same list is sent for both the
            ``fbOddsTypes`` and ``fbOddsTypesM`` query variables.

    Returns:
        The value of ``data.matches`` from the JSON response.

    Raises:
        requests.HTTPError: If the endpoint answers with an error status.
    """
    logging.info(f"Getting HKJC data: {odds_types=}")
    body = """
    query matchList($startIndex: Int, $endIndex: Int,$startDate: String, $endDate: String, $matchIds: [String], $tournIds: [String], $fbOddsTypes: [FBOddsType]!, $fbOddsTypesM: [FBOddsType]!, $inplayOnly: Boolean, $featuredMatchesOnly: Boolean, $frontEndIds: [String], $earlySettlementOnly: Boolean, $showAllMatch: Boolean) {
      matches(startIndex: $startIndex,endIndex: $endIndex, startDate: $startDate, endDate: $endDate, matchIds: $matchIds, tournIds: $tournIds, fbOddsTypes: $fbOddsTypesM, inplayOnly: $inplayOnly, featuredMatchesOnly: $featuredMatchesOnly, frontEndIds: $frontEndIds, earlySettlementOnly: $earlySettlementOnly, showAllMatch: $showAllMatch) {
        id
        frontEndId
        matchDate
        kickOffTime
        status
        updateAt
        sequence
        esIndicatorEnabled
        homeTeam {
          id
          name_en
          name_ch
        }
        awayTeam {
          id
          name_en
          name_ch
        }
        tournament {
          id
          frontEndId
          nameProfileId
          isInteractiveServiceAvailable
          code
          name_en
          name_ch
        }
        isInteractiveServiceAvailable
        inplayDelay
        venue {
          code
          name_en
          name_ch
        }
        tvChannels {
          code
          name_en
          name_ch
        }
        liveEvents {
          id
          code
        }
        featureStartTime
        featureMatchSequence
        poolInfo {
          normalPools
          inplayPools
          sellingPools
          ntsInfo
          entInfo
        }
        runningResult {
          homeScore
          awayScore
          corner
        }
        runningResultExtra {
          homeScore
          awayScore
          corner
        }
        adminOperation {
          remark {
            typ
          }
        }
        foPools(fbOddsTypes: $fbOddsTypes) {
          id
          status
          oddsType
          instNo
          inplay
          name_ch
          name_en
          updateAt
          expectedSuspendDateTime
          lines {
            lineId
            status
            condition
            main
            combinations {
              combId
              str
              status
              offerEarlySettlement
              currentOdds
              selections {
                selId
                str
                name_ch
                name_en
              }
            }
          }
        }
      }
    }
    """
    response = requests.post(
        url="https://info.cld.hkjc.com/graphql/base/",
        headers={"content-type": "application/json"},
        json={
            "query": body,
            "variables": {
                "fbOddsTypes": odds_types,
                "fbOddsTypesM": odds_types,
                "showAllMatch": True,
            },
        },
        timeout=5,
    )
    # Only HTTP-level failures are detected here; the response body is not
    # checked for an "errors" entry before "data" is indexed.
    response.raise_for_status()
    matches = response.json()["data"]["matches"]
    logging.info(f"Got HKJC data: {odds_types=}")
    return matches


def get_current_timestamp() -> str:
    """Return the current time as an ISO 8601 string.

    ``datetime.now()`` is called without a timezone, so the value is the
    runtime's local time and carries no UTC offset.
    """
    return datetime.now().isoformat()
| Australia FFA Cup,,AFA,ffac,false, 8 | Belgium Belgian Cup,,BFC,ccb,false, 9 | Belgium Belgian Super Cup,,BSC,besc,false, 10 | Belgium Pro League,,BFL,be1,false, 11 | Brazil Copa do Brasil,,BDC,brc,false, 12 | Brazil Paulista A1,,BPC,bcp1,false, 13 | Brazil Serie A,,BD1,bra1,false, 14 | Chile Copa Chile,,CHC,ch1c,false, 15 | Chile Primera División,,CD1,clpd,false, 16 | Chile Super Cup,,CSC,csuc,false, 17 | China China League One,中國甲級聯賽,,clo,true,17 18 | China Chinese FA Cup,中國足協盃,,chfa,false,29 19 | China Chinese Super League,中國超級聯賽,,csl,true,16 20 | China Super Cup,中國超級盃,,chsc,false,16 21 | England Championship,,ED1,gb2,false, 22 | England Community Shield,,ESH,gbcs,false, 23 | England EFL League One,,ED2,gb3,false, 24 | England EFL League Two,,ED3,gb4,false, 25 | England EFL Trophy,,ELT,gbfl,false, 26 | England FA Cup,,EFA,fac,false, 27 | England League Cup,,ELC,cgb,false, 28 | England Premier League,,EPL,gb1,true, 29 | Europe UEFA Champions League,,UCL,cl,true, 30 | Europe UEFA Europa Conference League,,UEF,ucol,false, 31 | Europe UEFA Europa League,,UEC,el,false, 32 | Europe UEFA Super Cup,,USC,usc,false, 33 | Finland Finnish Cup,,,fil,false, 34 | Finland Veikkausliiga,,FVL,fi1,false, 35 | France Coupe de France,,FFA,frc,false, 36 | France Ligue 1,,FFL,fr1,true, 37 | France Ligue 2,,FF2,fr2,false, 38 | France Trophee des Champions - Super Cup,,FCT,frch,false, 39 | Germany 2. 
Bundesliga,,GD2,l2,false, 40 | Germany Bundesliga,,GSL,l1,true, 41 | Germany DFB Pokal,,GSC,dfb,false, 42 | Germany DFL Super Cup,,GCC,dfl,false, 43 | Germany Germany Play offs 1 2,,,l1po,false, 44 | Hong Kong Hong Kong FA Cup,香港足總盃,,hkfa,false,02 45 | Hong Kong Hong Kong Premier League,香港超級聯賽,,hgkg,true,01 46 | Hong Kong Senior Shield,香港高級組銀牌,,hksc,false,03 47 | Hong Kong Sapling Cup,香港菁英盃,,hkp2,false,04 48 | International AFC Asian Cup,,ASC,am23,false, 49 | International AFF Championship,,SEC,af24,false, 50 | International Africa Cup of Nations,,ANC,afcn,false, 51 | International Africa Cup of Nations Qualification,,ANQ,afcq,false, 52 | International Asian Cup Qualification,,ASQ,acqu,true, 53 | International CONCACAF Champions League,,CNC,ccl,false, 54 | International CONCACAF Gold Cup,,CGC,gocu,false, 55 | International Copa America,,COA,cam4,false, 56 | International EAFF E1 Football Championship,,EAC,eafc,false, 57 | International FIFA Club World Cup,,CWP,klub,true, 58 | International International Friendlies,,INT,,false, 59 | International UEFA Euro Championship,,EUC,em24,false, 60 | International UEFA Euro Qualifiers,,EUQ,emq,false, 61 | International UEFA Nations League,,ENL,unla,false, 62 | International WC Qualification Asia,,WCQ,powm,true, 63 | Italy Coppa Italia,,IFC,cit,false, 64 | Italy Serie A,,ISA,it1,true, 65 | Italy Supercoppa Italiana,,ISC,sci,false, 66 | Japan Emperor Cup,,JEC,jemp,false, 67 | Japan J-League Cup,,JLC,japc,false, 68 | Japan J1 League,,JD1,jap1,true, 69 | Japan J2 League,,JD2,jap2,false, 70 | Japan Super Cup,,JSC,jasp,false, 71 | Mexico Ascenso MX,,MD1,mex2,false, 72 | Mexico Campeón de Campeones,,MCC,mxsc,false, 73 | Mexico Liga MX,,MXL,mex1,false, 74 | Mexico Supercopa MX,,MSC,sc14,false, 75 | Netherlands Dutch Super Cup,,DSC,nlsc,false, 76 | Netherlands Eerste Divisie,,DF2,nl2,false, 77 | Netherlands Eredivisie,,DFL,nl1,false, 78 | Netherlands KNVB Cup,,DAC,nlp,false, 79 | Norway Eliteserien,,NTL,no1,false, 80 | Norway 
Norwegian Football Cup,,NWC,nopo,false, 81 | Portugal Liga NOS,,PFL,po1,false, 82 | Portugal Portuguese League Cup,,PLC,pocp,false, 83 | Portugal Portuguese Super Cup,,PSC,posu,false, 84 | Portugal Taça de Portugal,,PFC,popo,false, 85 | Qatar Emir Cup,,QAC,qec1,false, 86 | Qatar Qatar Stars Cup,,QSC,qscp,false, 87 | Qatar Stars League,,QSL,qsl,false, 88 | Russia Russian Cup,,RFC,rup,false, 89 | Russia Russian Premier League,,RPL,ru1,false, 90 | Russia Russian Super Cup,,RSC,russ,false, 91 | Saudi Arabia Kings Cup,,SKC,sakc,false, 92 | Saudi Arabia Professional League,,SDL,sa1,false, 93 | Scotland Premiership,,SPL,sc1,false, 94 | Scotland Scottish Cup,,SFA,sfa,false, 95 | Scotland Scottish League Cup,,SLC,scp,false, 96 | South America CONMEBOL Recopa Sudamericana,,SAS,reco,false, 97 | South America Copa Libertadores,,LBC,cli,false, 98 | South America Copa Sudamericana,,SAC,cs,false, 99 | South Korea K League 1,,KD1,rsk1,false, 100 | South Korea Korean FA Cup,,KFA,rkpo,false, 101 | Spain Copa del Rey,,SFC,cdr,false, 102 | Spain La Liga,,SFL,es1,true, 103 | Spain Segunda División,,SF2,es2,false, 104 | Spain Supercopa de Espana,,SSC,suc,false, 105 | Sweden Allsvenskan,,SAL,se1,false, 106 | Sweden Svenska Cupen,,SWC,sec,false, 107 | UAE Arabian Gulf League,,UPR,uae1,false, 108 | UAE UAE League Cup,,ULP,,false, 109 | USA MLS,,MLS,mls1,false, 110 | USA USL Championship,,USL,usl,false, 111 | USA US Open Cup,,MLC,mlsp,false, -------------------------------------------------------------------------------- /src/function/footystats_transform_matches/main.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | import os 4 | import re 5 | from enum import IntEnum 6 | 7 | from cloudevents.http.event import CloudEvent 8 | import functions_framework 9 | 10 | from gcp import storage 11 | from gcp.logging import setup_logging 12 | 13 | setup_logging() 14 | 15 | REDUCE_FROM_MINUTE = 70 16 | REDUCE_LEADING_GOAL_VALUE = 
REDUCE_LEADING_GOAL_VALUE = 0.5
REDUCE_RED_CARD_GOAL_VALUE = 0.2
XG_WEIGHT = 0.67
# Scaling applied to adjusted goals, keyed by
# (card_timings_recorded, goal_timings_recorded).
ADJ_FACTORS = {
    (False, False): 1,
    (True, False): 1,
    (False, True): 1.04,
    (True, True): 1.05,
}
XG_ADJ_FACTOR = 1.1

RESULT_BUCKET_NAME = os.environ["BUCKET_NAME"]

# Leading minute of a goal timing string, e.g. "45" in "45+2".
_BASE_MINUTE_PATTERN = re.compile(r"(^1?\d{1,2})")
# First run of digits after the leading minute, read as stoppage time.
_STOPPAGE_PATTERN = re.compile(r"\d+")


@functions_framework.cloud_event
def main(cloud_event: CloudEvent):
    """Transform every match in the triggering blob and upload the result.

    The blob named in the event is read as newline-delimited JSON. Each
    match is passed through ``transform_match`` and the list is uploaded
    under the same blob name to ``RESULT_BUCKET_NAME``.
    """
    message = cloud_event.data
    blob_name = message["name"]
    bucket_name = message["bucket"]

    logging.info("Transforming blob: %s", blob_name)
    blob = storage.download_blob(
        blob_name,
        bucket_name,
    )

    # Blank lines (for example a trailing newline pair) are skipped instead
    # of being handed to json.loads, which would raise on them.
    data = [
        transform_match(json.loads(line))
        for line in blob.splitlines()
        if line.strip()
    ]

    storage.upload_json_to_bucket(data, blob_name, bucket_name=RESULT_BUCKET_NAME)


class Team(IntEnum):
    """Side of a match."""

    HOME = 0
    AWAY = 1


def transform_match(_match: dict) -> dict:
    """Compute adjusted and xG-blended goal values for one match.

    Adjusted goals start from the actual goal counts. When goal timings are
    usable they are recomputed goal by goal via ``reduce_goal_value``.
    Otherwise, if red-card data shows one side had more players, that side's
    goals are scaled down by half of ``REDUCE_RED_CARD_GOAL_VALUE``. Both
    values are then scaled by the matching ``ADJ_FACTORS`` entry. When the
    match has a positive ``total_xg``, the ``*_avg`` values blend adjusted
    goals with scaled xG using ``XG_WEIGHT``. Otherwise they equal the
    adjusted goals.

    Returns:
        A dict with ``id``, ``home_adj``, ``away_adj``, ``home_avg`` and
        ``away_avg``.
    """
    home_adj: float = _match["homeGoalCount"]
    away_adj: float = _match["awayGoalCount"]
    more_player_team: Team | None = None

    # Timings are only trusted when the flag is set and no entry is the
    # literal string "None".
    valid_home_goals = all(minute != "None" for minute in _match["homeGoals"])
    valid_away_goals = all(minute != "None" for minute in _match["awayGoals"])
    goal_timings_recorded = (
        _match["goal_timings_recorded"] == 1 and valid_home_goals and valid_away_goals
    )
    card_timings_recorded = _match["card_timings_recorded"] == 1

    if card_timings_recorded:
        more_player_team = get_more_players_team(
            _match["team_a_red_cards"], _match["team_b_red_cards"]
        )

    if goal_timings_recorded:
        goal_timings = get_goal_timings_dict(_match["homeGoals"], _match["awayGoals"])
        home_adj, away_adj = reduce_goal_value(goal_timings, more_player_team)
    elif more_player_team == Team.HOME:
        home_adj *= 1 - (REDUCE_RED_CARD_GOAL_VALUE / 2)
    elif more_player_team == Team.AWAY:
        away_adj *= 1 - (REDUCE_RED_CARD_GOAL_VALUE / 2)

    adj_factor = ADJ_FACTORS[(card_timings_recorded, goal_timings_recorded)]
    home_adj *= adj_factor
    away_adj *= adj_factor

    # A null total_xg is treated like a missing xG value instead of raising
    # TypeError on the comparison.
    total_xg = _match["total_xg"]
    if total_xg is not None and total_xg > 0:
        home_avg = (
            home_adj * (1 - XG_WEIGHT) + _match["team_a_xg"] * XG_ADJ_FACTOR * XG_WEIGHT
        )
        away_avg = (
            away_adj * (1 - XG_WEIGHT) + _match["team_b_xg"] * XG_ADJ_FACTOR * XG_WEIGHT
        )
    else:
        home_avg, away_avg = home_adj, away_adj

    return {
        "id": _match["id"],
        "home_adj": home_adj,
        "away_adj": away_adj,
        "home_avg": home_avg,
        "away_avg": away_avg,
    }


def get_more_players_team(home_red_cards: int, away_red_cards: int) -> Team | None:
    """Return the side with fewer red cards, or ``None`` when they are equal."""
    if home_red_cards > away_red_cards:
        return Team.AWAY
    if away_red_cards > home_red_cards:
        return Team.HOME
    return None


def _parse_goal_minute(minute: str) -> tuple[int, int]:
    """Split a goal timing string into ``(base minute, stoppage minutes)``."""
    base = _BASE_MINUTE_PATTERN.search(minute)
    if base is None:
        raise ValueError(f"Unrecognised goal timing: {minute!r}")
    stoppage = _STOPPAGE_PATTERN.search(minute, base.end())
    return int(base.group()), int(stoppage.group()) if stoppage else 0


def get_goal_timings_dict(
    home: list[str], away: list[str]
) -> list[tuple[int, str, Team]]:
    """Return all goals of a match in chronological order.

    Despite the name, the result is a list of
    ``(base minute, raw timing string, team)`` tuples. Goals sharing a base
    minute are ordered by their stoppage-time component as a number. The
    original compared the raw strings, which put "90+10" before "90+2".

    Raises:
        ValueError: If a timing string does not start with a minute.
    """
    parsed = [
        (*_parse_goal_minute(minute), minute, team)
        for team, minutes in ((Team.HOME, home), (Team.AWAY, away))
        for minute in minutes
    ]
    return [(base, raw, team) for base, _, raw, team in sorted(parsed)]


def reduce_goal_value(
    goal_timings: list[tuple[int, str, Team]], more_player_team: Team | None
) -> tuple[float, float]:
    """Sum the discounted value of every goal for the home and away side.

    Each goal starts at a value of 1 and may be discounted twice:

    * A goal by ``more_player_team`` is reduced by up to
      ``REDUCE_RED_CARD_GOAL_VALUE``, growing linearly with the minute.
    * A goal that leaves its side more than one goal ahead is reduced by up
      to ``REDUCE_LEADING_GOAL_VALUE``, growing linearly from
      ``REDUCE_FROM_MINUTE`` to the 90th minute.

    Minutes beyond 90 are treated as 90.

    Args:
        goal_timings: Goals in chronological order, as produced by
            ``get_goal_timings_dict``.
        more_player_team: Side that had the numerical advantage, if any.

    Returns:
        ``(home_adj, away_adj)``; ``(0, 0)`` when there are no goals.
    """
    goals = {Team.HOME: 0, Team.AWAY: 0}
    adjusted = {Team.HOME: 0, Team.AWAY: 0}
    for timing, _, team in goal_timings:
        timing = min(timing, 90)
        late_leading_adj_val = (
            max(timing - REDUCE_FROM_MINUTE, 0)
            / (90 - REDUCE_FROM_MINUTE)
            * REDUCE_LEADING_GOAL_VALUE
        )
        more_player_adj_val = (timing / 90) * REDUCE_RED_CARD_GOAL_VALUE

        goal_val = 1
        if team == more_player_team:
            goal_val *= 1 - more_player_adj_val

        opponent = Team.AWAY if team == Team.HOME else Team.HOME
        goals[team] += 1
        # The lead is checked after counting this goal, so only a goal that
        # extends an existing lead is discounted.
        if goals[team] - goals[opponent] > 1:
            goal_val *= 1 - late_leading_adj_val
        adjusted[team] += goal_val
    return adjusted[Team.HOME], adjusted[Team.AWAY]