├── utils
│   ├── __init__.py
│   └── utils.py
├── runtime.txt
├── requirements.txt
├── vercel.json
├── docker-compose.yml
├── api
│   ├── scrapers
│   │   ├── __init__.py
│   │   ├── health.py
│   │   ├── news.py
│   │   ├── stats.py
│   │   ├── rankings.py
│   │   ├── events.py
│   │   └── matches.py
│   └── scrape.py
├── .github
│   ├── dependabot.yml
│   ├── ISSUE_TEMPLATE
│   │   └── bug-report.yml
│   └── workflows
│       ├── docker_dev.yml
│       └── docker.yml
├── Dockerfile
├── main.py
├── LICENSE
├── .gitignore
├── routers
│   └── vlr_router.py
└── README.md

/utils/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/runtime.txt:
--------------------------------------------------------------------------------
1 | python-3.9.5
2 | 
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | requests==2.32.4
2 | uvicorn==0.34.3
3 | fastapi==0.115.13
4 | lxml==5.4.0
5 | slowapi==0.1.9
6 | selectolax==0.3.29
7 | 
--------------------------------------------------------------------------------
/vercel.json:
--------------------------------------------------------------------------------
1 | {
2 |   "builds": [{ "src": "main.py", "use": "@vercel/python" }],
3 |   "routes": [{ "src": "/(.*)", "dest": "main.py" }]
4 | }
--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
1 | version: "3"
2 | 
3 | services:
4 |   bot:
5 |     container_name: "vlrggapi"
6 |     ports:
7 |       - "3001:3001"
8 |     build: .
--------------------------------------------------------------------------------
/api/scrapers/__init__.py:
--------------------------------------------------------------------------------
1 | from .news import vlr_news
2 | from .rankings import vlr_rankings
3 | from .stats import vlr_stats
4 | from .matches import vlr_upcoming_matches, vlr_live_score, vlr_match_results
5 | from .events import vlr_events
6 | from .health import check_health
7 | 
--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
 1 | version: 2
 2 | updates:
 3 |   - package-ecosystem: pip
 4 |     directory: /
 5 |     schedule:
 6 |       interval: weekly
 7 |     open-pull-requests-limit: 3
 8 |     allow:
 9 |       - dependency-type: 'production'
10 |     target-branch: "dev"
11 |   - package-ecosystem: github-actions
12 |     directory: '/'
13 |     schedule:
14 |       interval: weekly
15 |     open-pull-requests-limit: 2
16 |     target-branch: "dev"
--------------------------------------------------------------------------------
/utils/utils.py:
--------------------------------------------------------------------------------
 1 | headers = {
 2 |     "User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0",
 3 | }
 4 | 
 5 | 
 6 | region = {
 7 |     "na": "north-america",
 8 |     "eu": "europe",
 9 |     "ap": "asia-pacific",
10 |     "la": "latin-america",
11 |     "la-s": "la-s",
12 |     "la-n": "la-n",
13 |     "oce": "oceania",
14 |     "kr": "korea",
15 |     "mn": "mena",
16 |     "gc": "gc",
17 |     "br": "brazil",
18 |     "cn": "china",
19 |     "jp": "japan",
20 |     "col": "collegiate",
21 | }
22 | 
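23 | # Usage note: rankings.py builds its request URL from this map, e.g.
24 | # region["na"] -> "north-america" -> https://www.vlr.gg/rankings/north-america
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM 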
tiangolo/uvicorn-gunicorn:python3.9-alpine3.14 as base 2 | 3 | RUN mkdir -p /vlrggapi 4 | 5 | WORKDIR /vlrggapi 6 | 7 | COPY requirements.txt . 8 | RUN pip install --no-cache-dir -r requirements.txt 9 | 10 | 11 | FROM tiangolo/uvicorn-gunicorn:python3.9-alpine3.14 as final 12 | 13 | WORKDIR /vlrggapi 14 | COPY --from=base /usr/local/lib/python3.9/site-packages /usr/local/lib/python3.9/site-packages 15 | COPY . . 16 | 17 | RUN apk add curl 18 | 19 | CMD ["python", "main.py"] 20 | HEALTHCHECK --interval=5s --timeout=3s CMD curl --fail http://127.0.0.1:3001/health || exit 1 21 | -------------------------------------------------------------------------------- /api/scrapers/health.py: -------------------------------------------------------------------------------- 1 | import requests 2 | 3 | 4 | def check_health(): 5 | sites = ["https://vlrggapi.vercel.app", "https://vlr.gg"] 6 | results = {} 7 | for site in sites: 8 | try: 9 | response = requests.get(site, timeout=5) 10 | results[site] = { 11 | "status": "Healthy" if response.status_code == 200 else "Unhealthy", 12 | "status_code": response.status_code, 13 | } 14 | except requests.RequestException: 15 | results[site] = {"status": "Unhealthy", "status_code": None} 16 | return results 17 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import uvicorn 4 | from fastapi import FastAPI 5 | from fastapi.responses import RedirectResponse 6 | from slowapi import Limiter, _rate_limit_exceeded_handler 7 | from slowapi.errors import RateLimitExceeded 8 | from slowapi.util import get_remote_address 9 | 10 | from routers.vlr_router import router as vlr_router 11 | 12 | logging.basicConfig(level=logging.INFO) 13 | logger = logging.getLogger(__name__) 14 | 15 | app = FastAPI( 16 | title="vlrggapi", 17 | description="An Unofficial REST API for [vlr.gg](https://www.vlr.gg/), a site for Valorant Esports match and news coverage. Made by [axsddlr](https://github.com/axsddlr)", 18 | docs_url="/", 19 | redoc_url=None, 20 | ) 21 | 22 | 23 | limiter = Limiter(key_func=get_remote_address) 24 | app.state.limiter = limiter 25 | app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler) 26 | app.include_router(vlr_router) 27 | 28 | 29 | @app.get("/", include_in_schema=False) 30 | def root(): 31 | return RedirectResponse(url="/docs") 32 | 33 | 34 | if __name__ == "__main__": 35 | uvicorn.run("main:app", host="0.0.0.0", port=3001) 36 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021-2024 Andre Saddler 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /api/scrapers/news.py: -------------------------------------------------------------------------------- 1 | import requests 2 | from selectolax.parser import HTMLParser 3 | 4 | from utils.utils import headers 5 | 6 | 7 | def vlr_news(): 8 | url = "https://www.vlr.gg/news" 9 | resp = requests.get(url, headers=headers) 10 | html = HTMLParser(resp.text) 11 | status = resp.status_code 12 | 13 | result = [] 14 | for item in html.css("a.wf-module-item"): 15 | date_author = item.css_first("div.ge-text-light").text() 16 | date, author = date_author.split("by") 17 | 18 | desc = item.css_first("div").css_first("div:nth-child(2)").text().strip() 19 | 20 | title = item.css_first("div:nth-child(1)").text().strip().split("\n")[0] 21 | title = title.replace("\t", "") 22 | 23 | url = item.css_first("a.wf-module-item").attributes["href"] 24 | 25 | result.append( 26 | { 27 | "title": title, 28 | "description": desc, 29 | "date": date.split("\u2022")[1].strip(), 30 | "author": author.strip(), 31 | "url_path": "https://vlr.gg" + url, 32 | } 33 | ) 34 | 35 | data = {"data": {"status": status, "segments": result}} 36 | 37 | if status != 200: 38 | raise Exception("API response: {}".format(status)) 39 | return data 40 | -------------------------------------------------------------------------------- /api/scrape.py: -------------------------------------------------------------------------------- 1 | from api.scrapers import ( 2 | check_health, 3 | vlr_events, 4 | vlr_live_score, 5 | vlr_match_results, 6 | vlr_news, 7 | vlr_rankings, 8 | vlr_stats, 9 | vlr_upcoming_matches, 10 | ) 11 | 12 | 13 | class Vlr: 14 | @staticmethod 15 | def vlr_news(): 16 | return vlr_news() 17 | 18 | @staticmethod 19 | def vlr_rankings(region): 20 | return vlr_rankings(region) 21 | 22 | @staticmethod 23 | def vlr_stats(region: str, timespan: str): 24 | return vlr_stats(region, timespan) 25 | 26 | @staticmethod 27 | def vlr_upcoming_matches(num_pages=1, from_page=None, to_page=None): 28 | return vlr_upcoming_matches(num_pages, from_page, to_page) 29 | 30 | @staticmethod 31 | def vlr_live_score(num_pages=1, from_page=None, to_page=None): 32 | return vlr_live_score(num_pages, from_page, to_page) 33 | 34 | @staticmethod 35 | def vlr_match_results(num_pages=1, from_page=None, to_page=None, max_retries=3, request_delay=1.0, timeout=30): 36 | return vlr_match_results(num_pages, from_page, to_page, max_retries, request_delay, timeout) 37 | 38 | @staticmethod 39 | def vlr_events(upcoming=True, completed=True, page=1): 40 | return vlr_events(upcoming, completed, page) 41 | 42 | @staticmethod 43 | def check_health(): 44 | return check_health() 45 | 46 | 47 | if __name__ == "__main__": 48 | print(Vlr.vlr_live_score()) 49 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug-report.yml: -------------------------------------------------------------------------------- 1 | name: 🐞 Bug report 2 | description: Create a 
report to help us improve
 3 | labels: [bug]
 4 | body:
 5 |   - type: input
 6 |     id: describe-the-bug
 7 |     attributes:
 8 |       label: Describe the bug
 9 |       description: |
10 |         A clear and concise description of what the bug is.
11 |       placeholder: |
12 |         Example: "This endpoint is not working..."
13 |     validations:
14 |       required: true
15 | 
16 |   - type: textarea
17 |     id: reproduce-steps
18 |     attributes:
19 |       label: Steps to reproduce
20 |       description: Provide an example of the issue.
21 |       placeholder: |
22 |         Example:
23 |           1. First step
24 |           2. Second step
25 |           3. Issue here
26 |     validations:
27 |       required: true
28 | 
29 |   - type: textarea
30 |     id: expected-behavior
31 |     attributes:
32 |       label: Expected behavior
33 |       placeholder: |
34 |         Example:
35 |           "This should happen..."
36 |     validations:
37 |       required: true
38 | 
39 |   - type: textarea
40 |     id: actual-behavior
41 |     attributes:
42 |       label: Actual behavior
43 |       placeholder: |
44 |         Example:
45 |           "This happened instead..."
46 |     validations:
47 |       required: true
48 | 
49 |   - type: textarea
50 |     id: additional-context
51 |     attributes:
52 |       label: Additional context
53 |       description: |
54 |         Add any other context about the problem here.
55 |       placeholder: |
56 |         Example:
57 |           "Also ..."
--------------------------------------------------------------------------------
/.github/workflows/docker_dev.yml:
--------------------------------------------------------------------------------
 1 | name: vlrggapi_dev
 2 | 
 3 | on:
 4 |   # run it on push to the default repository branch
 5 |   push:
 6 |     branches: [dev]
 7 |   # run it during pull request
 8 |   pull_request:
 9 | 
10 | env:
11 |   REGISTRY: ghcr.io
12 |   IMAGE_NAME: ${{ github.repository }}
13 | 
14 | jobs:
15 |   # define job to build and publish docker image
16 |   build-and-push-docker-image:
17 |     name: Build Docker image and push to repositories
18 |     # run only when code is compiling and tests are passing
19 |     runs-on: ubuntu-latest
20 | 
21 |     # steps to perform in job
22 |     steps:
23 |       - name: Checkout code
24 |         uses: actions/checkout@v4
25 | 
26 |       - name: Set up QEMU
27 |         uses: docker/setup-qemu-action@v3
28 | 
29 |       # setup Docker build action
30 |       - name: Set up Docker Buildx
31 |         id: buildx
32 |         uses: docker/setup-buildx-action@v3
33 | 
34 |       - name: Login to GitHub Packages
35 |         uses: docker/login-action@v3
36 |         with:
37 |           registry: ghcr.io
38 |           username: ${{ github.actor }}
39 |           password: ${{ secrets.GHCR_PAT }}
40 | 
41 |       - name: Build image and push to Docker Hub and GitHub Container Registry
42 |         id: docker_build
43 |         uses: docker/build-push-action@v6
44 |         with:
45 |           platforms: linux/amd64,linux/arm64,linux/arm/v7
46 |           # relative path to the place where source code with Dockerfile is located
47 |           context: ./
48 |           # Note: tags has to be all lower-case
49 |           tags: ghcr.io/axsddlr/vlrggapi:dev
50 |           # build on feature branches, push only on dev branch
51 |           push: ${{ github.ref == 'refs/heads/dev' }}
52 | 
53 |       - name: Image digest
54 |         run: echo ${{ steps.docker_build.outputs.digest }}
--------------------------------------------------------------------------------
/.github/workflows/docker.yml:
--------------------------------------------------------------------------------
 1 | name: vlrggapi_latest
 2 | 
 3 | on:
 4 |   # run it on push to the default repository branch
 5 |   push:
 6 |     branches: [master]
 7 |   # run it during pull request
 8 |   pull_request:
 9 | 
10 | env:
11 |   REGISTRY: ghcr.io
12 |   IMAGE_NAME: ${{ github.repository }}
13 | 
14 | jobs:
15 |   # define job to build and publish docker image
16 |   build-and-push-docker-image:
17 |     name: Build Docker image and push to repositories
18 |     # run only when code is compiling and tests are passing
19 |     runs-on: ubuntu-latest
20 | 
21 |     # steps to perform in job
22 |     steps:
23 |       - name: Checkout code
24 |         uses: actions/checkout@v4
25 | 
26 |       - name: Set up QEMU
27 |         uses: docker/setup-qemu-action@v3
28 | 
29 |       # setup Docker build action
30 |       - name: Set up Docker Buildx
31 |         id: buildx
32 |         uses: docker/setup-buildx-action@v3
33 | 
34 |       - name: Login to GitHub Packages
35 |         uses: docker/login-action@v3
36 |         with:
37 |           registry: ${{ env.REGISTRY }}
38 |           username: ${{ github.actor }}
39 |           password: ${{ secrets.GHCR_PAT }}
40 | 
41 |       - name: Build image and push to Docker Hub and GitHub Container Registry
42 |         id: docker_build
43 |         uses: docker/build-push-action@v6
44 |         with:
45 |           platforms: linux/amd64,linux/arm64,linux/arm/v7
46 |           # relative path to the place where source code with Dockerfile is located
47 |           context: ./
48 |           # Note: tags has to be all lower-case
49 |           tags: ${{ env.REGISTRY }}/${{ github.repository }}:latest
50 |           # build on feature branches, push only on master branch
51 |           push: ${{ github.ref == 'refs/heads/master' }}
52 | 
53 |       # 24-July-2023 Update: Use the new environment files for state and output
54 |       - name: Save state
55 |         run: echo "name=value" >> $GITHUB_STATE
56 | 
57 |       - name: Set output
58 |         run: echo "name=value" >> $GITHUB_OUTPUT
59 | 
60 |       - name: Image digest
61 |         run: echo ${{ steps.docker_build.outputs.digest }}
62 | 
--------------------------------------------------------------------------------
/api/scrapers/stats.py:
--------------------------------------------------------------------------------
 1 | import requests
 2 | from selectolax.parser import HTMLParser
 3 | 
 4 | from utils.utils import headers
 5 | 
 6 | 
 7 | def vlr_stats(region: str, timespan: str):
 8 |     base_url = f"https://www.vlr.gg/stats/?event_group_id=all&event_id=all&region={region}&country=all&min_rounds=200&min_rating=1550&agent=all&map_id=all"
 9 |     url = (
10 |         f"{base_url}&timespan=all"
11 |         if timespan.lower() == "all"
12 |         else f"{base_url}&timespan={timespan}d"
13 |     )
14 | 
15 |     resp = requests.get(url, headers=headers)
16 |     html = HTMLParser(resp.text)
17 |     status = resp.status_code
18 | 
19 |     result = []
20 |     for item in html.css("tbody tr"):
21 |         player = item.text().replace("\t", "").replace("\n", " ").strip().split()
22 |         player_name = player[0]
23 |         org = player[1] if len(player) > 1 else "N/A"
24 | 
25 |         agents = [
26 |             agents.attributes["src"].split("/")[-1].split(".")[0]
27 |             for agents in item.css("td.mod-agents img")
28 |         ]
29 |         color_sq = [stats.text() for stats in item.css("td.mod-color-sq")]
30 |         rnd = item.css_first("td.mod-rnd").text()
31 | 
32 |         result.append(
33 |             {
34 |                 "player": player_name,
35 |                 "org": org,
36 |                 "agents": agents,
37 |                 "rounds_played": rnd,
38 |                 "rating": color_sq[0],
39 |                 "average_combat_score": color_sq[1],
40 |                 "kill_deaths": color_sq[2],
41 |                 "kill_assists_survived_traded": color_sq[3],
42 |                 "average_damage_per_round": color_sq[4],
43 |                 "kills_per_round": color_sq[5],
44 |                 "assists_per_round": color_sq[6],
45 |                 "first_kills_per_round": color_sq[7],
46 |                 "first_deaths_per_round": color_sq[8],
47 |                 "headshot_percentage": color_sq[9],
48 |                 "clutch_success_percentage": color_sq[10],
49 |             }
50 |         )
51 | 
52 |     segments = {"status": status, "segments": result}
53 |     data = {"data": segments}
54 | 
55 |     if status != 200:
56 |         raise Exception("API response: {}".format(status))
57 |     return data
58 | 
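59 | 
60 | if __name__ == "__main__":
61 |     # Minimal smoke test (illustrative sketch, mirroring the __main__ block in
62 |     # api/scrape.py); assumes vlr.gg is reachable and that this is run from the
63 |     # repo root, e.g. `python -m api.scrapers.stats`.
64 |     print(vlr_stats("na", "30"))
--------------------------------------------------------------------------------
/api/scrapers/rankings.py: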
-------------------------------------------------------------------------------- 1 | import re 2 | 3 | import requests 4 | from selectolax.parser import HTMLParser 5 | 6 | from utils.utils import headers, region 7 | 8 | 9 | def vlr_rankings(region_key): 10 | url = "https://www.vlr.gg/rankings/" + region[str(region_key)] 11 | resp = requests.get(url, headers=headers) 12 | html = HTMLParser(resp.text) 13 | status = resp.status_code 14 | 15 | result = [] 16 | for item in html.css("div.rank-item"): 17 | rank = item.css_first("div.rank-item-rank-num").text().strip() 18 | team = item.css_first("div.ge-text").text().split("#")[0] 19 | logo = item.css_first("a.rank-item-team").css_first("img").attributes["src"] 20 | logo = re.sub(r"\/img\/vlr\/tmp\/vlr.png", "", logo) 21 | country = item.css_first("div.rank-item-team-country").text() 22 | last_played = ( 23 | item.css_first("a.rank-item-last") 24 | .text() 25 | .replace("\n", "") 26 | .replace("\t", "") 27 | .split("v")[0] 28 | ) 29 | last_played_team = ( 30 | item.css_first("a.rank-item-last") 31 | .text() 32 | .replace("\t", "") 33 | .replace("\n", "") 34 | .split("o")[1] 35 | .replace(".", ". ") 36 | ) 37 | last_played_team_logo = ( 38 | item.css_first("a.rank-item-last").css_first("img").attributes["src"] 39 | ) 40 | record = ( 41 | item.css_first("div.rank-item-record") 42 | .text() 43 | .replace("\t", "") 44 | .replace("\n", "") 45 | ) 46 | earnings = ( 47 | item.css_first("div.rank-item-earnings") 48 | .text() 49 | .replace("\t", "") 50 | .replace("\n", "") 51 | ) 52 | 53 | result.append( 54 | { 55 | "rank": rank, 56 | "team": team.strip(), 57 | "country": country, 58 | "last_played": last_played.strip(), 59 | "last_played_team": last_played_team.strip(), 60 | "last_played_team_logo": last_played_team_logo, 61 | "record": record, 62 | "earnings": earnings, 63 | "logo": logo, 64 | } 65 | ) 66 | 67 | data = {"status": status, "data": result} 68 | 69 | if status != 200: 70 | raise Exception("API response: {}".format(status)) 71 | return data 72 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Config 2 | config.py 3 | config.json 4 | info.json 5 | headers.json 6 | graph.png 7 | config.ini 8 | lock.lock 9 | app.db 10 | *.code-workspace 11 | 12 | # pycharm 13 | .idea 14 | 15 | # Byte-compiled / optimized / DLL files 16 | __pycache__/ 17 | *.py[cod] 18 | *$py.class 19 | 20 | # C extensions 21 | *.so 22 | 23 | # Distribution / packaging 24 | .Python 25 | build/ 26 | develop-eggs/ 27 | dist/ 28 | downloads/ 29 | eggs/ 30 | .eggs/ 31 | lib/ 32 | lib64/ 33 | parts/ 34 | sdist/ 35 | var/ 36 | wheels/ 37 | pip-wheel-metadata/ 38 | share/python-wheels/ 39 | *.egg-info/ 40 | .installed.cfg 41 | *.egg 42 | MANIFEST 43 | 44 | # PyInstaller 45 | # Usually these files are written by a python script from a template 46 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
47 | *.manifest 48 | *.spec 49 | 50 | # Installer logs 51 | pip-log.txt 52 | pip-delete-this-directory.txt 53 | 54 | # Unit test / coverage reports 55 | htmlcov/ 56 | .tox/ 57 | .nox/ 58 | .coverage 59 | .coverage.* 60 | .cache 61 | nosetests.xml 62 | coverage.xml 63 | *.cover 64 | *.py,cover 65 | .hypothesis/ 66 | .pytest_cache/ 67 | 68 | # Translations 69 | *.mo 70 | *.pot 71 | 72 | # Django stuff: 73 | *.log 74 | local_settings.py 75 | db.sqlite3 76 | db.sqlite3-journal 77 | 78 | # Flask stuff: 79 | instance/ 80 | .webassets-cache 81 | 82 | # Scrapy stuff: 83 | .scrapy 84 | 85 | # Sphinx documentation 86 | docs/_build/ 87 | 88 | # PyBuilder 89 | target/ 90 | 91 | # Jupyter Notebook 92 | .ipynb_checkpoints 93 | 94 | # IPython 95 | profile_default/ 96 | ipython_config.py 97 | 98 | # pyenv 99 | .python-version 100 | 101 | # pipenv 102 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 103 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 104 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 105 | # install all needed dependencies. 106 | #Pipfile.lock 107 | 108 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 109 | __pypackages__/ 110 | 111 | # Celery stuff 112 | celerybeat-schedule 113 | celerybeat.pid 114 | 115 | # SageMath parsed files 116 | *.sage.py 117 | 118 | # Environments 119 | .env 120 | .venv 121 | env/ 122 | venv/ 123 | ENV/ 124 | env.bak/ 125 | venv.bak/ 126 | 127 | # Spyder project settings 128 | .spyderproject 129 | .spyproject 130 | 131 | # Rope project settings 132 | .ropeproject 133 | 134 | # mkdocs documentation 135 | /site 136 | 137 | # mypy 138 | .mypy_cache/ 139 | .dmypy.json 140 | dmypy.json 141 | 142 | # Pyre type checker 143 | .pyre/ 144 | -------------------------------------------------------------------------------- /routers/vlr_router.py: -------------------------------------------------------------------------------- 1 | from fastapi import APIRouter, Query, Request 2 | from slowapi import Limiter 3 | from slowapi.util import get_remote_address 4 | 5 | from api.scrape import Vlr 6 | 7 | router = APIRouter() 8 | limiter = Limiter(key_func=get_remote_address) 9 | vlr = Vlr() 10 | 11 | 12 | @router.get("/news") 13 | @limiter.limit("600/minute") 14 | async def VLR_news(request: Request): 15 | return vlr.vlr_news() 16 | 17 | 18 | @router.get("/stats") 19 | @limiter.limit("600/minute") 20 | async def VLR_stats( 21 | request: Request, 22 | region: str = Query(..., description="Region shortname"), 23 | timespan: str = Query(..., description="Timespan (30, 60, 90, or all)"), 24 | ): 25 | """ 26 | Get VLR stats with query parameters. 27 | 28 | region shortnames:\n 29 | "na": "north-america",\n 30 | "eu": "europe",\n 31 | "ap": "asia-pacific",\n 32 | "sa": "latin-america",\n 33 | "jp": "japan",\n 34 | "oce": "oceania",\n 35 | "mn": "mena"\n 36 | """ 37 | return vlr.vlr_stats(region, timespan) 38 | 39 | 40 | @router.get("/rankings") 41 | @limiter.limit("600/minute") 42 | async def VLR_ranks( 43 | request: Request, region: str = Query(..., description="Region shortname") 44 | ): 45 | """ 46 | Get VLR rankings for a specific region. 
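Example request (as in the README): GET /rankings?region=na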
47 | 48 | region shortnames:\n 49 | "na": "north-america",\n 50 | "eu": "europe",\n 51 | "ap": "asia-pacific",\n 52 | "la": "latin-america",\n 53 | "la-s": "la-s",\n 54 | "la-n": "la-n",\n 55 | "oce": "oceania",\n 56 | "kr": "korea",\n 57 | "mn": "mena",\n 58 | "gc": "game-changers",\n 59 | "br": "Brazil",\n 60 | "cn": "china",\n 61 | "jp": "japan",\n 62 | "col": "collegiate",\n 63 | """ 64 | return vlr.vlr_rankings(region) 65 | 66 | 67 | @router.get("/match") 68 | @limiter.limit("600/minute") 69 | async def VLR_match( 70 | request: Request, 71 | q: str, 72 | num_pages: int = Query(1, description="Number of pages to scrape (default: 1)", ge=1, le=600), 73 | from_page: int = Query(None, description="Starting page number (1-based, optional)", ge=1, le=600), 74 | to_page: int = Query(None, description="Ending page number (1-based, inclusive, optional)", ge=1, le=600), 75 | max_retries: int = Query(3, description="Maximum retry attempts per page (default: 3)", ge=1, le=5), 76 | request_delay: float = Query(1.0, description="Delay between requests in seconds (default: 1.0)", ge=0.5, le=5.0), 77 | timeout: int = Query(30, description="Request timeout in seconds (default: 30)", ge=10, le=120) 78 | ): 79 | """ 80 | query parameters:\n 81 | "upcoming": upcoming matches,\n 82 | "live_score": live match scores,\n 83 | "results": match results,\n 84 | 85 | Page Range Options: 86 | - num_pages: Number of pages from page 1 (ignored if from_page/to_page specified) 87 | - from_page: Starting page number (1-based, optional) 88 | - to_page: Ending page number (1-based, inclusive, optional) 89 | 90 | Additional parameters for robust scraping: 91 | - max_retries: Maximum retry attempts per failed page (1-5, default: 3) 92 | - request_delay: Delay between requests in seconds (0.5-5.0, default: 1.0) 93 | - timeout: Request timeout in seconds (10-120, default: 30) 94 | 95 | Examples: 96 | - /match?q=results&num_pages=5 (scrapes pages 1-5) 97 | - /match?q=results&from_page=10&to_page=15 (scrapes pages 10-15) 98 | - /match?q=results&from_page=5&num_pages=3 (scrapes pages 5-7) 99 | """ 100 | if q == "upcoming": 101 | return vlr.vlr_upcoming_matches(num_pages, from_page, to_page) 102 | elif q == "live_score": 103 | return vlr.vlr_live_score(num_pages, from_page, to_page) 104 | elif q == "results": 105 | return vlr.vlr_match_results(num_pages, from_page, to_page, max_retries, request_delay, timeout) 106 | 107 | else: 108 | return {"error": "Invalid query parameter"} 109 | 110 | 111 | @router.get("/events") 112 | @limiter.limit("600/minute") 113 | async def VLR_events( 114 | request: Request, 115 | q: str = Query( 116 | None, 117 | description="Event type filter", 118 | example="completed", 119 | enum=["upcoming", "completed"] 120 | ), 121 | page: int = Query( 122 | 1, 123 | description="Page number for pagination (only applies to completed events)", 124 | example=1, 125 | ge=1, 126 | le=100 127 | ) 128 | ): 129 | """ 130 | Get Valorant events from VLR.GG with optional filtering and pagination. 
131 | 132 | ## Event Types: 133 | - **upcoming**: Currently active or scheduled future events 134 | - **completed**: Historical events that have finished 135 | - **default**: Both upcoming and completed events (when q parameter is omitted) 136 | 137 | ## Pagination: 138 | - Only applies to **completed events** 139 | - Upcoming events are always from the first page 140 | - Page numbers range from 1 to 100 141 | - Each page contains approximately 25-30 events 142 | 143 | ## Usage Examples: 144 | - `GET /events` - All events (upcoming + completed page 1) 145 | - `GET /events?q=upcoming` - Only upcoming events 146 | - `GET /events?q=completed` - Only completed events (page 1) 147 | - `GET /events?q=completed&page=3` - Completed events from page 3 148 | - `GET /events?page=2` - All events (upcoming + completed page 2) 149 | 150 | ## Response Format: 151 | Returns event details including title, status, prize pool, dates, region, thumbnail, and event URL. 152 | """ 153 | if q == "upcoming": 154 | return vlr.vlr_events(upcoming=True, completed=False, page=page) 155 | elif q == "completed": 156 | return vlr.vlr_events(upcoming=False, completed=True, page=page) 157 | else: 158 | return vlr.vlr_events(upcoming=True, completed=True, page=page) 159 | 160 | 161 | @router.get("/health") 162 | def health(): 163 | return vlr.check_health() 164 | -------------------------------------------------------------------------------- /api/scrapers/events.py: -------------------------------------------------------------------------------- 1 | import re 2 | import requests 3 | from selectolax.parser import HTMLParser 4 | 5 | from utils.utils import headers 6 | 7 | 8 | def vlr_events(upcoming=True, completed=True, page=1): 9 | """ 10 | Get Valorant events from VLR.GG 11 | 12 | Args: 13 | upcoming (bool): If True, include upcoming events 14 | completed (bool): If True, include completed events 15 | page (int): Page number for pagination (only applies to completed events) 16 | 17 | Returns: 18 | dict: Response with status code and events data 19 | """ 20 | # Build URL with pagination for completed events 21 | if completed and page > 1: 22 | url = f"https://www.vlr.gg/events/?page={page}" 23 | else: 24 | url = "https://www.vlr.gg/events" 25 | 26 | resp = requests.get(url, headers=headers) 27 | html = HTMLParser(resp.text) 28 | status = resp.status_code 29 | 30 | # If both are False, show both (default behavior) 31 | if not upcoming and not completed: 32 | upcoming = True 33 | completed = True 34 | 35 | events = [] 36 | 37 | def parse_events(container): 38 | """Helper function to parse event cards""" 39 | for event_item in container.css("a.event-item"): 40 | title = event_item.css_first(".event-item-title") 41 | title = title.text(strip=True) if title else "" 42 | 43 | status_elem = event_item.css_first(".event-item-desc-item-status") 44 | event_status = status_elem.text(strip=True) if status_elem else "" 45 | 46 | # Prize - extract monetary value or TBD (before the nested label div) 47 | prize_elem = event_item.css_first(".event-item-desc-item.mod-prize") 48 | prize = "" 49 | if prize_elem: 50 | # Get the HTML and extract text before the first nested div 51 | full_text = prize_elem.text(strip=True) 52 | 53 | # Split by common separators and take the first meaningful part 54 | # The structure is: "$250,000
Prize Pool" or "TBD Prize Pool
" 55 | parts = re.split(r'(?=Prize Pool|prize pool)', full_text, flags=re.IGNORECASE) 56 | if parts: 57 | first_part = parts[0].strip() 58 | 59 | # Clean up any remaining whitespace or newlines 60 | first_part = re.sub(r'\s+', ' ', first_part).strip() 61 | 62 | # Check for TBD 63 | if first_part.upper() == "TBD": 64 | prize = "TBD" 65 | # Check for dollar amounts 66 | elif re.match(r'^\$[\d,]+$', first_part): 67 | prize = first_part 68 | # Check for numeric values (add $ if missing) 69 | elif re.match(r'^[\d,]+$', first_part) and len(first_part) > 2: 70 | prize = "$" + first_part 71 | 72 | # Dates - extract date range like "Jul 15—Aug 31", avoid TBD if it's for prize 73 | dates_elem = event_item.css_first(".event-item-desc-item.mod-dates") 74 | dates = "" 75 | if dates_elem: 76 | full_text = dates_elem.text(strip=True) 77 | # Use regex to find date patterns like "Jul 15—Aug 31" or "Dec 1—15" 78 | date_match = re.search( 79 | r"[A-Za-z]{3}\s+\d+[—\-–]+[A-Za-z]*\s*\d+", full_text 80 | ) 81 | if date_match: 82 | dates = date_match.group() 83 | else: 84 | # If TBD was found in prize section from dates, don't use TBD as dates 85 | if prize != "TBD" and re.search( 86 | r"\bTBD\b", full_text, re.IGNORECASE 87 | ): 88 | dates = "TBD" 89 | else: 90 | # Fallback: look for any text before "Dates" or similar keywords 91 | lines = full_text.split("\n") 92 | for line in lines: 93 | line = line.strip() 94 | if line and not any( 95 | keyword in line.lower() 96 | for keyword in ["dates", "label", "prize", "pool"] 97 | ): 98 | # Look for lines that contain month abbreviations or date-like patterns 99 | if ( 100 | any( 101 | month in line 102 | for month in [ 103 | "Jan", 104 | "Feb", 105 | "Mar", 106 | "Apr", 107 | "May", 108 | "Jun", 109 | "Jul", 110 | "Aug", 111 | "Sep", 112 | "Oct", 113 | "Nov", 114 | "Dec", 115 | ] 116 | ) 117 | or "—" in line 118 | ): 119 | dates = line 120 | break 121 | 122 | # Region from flag 123 | region = "" 124 | flag_elem = event_item.css_first(".event-item-desc-item.mod-location .flag") 125 | if flag_elem: 126 | class_attr = flag_elem.attributes.get("class", "") 127 | region = class_attr.replace("flag mod-", "").strip() 128 | 129 | # Thumbnail 130 | thumb = "" 131 | img_elem = event_item.css_first(".event-item-thumb img") 132 | if img_elem: 133 | src = img_elem.attributes.get("src", "") 134 | if src.startswith("//"): 135 | thumb = "https:" + src 136 | elif src.startswith("/"): 137 | thumb = "https://www.vlr.gg" + src 138 | else: 139 | thumb = src 140 | 141 | # URL path 142 | url_path = event_item.attributes.get("href", "") 143 | full_url = "https://www.vlr.gg" + url_path if url_path else "" 144 | 145 | events.append( 146 | { 147 | "title": title, 148 | "status": event_status, 149 | "prize": prize, 150 | "dates": dates, 151 | "region": region, 152 | "thumb": thumb, 153 | "url_path": full_url, 154 | } 155 | ) 156 | 157 | # Parse upcoming events 158 | if upcoming: 159 | upcoming_sections = html.css("div.wf-label.mod-large.mod-upcoming") 160 | for section in upcoming_sections: 161 | parent = section.parent 162 | if parent and parent.css("a.event-item"): 163 | parse_events(parent) 164 | 165 | # Parse completed events 166 | if completed: 167 | completed_sections = html.css("div.wf-label.mod-large.mod-completed") 168 | for section in completed_sections: 169 | parent = section.parent 170 | if parent and parent.css("a.event-item"): 171 | parse_events(parent) 172 | 173 | return {"data": {"status": status, "segments": events}} 174 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # vlrggapi
  2 | 
  3 | An Unofficial REST API for [vlr.gg](https://www.vlr.gg/), a site for Valorant Esports match and news coverage.
  4 | 
  5 | Built by [Andre Saddler](https://github.com/axsddlr/)
  6 | 
  7 | ## Current Endpoints
  8 | 
  9 | All endpoints are relative to [https://vlrggapi.vercel.app](https://vlrggapi.vercel.app).
 10 | 
 11 | ### `/news`
 12 | 
 13 | - Method: `GET`
 14 | - Description: Fetches the latest news articles related to Valorant Esports.
 15 | - Example: `GET https://vlrggapi.vercel.app/news`
 16 | - Response Example:
 17 | 
 18 | ```json
 19 | {
 20 |   "data": {
 21 |     "status": 200,
 22 |     "segments": [
 23 |       {
 24 |         "title": "Riot introduces changes to Premier, adds new Invite Division",
 25 |         "description": "Riot looks to streamline Premier promotions and Challengers qualification with upcoming changes.",
 26 |         "date": "April 23, 2024",
 27 |         "author": "thothgow",
 28 |         "url_path": "https://vlr.gg/336099/riot-introduces-changes-to-premier-adds-new-invite-division"
 29 |       },
 30 |       {
 31 |         "title": "jakee announces competitive retirement",
 32 |         "description": "From Collegiate to the Tier 1 stage, the Controller main had seen it all.",
 33 |         "date": "April 21, 2024",
 34 |         "author": "ChickenJoe",
 35 |         "url_path": "https://vlr.gg/334341/jakee-announces-competitive-retirement"
 36 |       }
 37 |     ]
 38 |   }
 39 | }
 40 | ```
 41 | 
 42 | ### `/stats`
 43 | 
 44 | - Method: `GET`
 45 | - Description: Fetches player statistics for a specific region and timespan.
 46 | - Query Parameters:
 47 |   - `region`: Region shortname (e.g., "na" for North America).
 48 |   - `timespan`: Time span in days (e.g., "30" for the last 30 days, or "all" for all time).
 49 | - Example: `GET https://vlrggapi.vercel.app/stats?region=na&timespan=30`
 50 | 
 51 | - Response Example:
 52 | 
 53 | ```json
 54 | {
 55 |   "data": {
 56 |     "status": 200,
 57 |     "segments": [
 58 |       {
 59 |         "player": "corey",
 60 |         "org": "TTR",
 61 |         "rating": "1.18",
 62 |         "average_combat_score": "235.2",
 63 |         "kill_deaths": "1.19",
 64 |         "kill_assists_survived_traded": "72%",
 65 |         "average_damage_per_round": "158.4",
 66 |         "kills_per_round": "0.81",
 67 |         "assists_per_round": "0.29",
 68 |         "first_kills_per_round": "0.19",
 69 |         "first_deaths_per_round": "0.13",
 70 |         "headshot_percentage": "26%",
 71 |         "clutch_success_percentage": "28%"
 72 |       },
 73 |       {
 74 |         "player": "wedid",
 75 |         "org": "TTR",
 76 |         "rating": "1.15",
 77 |         "average_combat_score": "216.1",
 78 |         "kill_deaths": "1.11",
 79 |         "kill_assists_survived_traded": "72%",
 80 |         "average_damage_per_round": "141.0",
 81 |         "kills_per_round": "0.76",
 82 |         "assists_per_round": "0.39",
 83 |         "first_kills_per_round": "0.07",
 84 |         "first_deaths_per_round": "0.10",
 85 |         "headshot_percentage": "32%",
 86 |         "clutch_success_percentage": "19%"
 87 |       }
 88 |     ]
 89 |   }
 90 | }
 91 | ```
 92 | 
 93 | ### `/rankings`
 94 | 
 95 | - Method: `GET`
 96 | - Description: Fetches rankings for a specific region.
 97 | - Query Parameters:
 98 |   - `region`: Region shortname (e.g., "na" for North America).
 99 | - Example: `GET https://vlrggapi.vercel.app/rankings?region=na`
100 | - Response Example:
101 | 
102 | ```json
103 | {
104 |   "status": 200,
105 |   "data": [
106 |     {
107 |       "rank": "1",
108 |       "team": "M80",
109 |       "country": "Canada",
110 |       "last_played": "4d ago",
111 |       "last_played_team": "vs. 
Turtle Tr", 112 | "last_played_team_logo": "//owcdn.net/img/63d552c5dd028.png", 113 | "record": "4-1", 114 | "earnings": "$104,850", 115 | "logo": "//owcdn.net/img/63d91e60a84bc.png" 116 | }, 117 | { 118 | "rank": "2", 119 | "team": "Sentinels", 120 | "country": "United States", 121 | "last_played": "22h ago", 122 | "last_played_team": "vs. Evil Geniuses", 123 | "last_played_team_logo": "//owcdn.net/img/62a409ad29351.png", 124 | "record": "7-3", 125 | "earnings": "$295,500", 126 | "logo": "//owcdn.net/img/62875027c8e06.png" 127 | } 128 | ] 129 | } 130 | ``` 131 | 132 | ### `/match` 133 | 134 | - Method: `GET` 135 | - Description: Fetches matches based on the query parameter provided. 136 | - Query Parameters: 137 | - `q`: Type of matches to fetch ("upcoming", "live_score", "results"). 138 | - Examples: 139 | - Upcoming matches: `GET https://vlrggapi.vercel.app/match?q=upcoming` 140 | - Live scores: `GET https://vlrggapi.vercel.app/match?q=live_score` 141 | - Match results: `GET https://vlrggapi.vercel.app/match?q=results` 142 | - Response Example for `q=upcoming`: 143 | 144 | ```json 145 | { 146 | "data": { 147 | "status": 200, 148 | "segments": [ 149 | { 150 | "team1": "G2 Esports", 151 | "team2": "Leviatán", 152 | "flag1": "flag_us", 153 | "flag2": "flag_cl", 154 | "time_until_match": "51m from now", 155 | "match_series": "Regular Season: Week 3", 156 | "match_event": "Champions Tour 2024: Americas Stage 1", 157 | "unix_timestamp": "2024-04-24 21:00:00", 158 | "match_page": "https://www.vlr.gg/314642/g2-esports-vs-leviat-n-champions-tour-2024-americas-stage-1-w3" 159 | } 160 | ] 161 | } 162 | } 163 | ``` 164 | 165 | - Response Example for `q=live_score`: 166 | 167 | ```json 168 | { 169 | "data": { 170 | "status": 200, 171 | "segments": [ 172 | { 173 | "team1": "Team 1 Name", 174 | "team2": "Team 2 Name", 175 | "flag1": "Country Flag of Team 1", 176 | "flag2": "Country Flag of Team 2", 177 | "team1_logo": "URL to Team 1 logo", 178 | "team2_logo": "URL to Team 2 logo", 179 | "score1": "Team 1 Score", 180 | "score2": "Team 2 Score", 181 | "team1_round_ct": "Team 1 CT-side rounds", 182 | "team1_round_t": "Team 1 T-side rounds", 183 | "team2_round_ct": "Team 2 CT-side rounds", 184 | "team2_round_t": "Team 2 T-side rounds", 185 | "map_number": "Current map number in the series", 186 | "current_map": "Current map being played", 187 | "time_until_match": "LIVE", 188 | "match_event": "Event name", 189 | "match_series": "Match series", 190 | "unix_timestamp": "Match start time in UNIX timestamp", 191 | "match_page": "URL to the match page" 192 | } 193 | ] 194 | } 195 | } 196 | ``` 197 | 198 | - Response Example for `q=results`: 199 | 200 | ```json 201 | { 202 | "data": { 203 | "status": 200, 204 | "segments": [ 205 | { 206 | "team1": "Team Vitality", 207 | "team2": "Gentle Mates", 208 | "score1": "0", 209 | "score2": "2", 210 | "flag1": "flag_eu", 211 | "flag2": "flag_fr", 212 | "time_completed": "2h 44m ago", 213 | "round_info": "Regular Season-Week 4", 214 | "tournament_name": "Champions Tour 2024: EMEA Stage 1", 215 | "match_page": "/318931/team-vitality-vs-gentle-mates-champions-tour-2024-emea-stage-1-w4", 216 | "tournament_icon": "https://owcdn.net/img/65ab59620a233.png" 217 | } 218 | ] 219 | } 220 | } 221 | ``` 222 | 223 | ### `/events` 224 | 225 | - Method: `GET` 226 | - Description: Fetches Valorant events from vlr.gg with filtering and pagination options. 
227 | - Query Parameters:
228 |   - `q`: Event type filter (optional)
229 |     - `"upcoming"`: Show only upcoming events
230 |     - `"completed"`: Show only completed events
231 |     - No parameter or other values: Show both upcoming and completed events
232 |   - `page`: Page number for pagination (optional, default: 1, applies to completed events only)
233 | - Examples:
234 |   - All events: `GET https://vlrggapi.vercel.app/events`
235 |   - Upcoming only: `GET https://vlrggapi.vercel.app/events?q=upcoming`
236 |   - Completed only: `GET https://vlrggapi.vercel.app/events?q=completed`
237 |   - Completed events page 2: `GET https://vlrggapi.vercel.app/events?q=completed&page=2`
238 | - Response Example:
239 | 
240 | ```json
241 | {
242 |   "data": {
243 |     "status": 200,
244 |     "segments": [
245 |       {
246 |         "title": "VCT 2025: Pacific Stage 2",
247 |         "status": "ongoing",
248 |         "prize": "$250,000",
249 |         "dates": "Jul 15—Aug 31",
250 |         "region": "kr",
251 |         "thumb": "https://owcdn.net/img/640f5ae002674.png",
252 |         "url_path": "https://www.vlr.gg/event/2500/vct-2025-pacific-stage-2"
253 |       },
254 |       {
255 |         "title": "VCT 2025: China Stage 2",
256 |         "status": "ongoing",
257 |         "prize": "TBD",
258 |         "dates": "Jul 3—Aug 24",
259 |         "region": "cn",
260 |         "thumb": "https://owcdn.net/img/65dd97cea9a25.png",
261 |         "url_path": "https://www.vlr.gg/event/2499/vct-2025-china-stage-2"
262 |       }
263 |     ]
264 |   }
265 | }
266 | ```
267 | 
268 | ### `/health`
269 | 
270 | - Method: `GET`
271 | - Description: Returns the health status of the API and vlr.gg website.
272 | - Example: `GET https://vlrggapi.vercel.app/health`
273 | - Response Example:
274 | 
275 | ```json
276 | {
277 |   "https://vlrggapi.vercel.app": {
278 |     "status": "Healthy",
279 |     "status_code": 200
280 |   },
281 |   "https://vlr.gg": {
282 |     "status": "Healthy",
283 |     "status_code": 200
284 |   }
285 | }
286 | ```
287 | 
288 | The response includes the status ("Healthy" or "Unhealthy") and the HTTP status code for both the API and the vlr.gg website. If a site is unreachable, the status will be "Unhealthy" and the status_code will be null.
289 | 
290 | ## Installation
291 | 
292 | ### Source
293 | 
294 | ```bash
295 | git clone https://github.com/axsddlr/vlrggapi/
296 | cd vlrggapi
297 | pip3 install -r requirements.txt
298 | ```
299 | 
300 | ### Usage
301 | 
302 | ```bash
303 | python3 main.py
304 | ```
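305 | 
306 | Once the server is running, any HTTP client can query it. A minimal sketch using `requests` (assumes the default port 3001 configured in `main.py`):
307 | 
308 | ```python
309 | import requests
310 | 
311 | # Query a local vlrggapi instance for the latest news articles.
312 | resp = requests.get("http://127.0.0.1:3001/news")
313 | resp.raise_for_status()
314 | 
315 | for article in resp.json()["data"]["segments"]:
316 |     print(article["title"], "->", article["url_path"])
317 | ```
318 | 
319 | ## Built With
320 | 
321 | - [FastAPI](https://fastapi.tiangolo.com/)
322 | - [Requests](https://requests.readthedocs.io/en/master/)
323 | - [Selectolax](https://github.com/rushter/selectolax)
324 | - [uvicorn](https://www.uvicorn.org/)
325 | 
326 | ## Contributing
327 | 
328 | Feel free to submit a [pull request](https://github.com/axsddlr/vlrggapi/pull/new/master) or an [issue](https://github.com/axsddlr/vlrggapi/issues/new)!
329 | 
330 | ## License
331 | 
332 | The MIT License (MIT)
333 | 
--------------------------------------------------------------------------------
/api/scrapers/matches.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | import time
 3 | from datetime import datetime, timezone
 4 | 
 5 | import requests
 6 | from selectolax.parser import HTMLParser
 7 | 
 8 | from utils.utils import headers
 9 | 
10 | 
11 | def vlr_upcoming_matches(num_pages=1, from_page=None, to_page=None):
12 |     """
13 |     Get upcoming matches from VLR.GG. 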
14 | 15 | Args: 16 | num_pages (int): Number of pages to scrape from page 1 (ignored if from_page/to_page specified) 17 | from_page (int, optional): Starting page number (1-based) 18 | to_page (int, optional): Ending page number (1-based, inclusive) 19 | """ 20 | # Note: VLR.GG upcoming matches are typically only on the homepage 21 | # Page range parameters are included for API consistency but may not apply 22 | url = "https://www.vlr.gg" 23 | resp = requests.get(url, headers=headers) 24 | html = HTMLParser(resp.text) 25 | status = resp.status_code 26 | 27 | result = [] 28 | for item in html.css(".js-home-matches-upcoming a.wf-module-item"): 29 | is_upcoming = item.css_first(".h-match-eta.mod-upcoming") 30 | if is_upcoming: 31 | teams = [] 32 | flags = [] 33 | scores = [] 34 | for team in item.css(".h-match-team"): 35 | teams.append(team.css_first(".h-match-team-name").text().strip()) 36 | flags.append( 37 | team.css_first(".flag") 38 | .attributes["class"] 39 | .replace(" mod-", "") 40 | .replace("16", "_") 41 | ) 42 | scores.append(team.css_first(".h-match-team-score").text().strip()) 43 | 44 | eta = item.css_first(".h-match-eta").text().strip() 45 | if eta != "LIVE": 46 | eta = eta + " from now" 47 | 48 | match_event = item.css_first(".h-match-preview-event").text().strip() 49 | match_series = item.css_first(".h-match-preview-series").text().strip() 50 | timestamp = datetime.fromtimestamp( 51 | int(item.css_first(".moment-tz-convert").attributes["data-utc-ts"]), 52 | tz=timezone.utc, 53 | ).strftime("%Y-%m-%d %H:%M:%S") 54 | url_path = "https://www.vlr.gg/" + item.attributes["href"] 55 | 56 | result.append( 57 | { 58 | "team1": teams[0], 59 | "team2": teams[1], 60 | "flag1": flags[0], 61 | "flag2": flags[1], 62 | "time_until_match": eta, 63 | "match_series": match_series, 64 | "match_event": match_event, 65 | "unix_timestamp": timestamp, 66 | "match_page": url_path, 67 | } 68 | ) 69 | 70 | segments = {"status": status, "segments": result} 71 | data = {"data": segments} 72 | 73 | if status != 200: 74 | raise Exception("API response: {}".format(status)) 75 | return data 76 | 77 | 78 | def vlr_live_score(num_pages=1, from_page=None, to_page=None): 79 | """ 80 | Get live match scores from VLR.GG. 
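Serves the /match?q=live_score route in routers/vlr_router.py; each live match's own page is fetched for team logos and current-map details.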
81 | 82 | Args: 83 | num_pages (int): Number of pages to scrape from page 1 (ignored if from_page/to_page specified) 84 | from_page (int, optional): Starting page number (1-based) 85 | to_page (int, optional): Ending page number (1-based, inclusive) 86 | """ 87 | # Note: VLR.GG live matches are typically only on the homepage 88 | # Page range parameters are included for API consistency but may not apply 89 | url = "https://www.vlr.gg" 90 | resp = requests.get(url, headers=headers) 91 | html = HTMLParser(resp.text) 92 | status = resp.status_code 93 | 94 | matches = html.css(".js-home-matches-upcoming a.wf-module-item") 95 | result = [] 96 | for match in matches: 97 | is_live = match.css_first(".h-match-eta.mod-live") 98 | if is_live: 99 | teams = [] 100 | flags = [] 101 | scores = [] 102 | round_texts = [] 103 | for team in match.css(".h-match-team"): 104 | teams.append(team.css_first(".h-match-team-name").text().strip()) 105 | flags.append( 106 | team.css_first(".flag") 107 | .attributes["class"] 108 | .replace(" mod-", "") 109 | .replace("16", "_") 110 | ) 111 | scores.append(team.css_first(".h-match-team-score").text().strip()) 112 | round_info_ct = team.css(".h-match-team-rounds .mod-ct") 113 | round_info_t = team.css(".h-match-team-rounds .mod-t") 114 | round_text_ct = ( 115 | round_info_ct[0].text().strip() if round_info_ct else "N/A" 116 | ) 117 | round_text_t = round_info_t[0].text().strip() if round_info_t else "N/A" 118 | round_texts.append({"ct": round_text_ct, "t": round_text_t}) 119 | 120 | eta = "LIVE" 121 | match_event = match.css_first(".h-match-preview-event").text().strip() 122 | match_series = match.css_first(".h-match-preview-series").text().strip() 123 | timestamp = datetime.fromtimestamp( 124 | int(match.css_first(".moment-tz-convert").attributes["data-utc-ts"]), 125 | tz=timezone.utc, 126 | ).strftime("%Y-%m-%d %H:%M:%S") 127 | url_path = "https://www.vlr.gg/" + match.attributes["href"] 128 | 129 | match_page = requests.get(url_path, headers=headers) 130 | match_html = HTMLParser(match_page.text) 131 | 132 | team_logos = [] 133 | for img in match_html.css(".match-header-vs img"): 134 | logo_url = "https:" + img.attributes.get("src", "") 135 | team_logos.append(logo_url) 136 | 137 | current_map_element = match_html.css_first( 138 | ".vm-stats-gamesnav-item.js-map-switch.mod-active.mod-live" 139 | ) 140 | current_map = "Unknown" 141 | if current_map_element: 142 | current_map = ( 143 | current_map_element.css_first("div", default="Unknown") 144 | .text() 145 | .strip() 146 | .replace("\n", "") 147 | .replace("\t", "") 148 | ) 149 | current_map = re.sub(r"^\d+", "", current_map) 150 | map_number_match = ( 151 | current_map_element.css_first("div", default="Unknown") 152 | .text() 153 | .strip() 154 | .replace("\n", "") 155 | .replace("\t", "") 156 | ) 157 | map_number_match = re.search(r"^\d+", map_number_match) 158 | map_number = ( 159 | map_number_match.group(0) if map_number_match else "Unknown" 160 | ) 161 | 162 | team1_round_ct = round_texts[0]["ct"] if len(round_texts) > 0 else "N/A" 163 | team1_round_t = round_texts[0]["t"] if len(round_texts) > 0 else "N/A" 164 | team2_round_ct = round_texts[1]["ct"] if len(round_texts) > 1 else "N/A" 165 | team2_round_t = round_texts[1]["t"] if len(round_texts) > 1 else "N/A" 166 | result.append( 167 | { 168 | "team1": teams[0], 169 | "team2": teams[1], 170 | "flag1": flags[0], 171 | "flag2": flags[1], 172 | "team1_logo": team_logos[0] if len(team_logos) > 0 else "", 173 | "team2_logo": team_logos[1] if len(team_logos) > 1 else 
"", 174 | "score1": scores[0], 175 | "score2": scores[1], 176 | "team1_round_ct": team1_round_ct, 177 | "team1_round_t": team1_round_t, 178 | "team2_round_ct": team2_round_ct, 179 | "team2_round_t": team2_round_t, 180 | "map_number": map_number, 181 | "current_map": current_map, 182 | "time_until_match": eta, 183 | "match_event": match_event, 184 | "match_series": match_series, 185 | "unix_timestamp": timestamp, 186 | "match_page": url_path, 187 | } 188 | ) 189 | 190 | segments = {"status": status, "segments": result} 191 | data = {"data": segments} 192 | 193 | if status != 200: 194 | raise Exception("API response: {}".format(status)) 195 | return data 196 | 197 | 198 | def vlr_match_results(num_pages=1, from_page=None, to_page=None, max_retries=3, request_delay=1.0, timeout=30): 199 | """ 200 | Scrape match results with robust error handling for large page counts. 201 | 202 | Args: 203 | num_pages (int): Number of pages to scrape from page 1 (ignored if from_page/to_page specified) 204 | from_page (int, optional): Starting page number (1-based) 205 | to_page (int, optional): Ending page number (1-based, inclusive) 206 | max_retries (int): Maximum retry attempts per page 207 | request_delay (float): Delay between requests in seconds 208 | timeout (int): Request timeout in seconds 209 | 210 | Returns: 211 | dict: API response with match data 212 | """ 213 | 214 | result = [] 215 | status = 200 216 | failed_pages = [] 217 | 218 | # Determine page range 219 | if from_page is not None and to_page is not None: 220 | if from_page < 1: 221 | raise ValueError("from_page must be >= 1") 222 | if to_page < from_page: 223 | raise ValueError("to_page must be >= from_page") 224 | start_page = from_page 225 | end_page = to_page 226 | total_pages = end_page - start_page + 1 227 | elif from_page is not None: 228 | if from_page < 1: 229 | raise ValueError("from_page must be >= 1") 230 | start_page = from_page 231 | end_page = from_page + num_pages - 1 232 | total_pages = num_pages 233 | elif to_page is not None: 234 | if to_page < 1: 235 | raise ValueError("to_page must be >= 1") 236 | start_page = max(1, to_page - num_pages + 1) 237 | end_page = to_page 238 | total_pages = end_page - start_page + 1 239 | else: 240 | # Default behavior: scrape from page 1 241 | start_page = 1 242 | end_page = num_pages 243 | total_pages = num_pages 244 | 245 | # Create a session for connection pooling and efficiency 246 | session = requests.Session() 247 | session.headers.update(headers) 248 | 249 | print(f"Starting to scrape pages {start_page}-{end_page} ({total_pages} pages) with {request_delay}s delay between requests...") 250 | 251 | for page in range(start_page, end_page + 1): 252 | page_success = False 253 | retry_count = 0 254 | 255 | while not page_success and retry_count < max_retries: 256 | try: 257 | if page == 1: 258 | url = "https://www.vlr.gg/matches/results" 259 | else: 260 | url = f"https://www.vlr.gg/matches/results/?page={page}" 261 | 262 | current_page_num = page - start_page + 1 263 | print(f"Scraping page {page} ({current_page_num}/{total_pages}) (attempt {retry_count + 1}/{max_retries})") 264 | 265 | # Add timeout and handle potential connection issues 266 | resp = session.get(url, timeout=timeout) 267 | html = HTMLParser(resp.text) 268 | current_status = resp.status_code 269 | 270 | if current_status != 200: 271 | print(f"Warning: Page {page} returned status {current_status}") 272 | retry_count += 1 273 | if retry_count < max_retries: 274 | time.sleep(request_delay * (2 ** retry_count)) # Exponential 
backoff 275 | continue 276 | 277 | page_results = [] 278 | items = html.css("a.wf-module-item") 279 | 280 | if not items: 281 | print(f"Warning: No match items found on page {page}") 282 | page_success = True # Consider empty page as success 283 | break 284 | 285 | for item in items: 286 | try: 287 | url_path = item.attributes["href"] 288 | eta = item.css_first("div.ml-eta").text() + " ago" 289 | rounds = ( 290 | item.css_first("div.match-item-event-series") 291 | .text() 292 | .replace("\u2013", "-") 293 | .replace("\n", "") 294 | .replace("\t", "") 295 | ) 296 | tourney = ( 297 | item.css_first("div.match-item-event") 298 | .text() 299 | .replace("\t", " ") 300 | .strip() 301 | .split("\n")[1] 302 | .strip() 303 | ) 304 | tourney_icon_url = f"https:{item.css_first('img').attributes['src']}" 305 | 306 | try: 307 | team_array = ( 308 | item.css_first("div.match-item-vs").css_first("div:nth-child(2)").text() 309 | ) 310 | except Exception: 311 | team_array = "TBD" 312 | team_array = ( 313 | team_array.replace("\t", " ") 314 | .replace("\n", " ") 315 | .strip() 316 | .split(" ") 317 | ) 318 | team1 = team_array[0] 319 | score1 = team_array[1].replace(" ", "").strip() 320 | team2 = team_array[4].strip() 321 | score2 = team_array[-1].replace(" ", "").strip() 322 | 323 | flag_list = [ 324 | flag_parent.attributes["class"].replace(" mod-", "_") 325 | for flag_parent in item.css(".flag") 326 | ] 327 | flag1 = flag_list[0] if len(flag_list) > 0 else "" 328 | flag2 = flag_list[1] if len(flag_list) > 1 else "" 329 | 330 | page_results.append( 331 | { 332 | "team1": team1, 333 | "team2": team2, 334 | "score1": score1, 335 | "score2": score2, 336 | "flag1": flag1, 337 | "flag2": flag2, 338 | "time_completed": eta, 339 | "round_info": rounds, 340 | "tournament_name": tourney, 341 | "match_page": url_path, 342 | "tournament_icon": tourney_icon_url, 343 | "page_number": page, # Track which page this came from 344 | } 345 | ) 346 | except Exception as e: 347 | print(f"Warning: Failed to parse match item on page {page}: {str(e)}") 348 | continue 349 | 350 | result.extend(page_results) 351 | print(f"Successfully scraped page {page}: {len(page_results)} matches") 352 | page_success = True 353 | 354 | # Rate limiting between successful requests 355 | if page < end_page: 356 | time.sleep(request_delay) 357 | 358 | except requests.exceptions.Timeout: 359 | retry_count += 1 360 | print(f"Timeout error on page {page}, attempt {retry_count}/{max_retries}") 361 | if retry_count < max_retries: 362 | backoff_time = request_delay * (2 ** retry_count) 363 | print(f"Retrying page {page} in {backoff_time:.1f} seconds...") 364 | time.sleep(backoff_time) 365 | 366 | except requests.exceptions.ConnectionError: 367 | retry_count += 1 368 | print(f"Connection error on page {page}, attempt {retry_count}/{max_retries}") 369 | if retry_count < max_retries: 370 | backoff_time = request_delay * (2 ** retry_count) 371 | print(f"Retrying page {page} in {backoff_time:.1f} seconds...") 372 | time.sleep(backoff_time) 373 | 374 | except Exception as e: 375 | retry_count += 1 376 | print(f"Unexpected error on page {page}: {str(e)}") 377 | if retry_count < max_retries: 378 | backoff_time = request_delay * (2 ** retry_count) 379 | print(f"Retrying page {page} in {backoff_time:.1f} seconds...") 380 | time.sleep(backoff_time) 381 | 382 | if not page_success: 383 | failed_pages.append(page) 384 | print(f"Failed to scrape page {page} after {max_retries} attempts") 385 | 386 | # Close the session 387 | session.close() 388 | 389 | # Report 
results 390 | total_matches = len(result) 391 | successful_pages = total_pages - len(failed_pages) 392 | 393 | print(f"\nScraping completed:") 394 | print(f" Page range: {start_page}-{end_page}") 395 | print(f" Total matches: {total_matches}") 396 | print(f" Successful pages: {successful_pages}/{total_pages}") 397 | 398 | if failed_pages: 399 | print(f" Failed pages: {failed_pages}") 400 | print(f" Consider retrying failed pages or adjusting parameters") 401 | 402 | segments = { 403 | "status": status, 404 | "segments": result, 405 | "meta": { 406 | "page_range": f"{start_page}-{end_page}", 407 | "total_pages_requested": total_pages, 408 | "successful_pages": successful_pages, 409 | "failed_pages": failed_pages, 410 | "total_matches": total_matches 411 | } 412 | } 413 | data = {"data": segments} 414 | 415 | if not result: 416 | raise Exception(f"No data retrieved. Failed pages: {failed_pages}") 417 | 418 | return data --------------------------------------------------------------------------------