├── utils
│   ├── __init__.py
│   └── utils.py
├── runtime.txt
├── requirements.txt
├── vercel.json
├── docker-compose.yml
├── api
│   ├── scrapers
│   │   ├── __init__.py
│   │   ├── health.py
│   │   ├── news.py
│   │   ├── stats.py
│   │   ├── rankings.py
│   │   ├── events.py
│   │   └── matches.py
│   └── scrape.py
├── .github
│   ├── dependabot.yml
│   ├── ISSUE_TEMPLATE
│   │   └── bug-report.yml
│   └── workflows
│       ├── docker_dev.yml
│       └── docker.yml
├── Dockerfile
├── main.py
├── LICENSE
├── .gitignore
├── routers
│   └── vlr_router.py
└── README.md

/utils/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/runtime.txt:
--------------------------------------------------------------------------------
1 | python-3.9.5
2 | 
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | requests==2.32.4
2 | uvicorn==0.34.3
3 | fastapi==0.115.13
4 | lxml==5.4.0
5 | slowapi==0.1.9
6 | selectolax==0.3.29
7 | 
--------------------------------------------------------------------------------
/vercel.json:
--------------------------------------------------------------------------------
1 | {
2 |   "builds": [{ "src": "main.py", "use": "@vercel/python" }],
3 |   "routes": [{ "src": "/(.*)", "dest": "main.py" }]
4 | }
--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
1 | version: "3"
2 | 
3 | services:
4 |   bot:
5 |     container_name: "vlrggapi"
6 |     ports:
7 |       - "3001:3001"
8 |     build: .
--------------------------------------------------------------------------------
/api/scrapers/__init__.py:
--------------------------------------------------------------------------------
1 | from .news import vlr_news
2 | from .rankings import vlr_rankings
3 | from .stats import vlr_stats
4 | from .matches import vlr_upcoming_matches, vlr_live_score, vlr_match_results
5 | from .events import vlr_events
6 | from .health import check_health
7 | 
--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
 1 | version: 2
 2 | updates:
 3 |   - package-ecosystem: pip
 4 |     directory: /
 5 |     schedule:
 6 |       interval: weekly
 7 |     open-pull-requests-limit: 3
 8 |     allow:
 9 |       - dependency-type: 'production'
10 |     target-branch: "dev"
11 |   - package-ecosystem: github-actions
12 |     directory: '/'
13 |     schedule:
14 |       interval: weekly
15 |     open-pull-requests-limit: 2
16 |     target-branch: "dev"
--------------------------------------------------------------------------------
/utils/utils.py:
--------------------------------------------------------------------------------
 1 | headers = {
 2 |     "User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0",
 3 | }
 4 | 
 5 | 
 6 | region = {
 7 |     "na": "north-america",
 8 |     "eu": "europe",
 9 |     "ap": "asia-pacific",
10 |     "la": "latin-america",
11 |     "la-s": "la-s",
12 |     "la-n": "la-n",
13 |     "oce": "oceania",
14 |     "kr": "korea",
15 |     "mn": "mena",
16 |     "gc": "gc",
17 |     "br": "brazil",
18 |     "cn": "china",
19 |     "jp": "japan",
20 |     "col": "collegiate",
21 | }
22 | 
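23 | # Usage note: rankings.py builds its request URL from this map, e.g.
24 | # region["na"] -> "north-america" -> https://www.vlr.gg/rankings/north-america
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM 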
tiangolo/uvicorn-gunicorn:python3.9-alpine3.14 as base 2 | 3 | RUN mkdir -p /vlrggapi 4 | 5 | WORKDIR /vlrggapi 6 | 7 | COPY requirements.txt . 8 | RUN pip install --no-cache-dir -r requirements.txt 9 | 10 | 11 | FROM tiangolo/uvicorn-gunicorn:python3.9-alpine3.14 as final 12 | 13 | WORKDIR /vlrggapi 14 | COPY --from=base /usr/local/lib/python3.9/site-packages /usr/local/lib/python3.9/site-packages 15 | COPY . . 16 | 17 | RUN apk add curl 18 | 19 | CMD ["python", "main.py"] 20 | HEALTHCHECK --interval=5s --timeout=3s CMD curl --fail http://127.0.0.1:3001/health || exit 1 21 | -------------------------------------------------------------------------------- /api/scrapers/health.py: -------------------------------------------------------------------------------- 1 | import requests 2 | 3 | 4 | def check_health(): 5 | sites = ["https://vlrggapi.vercel.app", "https://vlr.gg"] 6 | results = {} 7 | for site in sites: 8 | try: 9 | response = requests.get(site, timeout=5) 10 | results[site] = { 11 | "status": "Healthy" if response.status_code == 200 else "Unhealthy", 12 | "status_code": response.status_code, 13 | } 14 | except requests.RequestException: 15 | results[site] = {"status": "Unhealthy", "status_code": None} 16 | return results 17 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import uvicorn 4 | from fastapi import FastAPI 5 | from fastapi.responses import RedirectResponse 6 | from slowapi import Limiter, _rate_limit_exceeded_handler 7 | from slowapi.errors import RateLimitExceeded 8 | from slowapi.util import get_remote_address 9 | 10 | from routers.vlr_router import router as vlr_router 11 | 12 | logging.basicConfig(level=logging.INFO) 13 | logger = logging.getLogger(__name__) 14 | 15 | app = FastAPI( 16 | title="vlrggapi", 17 | description="An Unofficial REST API for [vlr.gg](https://www.vlr.gg/), a site for Valorant Esports match and news coverage. Made by [axsddlr](https://github.com/axsddlr)", 18 | docs_url="/", 19 | redoc_url=None, 20 | ) 21 | 22 | 23 | limiter = Limiter(key_func=get_remote_address) 24 | app.state.limiter = limiter 25 | app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler) 26 | app.include_router(vlr_router) 27 | 28 | 29 | @app.get("/", include_in_schema=False) 30 | def root(): 31 | return RedirectResponse(url="/docs") 32 | 33 | 34 | if __name__ == "__main__": 35 | uvicorn.run("main:app", host="0.0.0.0", port=3001) 36 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021-2024 Andre Saddler 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /api/scrapers/news.py: -------------------------------------------------------------------------------- 1 | import requests 2 | from selectolax.parser import HTMLParser 3 | 4 | from utils.utils import headers 5 | 6 | 7 | def vlr_news(): 8 | url = "https://www.vlr.gg/news" 9 | resp = requests.get(url, headers=headers) 10 | html = HTMLParser(resp.text) 11 | status = resp.status_code 12 | 13 | result = [] 14 | for item in html.css("a.wf-module-item"): 15 | date_author = item.css_first("div.ge-text-light").text() 16 | date, author = date_author.split("by") 17 | 18 | desc = item.css_first("div").css_first("div:nth-child(2)").text().strip() 19 | 20 | title = item.css_first("div:nth-child(1)").text().strip().split("\n")[0] 21 | title = title.replace("\t", "") 22 | 23 | url = item.css_first("a.wf-module-item").attributes["href"] 24 | 25 | result.append( 26 | { 27 | "title": title, 28 | "description": desc, 29 | "date": date.split("\u2022")[1].strip(), 30 | "author": author.strip(), 31 | "url_path": "https://vlr.gg" + url, 32 | } 33 | ) 34 | 35 | data = {"data": {"status": status, "segments": result}} 36 | 37 | if status != 200: 38 | raise Exception("API response: {}".format(status)) 39 | return data 40 | -------------------------------------------------------------------------------- /api/scrape.py: -------------------------------------------------------------------------------- 1 | from api.scrapers import ( 2 | check_health, 3 | vlr_events, 4 | vlr_live_score, 5 | vlr_match_results, 6 | vlr_news, 7 | vlr_rankings, 8 | vlr_stats, 9 | vlr_upcoming_matches, 10 | ) 11 | 12 | 13 | class Vlr: 14 | @staticmethod 15 | def vlr_news(): 16 | return vlr_news() 17 | 18 | @staticmethod 19 | def vlr_rankings(region): 20 | return vlr_rankings(region) 21 | 22 | @staticmethod 23 | def vlr_stats(region: str, timespan: str): 24 | return vlr_stats(region, timespan) 25 | 26 | @staticmethod 27 | def vlr_upcoming_matches(num_pages=1, from_page=None, to_page=None): 28 | return vlr_upcoming_matches(num_pages, from_page, to_page) 29 | 30 | @staticmethod 31 | def vlr_live_score(num_pages=1, from_page=None, to_page=None): 32 | return vlr_live_score(num_pages, from_page, to_page) 33 | 34 | @staticmethod 35 | def vlr_match_results(num_pages=1, from_page=None, to_page=None, max_retries=3, request_delay=1.0, timeout=30): 36 | return vlr_match_results(num_pages, from_page, to_page, max_retries, request_delay, timeout) 37 | 38 | @staticmethod 39 | def vlr_events(upcoming=True, completed=True, page=1): 40 | return vlr_events(upcoming, completed, page) 41 | 42 | @staticmethod 43 | def check_health(): 44 | return check_health() 45 | 46 | 47 | if __name__ == "__main__": 48 | print(Vlr.vlr_live_score()) 49 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug-report.yml: -------------------------------------------------------------------------------- 1 | name: 🐞 Bug report 2 | description: Create a 
report to help us improve
 3 | labels: [bug]
 4 | body:
 5 |   - type: input
 6 |     id: describe-the-bug
 7 |     attributes:
 8 |       label: Describe the bug
 9 |       description: |
10 |         A clear and concise description of what the bug is.
11 |       placeholder: |
12 |         Example: "This endpoint is not working..."
13 |     validations:
14 |       required: true
15 | 
16 |   - type: textarea
17 |     id: reproduce-steps
18 |     attributes:
19 |       label: Steps to reproduce
20 |       description: Provide an example of the issue.
21 |       placeholder: |
22 |         Example:
23 |           1. First step
24 |           2. Second step
25 |           3. Issue here
26 |     validations:
27 |       required: true
28 | 
29 |   - type: textarea
30 |     id: expected-behavior
31 |     attributes:
32 |       label: Expected behavior
33 |       placeholder: |
34 |         Example:
35 |           "This should happen..."
36 |     validations:
37 |       required: true
38 | 
39 |   - type: textarea
40 |     id: actual-behavior
41 |     attributes:
42 |       label: Actual behavior
43 |       placeholder: |
44 |         Example:
45 |           "This happened instead..."
46 |     validations:
47 |       required: true
48 | 
49 |   - type: textarea
50 |     id: additional-context
51 |     attributes:
52 |       label: Additional context
53 |       description: |
54 |         Add any other context about the problem here.
55 |       placeholder: |
56 |         Example:
57 |           "Also ..."
--------------------------------------------------------------------------------
/.github/workflows/docker_dev.yml:
--------------------------------------------------------------------------------
 1 | name: vlrggapi_dev
 2 | 
 3 | on:
 4 |   # run it on push to the default repository branch
 5 |   push:
 6 |     branches: [dev]
 7 |   # run it during pull request
 8 |   pull_request:
 9 | 
10 | env:
11 |   REGISTRY: ghcr.io
12 |   IMAGE_NAME: ${{ github.repository }}
13 | 
14 | jobs:
15 |   # define job to build and publish docker image
16 |   build-and-push-docker-image:
17 |     name: Build Docker image and push to repositories
18 |     # run only when code is compiling and tests are passing
19 |     runs-on: ubuntu-latest
20 | 
21 |     # steps to perform in job
22 |     steps:
23 |       - name: Checkout code
24 |         uses: actions/checkout@v4
25 | 
26 |       - name: Set up QEMU
27 |         uses: docker/setup-qemu-action@v3
28 | 
29 |       # setup Docker build action
30 |       - name: Set up Docker Buildx
31 |         id: buildx
32 |         uses: docker/setup-buildx-action@v3
33 | 
34 |       - name: Login to GitHub Packages
35 |         uses: docker/login-action@v3
36 |         with:
37 |           registry: ghcr.io
38 |           username: ${{ github.actor }}
39 |           password: ${{ secrets.GHCR_PAT }}
40 | 
41 |       - name: Build image and push to Docker Hub and GitHub Container Registry
42 |         id: docker_build
43 |         uses: docker/build-push-action@v6
44 |         with:
45 |           platforms: linux/amd64,linux/arm64,linux/arm/v7
46 |           # relative path to the place where source code with Dockerfile is located
47 |           context: ./
48 |           # Note: tags has to be all lower-case
49 |           tags: ghcr.io/axsddlr/vlrggapi:dev
50 |           # build on feature branches, push only on dev branch
51 |           push: ${{ github.ref == 'refs/heads/dev' }}
52 | 
53 |       - name: Image digest
54 |         run: echo ${{ steps.docker_build.outputs.digest }}
--------------------------------------------------------------------------------
/.github/workflows/docker.yml:
--------------------------------------------------------------------------------
 1 | name: vlrggapi_latest
 2 | 
 3 | on:
 4 |   # run it on push to the default repository branch
 5 |   push:
 6 |     branches: [master]
 7 |   # run it during pull request
 8 |   pull_request:
 9 | 
10 | env:
11 |   REGISTRY: ghcr.io
12 |   IMAGE_NAME: ${{ github.repository }}
13 | 
14 | jobs:
15 |   # define job to build and publish docker image
16 |   build-and-push-docker-image:
17 |     name: Build Docker image and push to repositories
18 |     # run only when code is compiling and tests are passing
19 |     runs-on: ubuntu-latest
20 | 
21 |     # steps to perform in job
22 |     steps:
23 |       - name: Checkout code
24 |         uses: actions/checkout@v4
25 | 
26 |       - name: Set up QEMU
27 |         uses: docker/setup-qemu-action@v3
28 | 
29 |       # setup Docker build action
30 |       - name: Set up Docker Buildx
31 |         id: buildx
32 |         uses: docker/setup-buildx-action@v3
33 | 
34 |       - name: Login to GitHub Packages
35 |         uses: docker/login-action@v3
36 |         with:
37 |           registry: ${{ env.REGISTRY }}
38 |           username: ${{ github.actor }}
39 |           password: ${{ secrets.GHCR_PAT }}
40 | 
41 |       - name: Build image and push to Docker Hub and GitHub Container Registry
42 |         id: docker_build
43 |         uses: docker/build-push-action@v6
44 |         with:
45 |           platforms: linux/amd64,linux/arm64,linux/arm/v7
46 |           # relative path to the place where source code with Dockerfile is located
47 |           context: ./
48 |           # Note: tags has to be all lower-case
49 |           tags: ${{ env.REGISTRY }}/${{ github.repository }}:latest
50 |           # build on feature branches, push only on master branch
51 |           push: ${{ github.ref == 'refs/heads/master' }}
52 | 
53 |       # 24-July-2023 Update: Use the new environment files for state and output
54 |       - name: Save state
55 |         run: echo "name=value" >> $GITHUB_STATE
56 | 
57 |       - name: Set output
58 |         run: echo "name=value" >> $GITHUB_OUTPUT
59 | 
60 |       - name: Image digest
61 |         run: echo ${{ steps.docker_build.outputs.digest }}
62 | 
--------------------------------------------------------------------------------
/api/scrapers/stats.py:
--------------------------------------------------------------------------------
 1 | import requests
 2 | from selectolax.parser import HTMLParser
 3 | 
 4 | from utils.utils import headers
 5 | 
 6 | 
 7 | def vlr_stats(region: str, timespan: str):
 8 |     base_url = f"https://www.vlr.gg/stats/?event_group_id=all&event_id=all&region={region}&country=all&min_rounds=200&min_rating=1550&agent=all&map_id=all"
 9 |     url = (
10 |         f"{base_url}&timespan=all"
11 |         if timespan.lower() == "all"
12 |         else f"{base_url}&timespan={timespan}d"
13 |     )
14 | 
15 |     resp = requests.get(url, headers=headers)
16 |     html = HTMLParser(resp.text)
17 |     status = resp.status_code
18 | 
19 |     result = []
20 |     for item in html.css("tbody tr"):
21 |         player = item.text().replace("\t", "").replace("\n", " ").strip().split()
22 |         player_name = player[0]
23 |         org = player[1] if len(player) > 1 else "N/A"
24 | 
25 |         agents = [
26 |             agents.attributes["src"].split("/")[-1].split(".")[0]
27 |             for agents in item.css("td.mod-agents img")
28 |         ]
29 |         color_sq = [stats.text() for stats in item.css("td.mod-color-sq")]
30 |         rnd = item.css_first("td.mod-rnd").text()
31 | 
32 |         result.append(
33 |             {
34 |                 "player": player_name,
35 |                 "org": org,
36 |                 "agents": agents,
37 |                 "rounds_played": rnd,
38 |                 "rating": color_sq[0],
39 |                 "average_combat_score": color_sq[1],
40 |                 "kill_deaths": color_sq[2],
41 |                 "kill_assists_survived_traded": color_sq[3],
42 |                 "average_damage_per_round": color_sq[4],
43 |                 "kills_per_round": color_sq[5],
44 |                 "assists_per_round": color_sq[6],
45 |                 "first_kills_per_round": color_sq[7],
46 |                 "first_deaths_per_round": color_sq[8],
47 |                 "headshot_percentage": color_sq[9],
48 |                 "clutch_success_percentage": color_sq[10],
49 |             }
50 |         )
51 | 
52 |     segments = {"status": status, "segments": result}
53 |     data = {"data": segments}
54 | 
55 |     if status != 200:
56 |         raise Exception("API response: {}".format(status))
57 |     return data
58 | 
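59 | 
60 | if __name__ == "__main__":
61 |     # Minimal smoke test (illustrative sketch, mirroring the __main__ block in
62 |     # api/scrape.py); assumes vlr.gg is reachable and that this is run from the
63 |     # repo root, e.g. `python -m api.scrapers.stats`.
64 |     print(vlr_stats("na", "30"))
--------------------------------------------------------------------------------
/api/scrapers/rankings.py: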
-------------------------------------------------------------------------------- 1 | import re 2 | 3 | import requests 4 | from selectolax.parser import HTMLParser 5 | 6 | from utils.utils import headers, region 7 | 8 | 9 | def vlr_rankings(region_key): 10 | url = "https://www.vlr.gg/rankings/" + region[str(region_key)] 11 | resp = requests.get(url, headers=headers) 12 | html = HTMLParser(resp.text) 13 | status = resp.status_code 14 | 15 | result = [] 16 | for item in html.css("div.rank-item"): 17 | rank = item.css_first("div.rank-item-rank-num").text().strip() 18 | team = item.css_first("div.ge-text").text().split("#")[0] 19 | logo = item.css_first("a.rank-item-team").css_first("img").attributes["src"] 20 | logo = re.sub(r"\/img\/vlr\/tmp\/vlr.png", "", logo) 21 | country = item.css_first("div.rank-item-team-country").text() 22 | last_played = ( 23 | item.css_first("a.rank-item-last") 24 | .text() 25 | .replace("\n", "") 26 | .replace("\t", "") 27 | .split("v")[0] 28 | ) 29 | last_played_team = ( 30 | item.css_first("a.rank-item-last") 31 | .text() 32 | .replace("\t", "") 33 | .replace("\n", "") 34 | .split("o")[1] 35 | .replace(".", ". ") 36 | ) 37 | last_played_team_logo = ( 38 | item.css_first("a.rank-item-last").css_first("img").attributes["src"] 39 | ) 40 | record = ( 41 | item.css_first("div.rank-item-record") 42 | .text() 43 | .replace("\t", "") 44 | .replace("\n", "") 45 | ) 46 | earnings = ( 47 | item.css_first("div.rank-item-earnings") 48 | .text() 49 | .replace("\t", "") 50 | .replace("\n", "") 51 | ) 52 | 53 | result.append( 54 | { 55 | "rank": rank, 56 | "team": team.strip(), 57 | "country": country, 58 | "last_played": last_played.strip(), 59 | "last_played_team": last_played_team.strip(), 60 | "last_played_team_logo": last_played_team_logo, 61 | "record": record, 62 | "earnings": earnings, 63 | "logo": logo, 64 | } 65 | ) 66 | 67 | data = {"status": status, "data": result} 68 | 69 | if status != 200: 70 | raise Exception("API response: {}".format(status)) 71 | return data 72 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Config 2 | config.py 3 | config.json 4 | info.json 5 | headers.json 6 | graph.png 7 | config.ini 8 | lock.lock 9 | app.db 10 | *.code-workspace 11 | 12 | # pycharm 13 | .idea 14 | 15 | # Byte-compiled / optimized / DLL files 16 | __pycache__/ 17 | *.py[cod] 18 | *$py.class 19 | 20 | # C extensions 21 | *.so 22 | 23 | # Distribution / packaging 24 | .Python 25 | build/ 26 | develop-eggs/ 27 | dist/ 28 | downloads/ 29 | eggs/ 30 | .eggs/ 31 | lib/ 32 | lib64/ 33 | parts/ 34 | sdist/ 35 | var/ 36 | wheels/ 37 | pip-wheel-metadata/ 38 | share/python-wheels/ 39 | *.egg-info/ 40 | .installed.cfg 41 | *.egg 42 | MANIFEST 43 | 44 | # PyInstaller 45 | # Usually these files are written by a python script from a template 46 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
47 | *.manifest 48 | *.spec 49 | 50 | # Installer logs 51 | pip-log.txt 52 | pip-delete-this-directory.txt 53 | 54 | # Unit test / coverage reports 55 | htmlcov/ 56 | .tox/ 57 | .nox/ 58 | .coverage 59 | .coverage.* 60 | .cache 61 | nosetests.xml 62 | coverage.xml 63 | *.cover 64 | *.py,cover 65 | .hypothesis/ 66 | .pytest_cache/ 67 | 68 | # Translations 69 | *.mo 70 | *.pot 71 | 72 | # Django stuff: 73 | *.log 74 | local_settings.py 75 | db.sqlite3 76 | db.sqlite3-journal 77 | 78 | # Flask stuff: 79 | instance/ 80 | .webassets-cache 81 | 82 | # Scrapy stuff: 83 | .scrapy 84 | 85 | # Sphinx documentation 86 | docs/_build/ 87 | 88 | # PyBuilder 89 | target/ 90 | 91 | # Jupyter Notebook 92 | .ipynb_checkpoints 93 | 94 | # IPython 95 | profile_default/ 96 | ipython_config.py 97 | 98 | # pyenv 99 | .python-version 100 | 101 | # pipenv 102 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 103 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 104 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 105 | # install all needed dependencies. 106 | #Pipfile.lock 107 | 108 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 109 | __pypackages__/ 110 | 111 | # Celery stuff 112 | celerybeat-schedule 113 | celerybeat.pid 114 | 115 | # SageMath parsed files 116 | *.sage.py 117 | 118 | # Environments 119 | .env 120 | .venv 121 | env/ 122 | venv/ 123 | ENV/ 124 | env.bak/ 125 | venv.bak/ 126 | 127 | # Spyder project settings 128 | .spyderproject 129 | .spyproject 130 | 131 | # Rope project settings 132 | .ropeproject 133 | 134 | # mkdocs documentation 135 | /site 136 | 137 | # mypy 138 | .mypy_cache/ 139 | .dmypy.json 140 | dmypy.json 141 | 142 | # Pyre type checker 143 | .pyre/ 144 | -------------------------------------------------------------------------------- /routers/vlr_router.py: -------------------------------------------------------------------------------- 1 | from fastapi import APIRouter, Query, Request 2 | from slowapi import Limiter 3 | from slowapi.util import get_remote_address 4 | 5 | from api.scrape import Vlr 6 | 7 | router = APIRouter() 8 | limiter = Limiter(key_func=get_remote_address) 9 | vlr = Vlr() 10 | 11 | 12 | @router.get("/news") 13 | @limiter.limit("600/minute") 14 | async def VLR_news(request: Request): 15 | return vlr.vlr_news() 16 | 17 | 18 | @router.get("/stats") 19 | @limiter.limit("600/minute") 20 | async def VLR_stats( 21 | request: Request, 22 | region: str = Query(..., description="Region shortname"), 23 | timespan: str = Query(..., description="Timespan (30, 60, 90, or all)"), 24 | ): 25 | """ 26 | Get VLR stats with query parameters. 27 | 28 | region shortnames:\n 29 | "na": "north-america",\n 30 | "eu": "europe",\n 31 | "ap": "asia-pacific",\n 32 | "sa": "latin-america",\n 33 | "jp": "japan",\n 34 | "oce": "oceania",\n 35 | "mn": "mena"\n 36 | """ 37 | return vlr.vlr_stats(region, timespan) 38 | 39 | 40 | @router.get("/rankings") 41 | @limiter.limit("600/minute") 42 | async def VLR_ranks( 43 | request: Request, region: str = Query(..., description="Region shortname") 44 | ): 45 | """ 46 | Get VLR rankings for a specific region. 
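Example request (as in the README): GET /rankings?region=na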
47 | 48 | region shortnames:\n 49 | "na": "north-america",\n 50 | "eu": "europe",\n 51 | "ap": "asia-pacific",\n 52 | "la": "latin-america",\n 53 | "la-s": "la-s",\n 54 | "la-n": "la-n",\n 55 | "oce": "oceania",\n 56 | "kr": "korea",\n 57 | "mn": "mena",\n 58 | "gc": "game-changers",\n 59 | "br": "Brazil",\n 60 | "cn": "china",\n 61 | "jp": "japan",\n 62 | "col": "collegiate",\n 63 | """ 64 | return vlr.vlr_rankings(region) 65 | 66 | 67 | @router.get("/match") 68 | @limiter.limit("600/minute") 69 | async def VLR_match( 70 | request: Request, 71 | q: str, 72 | num_pages: int = Query(1, description="Number of pages to scrape (default: 1)", ge=1, le=600), 73 | from_page: int = Query(None, description="Starting page number (1-based, optional)", ge=1, le=600), 74 | to_page: int = Query(None, description="Ending page number (1-based, inclusive, optional)", ge=1, le=600), 75 | max_retries: int = Query(3, description="Maximum retry attempts per page (default: 3)", ge=1, le=5), 76 | request_delay: float = Query(1.0, description="Delay between requests in seconds (default: 1.0)", ge=0.5, le=5.0), 77 | timeout: int = Query(30, description="Request timeout in seconds (default: 30)", ge=10, le=120) 78 | ): 79 | """ 80 | query parameters:\n 81 | "upcoming": upcoming matches,\n 82 | "live_score": live match scores,\n 83 | "results": match results,\n 84 | 85 | Page Range Options: 86 | - num_pages: Number of pages from page 1 (ignored if from_page/to_page specified) 87 | - from_page: Starting page number (1-based, optional) 88 | - to_page: Ending page number (1-based, inclusive, optional) 89 | 90 | Additional parameters for robust scraping: 91 | - max_retries: Maximum retry attempts per failed page (1-5, default: 3) 92 | - request_delay: Delay between requests in seconds (0.5-5.0, default: 1.0) 93 | - timeout: Request timeout in seconds (10-120, default: 30) 94 | 95 | Examples: 96 | - /match?q=results&num_pages=5 (scrapes pages 1-5) 97 | - /match?q=results&from_page=10&to_page=15 (scrapes pages 10-15) 98 | - /match?q=results&from_page=5&num_pages=3 (scrapes pages 5-7) 99 | """ 100 | if q == "upcoming": 101 | return vlr.vlr_upcoming_matches(num_pages, from_page, to_page) 102 | elif q == "live_score": 103 | return vlr.vlr_live_score(num_pages, from_page, to_page) 104 | elif q == "results": 105 | return vlr.vlr_match_results(num_pages, from_page, to_page, max_retries, request_delay, timeout) 106 | 107 | else: 108 | return {"error": "Invalid query parameter"} 109 | 110 | 111 | @router.get("/events") 112 | @limiter.limit("600/minute") 113 | async def VLR_events( 114 | request: Request, 115 | q: str = Query( 116 | None, 117 | description="Event type filter", 118 | example="completed", 119 | enum=["upcoming", "completed"] 120 | ), 121 | page: int = Query( 122 | 1, 123 | description="Page number for pagination (only applies to completed events)", 124 | example=1, 125 | ge=1, 126 | le=100 127 | ) 128 | ): 129 | """ 130 | Get Valorant events from VLR.GG with optional filtering and pagination. 
131 | 132 | ## Event Types: 133 | - **upcoming**: Currently active or scheduled future events 134 | - **completed**: Historical events that have finished 135 | - **default**: Both upcoming and completed events (when q parameter is omitted) 136 | 137 | ## Pagination: 138 | - Only applies to **completed events** 139 | - Upcoming events are always from the first page 140 | - Page numbers range from 1 to 100 141 | - Each page contains approximately 25-30 events 142 | 143 | ## Usage Examples: 144 | - `GET /events` - All events (upcoming + completed page 1) 145 | - `GET /events?q=upcoming` - Only upcoming events 146 | - `GET /events?q=completed` - Only completed events (page 1) 147 | - `GET /events?q=completed&page=3` - Completed events from page 3 148 | - `GET /events?page=2` - All events (upcoming + completed page 2) 149 | 150 | ## Response Format: 151 | Returns event details including title, status, prize pool, dates, region, thumbnail, and event URL. 152 | """ 153 | if q == "upcoming": 154 | return vlr.vlr_events(upcoming=True, completed=False, page=page) 155 | elif q == "completed": 156 | return vlr.vlr_events(upcoming=False, completed=True, page=page) 157 | else: 158 | return vlr.vlr_events(upcoming=True, completed=True, page=page) 159 | 160 | 161 | @router.get("/health") 162 | def health(): 163 | return vlr.check_health() 164 | -------------------------------------------------------------------------------- /api/scrapers/events.py: -------------------------------------------------------------------------------- 1 | import re 2 | import requests 3 | from selectolax.parser import HTMLParser 4 | 5 | from utils.utils import headers 6 | 7 | 8 | def vlr_events(upcoming=True, completed=True, page=1): 9 | """ 10 | Get Valorant events from VLR.GG 11 | 12 | Args: 13 | upcoming (bool): If True, include upcoming events 14 | completed (bool): If True, include completed events 15 | page (int): Page number for pagination (only applies to completed events) 16 | 17 | Returns: 18 | dict: Response with status code and events data 19 | """ 20 | # Build URL with pagination for completed events 21 | if completed and page > 1: 22 | url = f"https://www.vlr.gg/events/?page={page}" 23 | else: 24 | url = "https://www.vlr.gg/events" 25 | 26 | resp = requests.get(url, headers=headers) 27 | html = HTMLParser(resp.text) 28 | status = resp.status_code 29 | 30 | # If both are False, show both (default behavior) 31 | if not upcoming and not completed: 32 | upcoming = True 33 | completed = True 34 | 35 | events = [] 36 | 37 | def parse_events(container): 38 | """Helper function to parse event cards""" 39 | for event_item in container.css("a.event-item"): 40 | title = event_item.css_first(".event-item-title") 41 | title = title.text(strip=True) if title else "" 42 | 43 | status_elem = event_item.css_first(".event-item-desc-item-status") 44 | event_status = status_elem.text(strip=True) if status_elem else "" 45 | 46 | # Prize - extract monetary value or TBD (before the nested label div) 47 | prize_elem = event_item.css_first(".event-item-desc-item.mod-prize") 48 | prize = "" 49 | if prize_elem: 50 | # Get the HTML and extract text before the first nested div 51 | full_text = prize_elem.text(strip=True) 52 | 53 | # Split by common separators and take the first meaningful part 54 | # The structure is: "$250,000
Prize Pool" or "TBD Prize Pool
" 55 | parts = re.split(r'(?=Prize Pool|prize pool)', full_text, flags=re.IGNORECASE) 56 | if parts: 57 | first_part = parts[0].strip() 58 | 59 | # Clean up any remaining whitespace or newlines 60 | first_part = re.sub(r'\s+', ' ', first_part).strip() 61 | 62 | # Check for TBD 63 | if first_part.upper() == "TBD": 64 | prize = "TBD" 65 | # Check for dollar amounts 66 | elif re.match(r'^\$[\d,]+$', first_part): 67 | prize = first_part 68 | # Check for numeric values (add $ if missing) 69 | elif re.match(r'^[\d,]+$', first_part) and len(first_part) > 2: 70 | prize = "$" + first_part 71 | 72 | # Dates - extract date range like "Jul 15—Aug 31", avoid TBD if it's for prize 73 | dates_elem = event_item.css_first(".event-item-desc-item.mod-dates") 74 | dates = "" 75 | if dates_elem: 76 | full_text = dates_elem.text(strip=True) 77 | # Use regex to find date patterns like "Jul 15—Aug 31" or "Dec 1—15" 78 | date_match = re.search( 79 | r"[A-Za-z]{3}\s+\d+[—\-–]+[A-Za-z]*\s*\d+", full_text 80 | ) 81 | if date_match: 82 | dates = date_match.group() 83 | else: 84 | # If TBD was found in prize section from dates, don't use TBD as dates 85 | if prize != "TBD" and re.search( 86 | r"\bTBD\b", full_text, re.IGNORECASE 87 | ): 88 | dates = "TBD" 89 | else: 90 | # Fallback: look for any text before "Dates" or similar keywords 91 | lines = full_text.split("\n") 92 | for line in lines: 93 | line = line.strip() 94 | if line and not any( 95 | keyword in line.lower() 96 | for keyword in ["dates", "label", "prize", "pool"] 97 | ): 98 | # Look for lines that contain month abbreviations or date-like patterns 99 | if ( 100 | any( 101 | month in line 102 | for month in [ 103 | "Jan", 104 | "Feb", 105 | "Mar", 106 | "Apr", 107 | "May", 108 | "Jun", 109 | "Jul", 110 | "Aug", 111 | "Sep", 112 | "Oct", 113 | "Nov", 114 | "Dec", 115 | ] 116 | ) 117 | or "—" in line 118 | ): 119 | dates = line 120 | break 121 | 122 | # Region from flag 123 | region = "" 124 | flag_elem = event_item.css_first(".event-item-desc-item.mod-location .flag") 125 | if flag_elem: 126 | class_attr = flag_elem.attributes.get("class", "") 127 | region = class_attr.replace("flag mod-", "").strip() 128 | 129 | # Thumbnail 130 | thumb = "" 131 | img_elem = event_item.css_first(".event-item-thumb img") 132 | if img_elem: 133 | src = img_elem.attributes.get("src", "") 134 | if src.startswith("//"): 135 | thumb = "https:" + src 136 | elif src.startswith("/"): 137 | thumb = "https://www.vlr.gg" + src 138 | else: 139 | thumb = src 140 | 141 | # URL path 142 | url_path = event_item.attributes.get("href", "") 143 | full_url = "https://www.vlr.gg" + url_path if url_path else "" 144 | 145 | events.append( 146 | { 147 | "title": title, 148 | "status": event_status, 149 | "prize": prize, 150 | "dates": dates, 151 | "region": region, 152 | "thumb": thumb, 153 | "url_path": full_url, 154 | } 155 | ) 156 | 157 | # Parse upcoming events 158 | if upcoming: 159 | upcoming_sections = html.css("div.wf-label.mod-large.mod-upcoming") 160 | for section in upcoming_sections: 161 | parent = section.parent 162 | if parent and parent.css("a.event-item"): 163 | parse_events(parent) 164 | 165 | # Parse completed events 166 | if completed: 167 | completed_sections = html.css("div.wf-label.mod-large.mod-completed") 168 | for section in completed_sections: 169 | parent = section.parent 170 | if parent and parent.css("a.event-item"): 171 | parse_events(parent) 172 | 173 | return {"data": {"status": status, "segments": events}} 174 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # vlrggapi
  2 | 
  3 | An Unofficial REST API for [vlr.gg](https://www.vlr.gg/), a site for Valorant Esports match and news coverage.
  4 | 
  5 | Built by [Andre Saddler](https://github.com/axsddlr/)
  6 | 
  7 | ## Current Endpoints
  8 | 
  9 | All endpoints are relative to [https://vlrggapi.vercel.app](https://vlrggapi.vercel.app).
 10 | 
 11 | ### `/news`
 12 | 
 13 | - Method: `GET`
 14 | - Description: Fetches the latest news articles related to Valorant Esports.
 15 | - Example: `GET https://vlrggapi.vercel.app/news`
 16 | - Response Example:
 17 | 
 18 | ```json
 19 | {
 20 |   "data": {
 21 |     "status": 200,
 22 |     "segments": [
 23 |       {
 24 |         "title": "Riot introduces changes to Premier, adds new Invite Division",
 25 |         "description": "Riot looks to streamline Premier promotions and Challengers qualification with upcoming changes.",
 26 |         "date": "April 23, 2024",
 27 |         "author": "thothgow",
 28 |         "url_path": "https://vlr.gg/336099/riot-introduces-changes-to-premier-adds-new-invite-division"
 29 |       },
 30 |       {
 31 |         "title": "jakee announces competitive retirement",
 32 |         "description": "From Collegiate to the Tier 1 stage, the Controller main had seen it all.",
 33 |         "date": "April 21, 2024",
 34 |         "author": "ChickenJoe",
 35 |         "url_path": "https://vlr.gg/334341/jakee-announces-competitive-retirement"
 36 |       }
 37 |     ]
 38 |   }
 39 | }
 40 | ```
 41 | 
 42 | ### `/stats`
 43 | 
 44 | - Method: `GET`
 45 | - Description: Fetches player statistics for a specific region and timespan.
 46 | - Query Parameters:
 47 |   - `region`: Region shortname (e.g., "na" for North America).
 48 |   - `timespan`: Time span in days (e.g., "30" for the last 30 days, or "all" for all time).
 49 | - Example: `GET https://vlrggapi.vercel.app/stats?region=na&timespan=30`
 50 | 
 51 | - Response Example:
 52 | 
 53 | ```json
 54 | {
 55 |   "data": {
 56 |     "status": 200,
 57 |     "segments": [
 58 |       {
 59 |         "player": "corey",
 60 |         "org": "TTR",
 61 |         "rating": "1.18",
 62 |         "average_combat_score": "235.2",
 63 |         "kill_deaths": "1.19",
 64 |         "kill_assists_survived_traded": "72%",
 65 |         "average_damage_per_round": "158.4",
 66 |         "kills_per_round": "0.81",
 67 |         "assists_per_round": "0.29",
 68 |         "first_kills_per_round": "0.19",
 69 |         "first_deaths_per_round": "0.13",
 70 |         "headshot_percentage": "26%",
 71 |         "clutch_success_percentage": "28%"
 72 |       },
 73 |       {
 74 |         "player": "wedid",
 75 |         "org": "TTR",
 76 |         "rating": "1.15",
 77 |         "average_combat_score": "216.1",
 78 |         "kill_deaths": "1.11",
 79 |         "kill_assists_survived_traded": "72%",
 80 |         "average_damage_per_round": "141.0",
 81 |         "kills_per_round": "0.76",
 82 |         "assists_per_round": "0.39",
 83 |         "first_kills_per_round": "0.07",
 84 |         "first_deaths_per_round": "0.10",
 85 |         "headshot_percentage": "32%",
 86 |         "clutch_success_percentage": "19%"
 87 |       }
 88 |     ]
 89 |   }
 90 | }
 91 | ```
 92 | 
 93 | ### `/rankings`
 94 | 
 95 | - Method: `GET`
 96 | - Description: Fetches rankings for a specific region.
 97 | - Query Parameters:
 98 |   - `region`: Region shortname (e.g., "na" for North America).
 99 | - Example: `GET https://vlrggapi.vercel.app/rankings?region=na`
100 | - Response Example:
101 | 
102 | ```json
103 | {
104 |   "status": 200,
105 |   "data": [
106 |     {
107 |       "rank": "1",
108 |       "team": "M80",
109 |       "country": "Canada",
110 |       "last_played": "4d ago",
111 |       "last_played_team": "vs. 
Turtle Tr", 112 | "last_played_team_logo": "//owcdn.net/img/63d552c5dd028.png", 113 | "record": "4-1", 114 | "earnings": "$104,850", 115 | "logo": "//owcdn.net/img/63d91e60a84bc.png" 116 | }, 117 | { 118 | "rank": "2", 119 | "team": "Sentinels", 120 | "country": "United States", 121 | "last_played": "22h ago", 122 | "last_played_team": "vs. Evil Geniuses", 123 | "last_played_team_logo": "//owcdn.net/img/62a409ad29351.png", 124 | "record": "7-3", 125 | "earnings": "$295,500", 126 | "logo": "//owcdn.net/img/62875027c8e06.png" 127 | } 128 | ] 129 | } 130 | ``` 131 | 132 | ### `/match` 133 | 134 | - Method: `GET` 135 | - Description: Fetches matches based on the query parameter provided. 136 | - Query Parameters: 137 | - `q`: Type of matches to fetch ("upcoming", "live_score", "results"). 138 | - Examples: 139 | - Upcoming matches: `GET https://vlrggapi.vercel.app/match?q=upcoming` 140 | - Live scores: `GET https://vlrggapi.vercel.app/match?q=live_score` 141 | - Match results: `GET https://vlrggapi.vercel.app/match?q=results` 142 | - Response Example for `q=upcoming`: 143 | 144 | ```json 145 | { 146 | "data": { 147 | "status": 200, 148 | "segments": [ 149 | { 150 | "team1": "G2 Esports", 151 | "team2": "Leviatán", 152 | "flag1": "flag_us", 153 | "flag2": "flag_cl", 154 | "time_until_match": "51m from now", 155 | "match_series": "Regular Season: Week 3", 156 | "match_event": "Champions Tour 2024: Americas Stage 1", 157 | "unix_timestamp": "2024-04-24 21:00:00", 158 | "match_page": "https://www.vlr.gg/314642/g2-esports-vs-leviat-n-champions-tour-2024-americas-stage-1-w3" 159 | } 160 | ] 161 | } 162 | } 163 | ``` 164 | 165 | - Response Example for `q=live_score`: 166 | 167 | ```json 168 | { 169 | "data": { 170 | "status": 200, 171 | "segments": [ 172 | { 173 | "team1": "Team 1 Name", 174 | "team2": "Team 2 Name", 175 | "flag1": "Country Flag of Team 1", 176 | "flag2": "Country Flag of Team 2", 177 | "team1_logo": "URL to Team 1 logo", 178 | "team2_logo": "URL to Team 2 logo", 179 | "score1": "Team 1 Score", 180 | "score2": "Team 2 Score", 181 | "team1_round_ct": "Team 1 CT-side rounds", 182 | "team1_round_t": "Team 1 T-side rounds", 183 | "team2_round_ct": "Team 2 CT-side rounds", 184 | "team2_round_t": "Team 2 T-side rounds", 185 | "map_number": "Current map number in the series", 186 | "current_map": "Current map being played", 187 | "time_until_match": "LIVE", 188 | "match_event": "Event name", 189 | "match_series": "Match series", 190 | "unix_timestamp": "Match start time in UNIX timestamp", 191 | "match_page": "URL to the match page" 192 | } 193 | ] 194 | } 195 | } 196 | ``` 197 | 198 | - Response Example for `q=results`: 199 | 200 | ```json 201 | { 202 | "data": { 203 | "status": 200, 204 | "segments": [ 205 | { 206 | "team1": "Team Vitality", 207 | "team2": "Gentle Mates", 208 | "score1": "0", 209 | "score2": "2", 210 | "flag1": "flag_eu", 211 | "flag2": "flag_fr", 212 | "time_completed": "2h 44m ago", 213 | "round_info": "Regular Season-Week 4", 214 | "tournament_name": "Champions Tour 2024: EMEA Stage 1", 215 | "match_page": "/318931/team-vitality-vs-gentle-mates-champions-tour-2024-emea-stage-1-w4", 216 | "tournament_icon": "https://owcdn.net/img/65ab59620a233.png" 217 | } 218 | ] 219 | } 220 | } 221 | ``` 222 | 223 | ### `/events` 224 | 225 | - Method: `GET` 226 | - Description: Fetches Valorant events from vlr.gg with filtering and pagination options. 
227 | - Query Parameters:
228 |   - `q`: Event type filter (optional)
229 |     - `"upcoming"`: Show only upcoming events
230 |     - `"completed"`: Show only completed events
231 |     - No parameter or other values: Show both upcoming and completed events
232 |   - `page`: Page number for pagination (optional, default: 1, applies to completed events only)
233 | - Examples:
234 |   - All events: `GET https://vlrggapi.vercel.app/events`
235 |   - Upcoming only: `GET https://vlrggapi.vercel.app/events?q=upcoming`
236 |   - Completed only: `GET https://vlrggapi.vercel.app/events?q=completed`
237 |   - Completed events page 2: `GET https://vlrggapi.vercel.app/events?q=completed&page=2`
238 | - Response Example:
239 | 
240 | ```json
241 | {
242 |   "data": {
243 |     "status": 200,
244 |     "segments": [
245 |       {
246 |         "title": "VCT 2025: Pacific Stage 2",
247 |         "status": "ongoing",
248 |         "prize": "$250,000",
249 |         "dates": "Jul 15—Aug 31",
250 |         "region": "kr",
251 |         "thumb": "https://owcdn.net/img/640f5ae002674.png",
252 |         "url_path": "https://www.vlr.gg/event/2500/vct-2025-pacific-stage-2"
253 |       },
254 |       {
255 |         "title": "VCT 2025: China Stage 2",
256 |         "status": "ongoing",
257 |         "prize": "TBD",
258 |         "dates": "Jul 3—Aug 24",
259 |         "region": "cn",
260 |         "thumb": "https://owcdn.net/img/65dd97cea9a25.png",
261 |         "url_path": "https://www.vlr.gg/event/2499/vct-2025-china-stage-2"
262 |       }
263 |     ]
264 |   }
265 | }
266 | ```
267 | 
268 | ### `/health`
269 | 
270 | - Method: `GET`
271 | - Description: Returns the health status of the API and vlr.gg website.
272 | - Example: `GET https://vlrggapi.vercel.app/health`
273 | - Response Example:
274 | 
275 | ```json
276 | {
277 |   "https://vlrggapi.vercel.app": {
278 |     "status": "Healthy",
279 |     "status_code": 200
280 |   },
281 |   "https://vlr.gg": {
282 |     "status": "Healthy",
283 |     "status_code": 200
284 |   }
285 | }
286 | ```
287 | 
288 | The response includes the status ("Healthy" or "Unhealthy") and the HTTP status code for both the API and the vlr.gg website. If a site is unreachable, the status will be "Unhealthy" and the status_code will be null.
289 | 
290 | ## Installation
291 | 
292 | ### Source
293 | 
294 | ```bash
295 | git clone https://github.com/axsddlr/vlrggapi/
296 | cd vlrggapi
297 | pip3 install -r requirements.txt
298 | ```
299 | 
300 | ### Usage
301 | 
302 | ```bash
303 | python3 main.py
304 | ```
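305 | 
306 | Once the server is running, any HTTP client can query it. A minimal sketch using `requests` (assumes the default port 3001 configured in `main.py`):
307 | 
308 | ```python
309 | import requests
310 | 
311 | # Query a local vlrggapi instance for the latest news articles.
312 | resp = requests.get("http://127.0.0.1:3001/news")
313 | resp.raise_for_status()
314 | 
315 | for article in resp.json()["data"]["segments"]:
316 |     print(article["title"], "->", article["url_path"])
317 | ```
318 | 
319 | ## Built With
320 | 
321 | - [FastAPI](https://fastapi.tiangolo.com/)
322 | - [Requests](https://requests.readthedocs.io/en/master/)
323 | - [Selectolax](https://github.com/rushter/selectolax)
324 | - [uvicorn](https://www.uvicorn.org/)
325 | 
326 | ## Contributing
327 | 
328 | Feel free to submit a [pull request](https://github.com/axsddlr/vlrggapi/pull/new/master) or an [issue](https://github.com/axsddlr/vlrggapi/issues/new)!
329 | 
330 | ## License
331 | 
332 | The MIT License (MIT)
333 | 
--------------------------------------------------------------------------------
/api/scrapers/matches.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | import time
 3 | from datetime import datetime, timezone
 4 | 
 5 | import requests
 6 | from selectolax.parser import HTMLParser
 7 | 
 8 | from utils.utils import headers
 9 | 
10 | 
11 | def vlr_upcoming_matches(num_pages=1, from_page=None, to_page=None):
12 |     """
13 |     Get upcoming matches from VLR.GG. 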
14 | 15 | Args: 16 | num_pages (int): Number of pages to scrape from page 1 (ignored if from_page/to_page specified) 17 | from_page (int, optional): Starting page number (1-based) 18 | to_page (int, optional): Ending page number (1-based, inclusive) 19 | """ 20 | # Note: VLR.GG upcoming matches are typically only on the homepage 21 | # Page range parameters are included for API consistency but may not apply 22 | url = "https://www.vlr.gg" 23 | resp = requests.get(url, headers=headers) 24 | html = HTMLParser(resp.text) 25 | status = resp.status_code 26 | 27 | result = [] 28 | for item in html.css(".js-home-matches-upcoming a.wf-module-item"): 29 | is_upcoming = item.css_first(".h-match-eta.mod-upcoming") 30 | if is_upcoming: 31 | teams = [] 32 | flags = [] 33 | scores = [] 34 | for team in item.css(".h-match-team"): 35 | teams.append(team.css_first(".h-match-team-name").text().strip()) 36 | flags.append( 37 | team.css_first(".flag") 38 | .attributes["class"] 39 | .replace(" mod-", "") 40 | .replace("16", "_") 41 | ) 42 | scores.append(team.css_first(".h-match-team-score").text().strip()) 43 | 44 | eta = item.css_first(".h-match-eta").text().strip() 45 | if eta != "LIVE": 46 | eta = eta + " from now" 47 | 48 | match_event = item.css_first(".h-match-preview-event").text().strip() 49 | match_series = item.css_first(".h-match-preview-series").text().strip() 50 | timestamp = datetime.fromtimestamp( 51 | int(item.css_first(".moment-tz-convert").attributes["data-utc-ts"]), 52 | tz=timezone.utc, 53 | ).strftime("%Y-%m-%d %H:%M:%S") 54 | url_path = "https://www.vlr.gg/" + item.attributes["href"] 55 | 56 | result.append( 57 | { 58 | "team1": teams[0], 59 | "team2": teams[1], 60 | "flag1": flags[0], 61 | "flag2": flags[1], 62 | "time_until_match": eta, 63 | "match_series": match_series, 64 | "match_event": match_event, 65 | "unix_timestamp": timestamp, 66 | "match_page": url_path, 67 | } 68 | ) 69 | 70 | segments = {"status": status, "segments": result} 71 | data = {"data": segments} 72 | 73 | if status != 200: 74 | raise Exception("API response: {}".format(status)) 75 | return data 76 | 77 | 78 | def vlr_live_score(num_pages=1, from_page=None, to_page=None): 79 | """ 80 | Get live match scores from VLR.GG. 
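Serves the /match?q=live_score route in routers/vlr_router.py; each live match's own page is fetched for team logos and current-map details.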
81 | 82 | Args: 83 | num_pages (int): Number of pages to scrape from page 1 (ignored if from_page/to_page specified) 84 | from_page (int, optional): Starting page number (1-based) 85 | to_page (int, optional): Ending page number (1-based, inclusive) 86 | """ 87 | # Note: VLR.GG live matches are typically only on the homepage 88 | # Page range parameters are included for API consistency but may not apply 89 | url = "https://www.vlr.gg" 90 | resp = requests.get(url, headers=headers) 91 | html = HTMLParser(resp.text) 92 | status = resp.status_code 93 | 94 | matches = html.css(".js-home-matches-upcoming a.wf-module-item") 95 | result = [] 96 | for match in matches: 97 | is_live = match.css_first(".h-match-eta.mod-live") 98 | if is_live: 99 | teams = [] 100 | flags = [] 101 | scores = [] 102 | round_texts = [] 103 | for team in match.css(".h-match-team"): 104 | teams.append(team.css_first(".h-match-team-name").text().strip()) 105 | flags.append( 106 | team.css_first(".flag") 107 | .attributes["class"] 108 | .replace(" mod-", "") 109 | .replace("16", "_") 110 | ) 111 | scores.append(team.css_first(".h-match-team-score").text().strip()) 112 | round_info_ct = team.css(".h-match-team-rounds .mod-ct") 113 | round_info_t = team.css(".h-match-team-rounds .mod-t") 114 | round_text_ct = ( 115 | round_info_ct[0].text().strip() if round_info_ct else "N/A" 116 | ) 117 | round_text_t = round_info_t[0].text().strip() if round_info_t else "N/A" 118 | round_texts.append({"ct": round_text_ct, "t": round_text_t}) 119 | 120 | eta = "LIVE" 121 | match_event = match.css_first(".h-match-preview-event").text().strip() 122 | match_series = match.css_first(".h-match-preview-series").text().strip() 123 | timestamp = datetime.fromtimestamp( 124 | int(match.css_first(".moment-tz-convert").attributes["data-utc-ts"]), 125 | tz=timezone.utc, 126 | ).strftime("%Y-%m-%d %H:%M:%S") 127 | url_path = "https://www.vlr.gg/" + match.attributes["href"] 128 | 129 | match_page = requests.get(url_path, headers=headers) 130 | match_html = HTMLParser(match_page.text) 131 | 132 | team_logos = [] 133 | for img in match_html.css(".match-header-vs img"): 134 | logo_url = "https:" + img.attributes.get("src", "") 135 | team_logos.append(logo_url) 136 | 137 | current_map_element = match_html.css_first( 138 | ".vm-stats-gamesnav-item.js-map-switch.mod-active.mod-live" 139 | ) 140 | current_map = "Unknown" 141 | if current_map_element: 142 | current_map = ( 143 | current_map_element.css_first("div", default="Unknown") 144 | .text() 145 | .strip() 146 | .replace("\n", "") 147 | .replace("\t", "") 148 | ) 149 | current_map = re.sub(r"^\d+", "", current_map) 150 | map_number_match = ( 151 | current_map_element.css_first("div", default="Unknown") 152 | .text() 153 | .strip() 154 | .replace("\n", "") 155 | .replace("\t", "") 156 | ) 157 | map_number_match = re.search(r"^\d+", map_number_match) 158 | map_number = ( 159 | map_number_match.group(0) if map_number_match else "Unknown" 160 | ) 161 | 162 | team1_round_ct = round_texts[0]["ct"] if len(round_texts) > 0 else "N/A" 163 | team1_round_t = round_texts[0]["t"] if len(round_texts) > 0 else "N/A" 164 | team2_round_ct = round_texts[1]["ct"] if len(round_texts) > 1 else "N/A" 165 | team2_round_t = round_texts[1]["t"] if len(round_texts) > 1 else "N/A" 166 | result.append( 167 | { 168 | "team1": teams[0], 169 | "team2": teams[1], 170 | "flag1": flags[0], 171 | "flag2": flags[1], 172 | "team1_logo": team_logos[0] if len(team_logos) > 0 else "", 173 | "team2_logo": team_logos[1] if len(team_logos) > 1 else 
"", 174 | "score1": scores[0], 175 | "score2": scores[1], 176 | "team1_round_ct": team1_round_ct, 177 | "team1_round_t": team1_round_t, 178 | "team2_round_ct": team2_round_ct, 179 | "team2_round_t": team2_round_t, 180 | "map_number": map_number, 181 | "current_map": current_map, 182 | "time_until_match": eta, 183 | "match_event": match_event, 184 | "match_series": match_series, 185 | "unix_timestamp": timestamp, 186 | "match_page": url_path, 187 | } 188 | ) 189 | 190 | segments = {"status": status, "segments": result} 191 | data = {"data": segments} 192 | 193 | if status != 200: 194 | raise Exception("API response: {}".format(status)) 195 | return data 196 | 197 | 198 | def vlr_match_results(num_pages=1, from_page=None, to_page=None, max_retries=3, request_delay=1.0, timeout=30): 199 | """ 200 | Scrape match results with robust error handling for large page counts. 201 | 202 | Args: 203 | num_pages (int): Number of pages to scrape from page 1 (ignored if from_page/to_page specified) 204 | from_page (int, optional): Starting page number (1-based) 205 | to_page (int, optional): Ending page number (1-based, inclusive) 206 | max_retries (int): Maximum retry attempts per page 207 | request_delay (float): Delay between requests in seconds 208 | timeout (int): Request timeout in seconds 209 | 210 | Returns: 211 | dict: API response with match data 212 | """ 213 | 214 | result = [] 215 | status = 200 216 | failed_pages = [] 217 | 218 | # Determine page range 219 | if from_page is not None and to_page is not None: 220 | if from_page < 1: 221 | raise ValueError("from_page must be >= 1") 222 | if to_page < from_page: 223 | raise ValueError("to_page must be >= from_page") 224 | start_page = from_page 225 | end_page = to_page 226 | total_pages = end_page - start_page + 1 227 | elif from_page is not None: 228 | if from_page < 1: 229 | raise ValueError("from_page must be >= 1") 230 | start_page = from_page 231 | end_page = from_page + num_pages - 1 232 | total_pages = num_pages 233 | elif to_page is not None: 234 | if to_page < 1: 235 | raise ValueError("to_page must be >= 1") 236 | start_page = max(1, to_page - num_pages + 1) 237 | end_page = to_page 238 | total_pages = end_page - start_page + 1 239 | else: 240 | # Default behavior: scrape from page 1 241 | start_page = 1 242 | end_page = num_pages 243 | total_pages = num_pages 244 | 245 | # Create a session for connection pooling and efficiency 246 | session = requests.Session() 247 | session.headers.update(headers) 248 | 249 | print(f"Starting to scrape pages {start_page}-{end_page} ({total_pages} pages) with {request_delay}s delay between requests...") 250 | 251 | for page in range(start_page, end_page + 1): 252 | page_success = False 253 | retry_count = 0 254 | 255 | while not page_success and retry_count < max_retries: 256 | try: 257 | if page == 1: 258 | url = "https://www.vlr.gg/matches/results" 259 | else: 260 | url = f"https://www.vlr.gg/matches/results/?page={page}" 261 | 262 | current_page_num = page - start_page + 1 263 | print(f"Scraping page {page} ({current_page_num}/{total_pages}) (attempt {retry_count + 1}/{max_retries})") 264 | 265 | # Add timeout and handle potential connection issues 266 | resp = session.get(url, timeout=timeout) 267 | html = HTMLParser(resp.text) 268 | current_status = resp.status_code 269 | 270 | if current_status != 200: 271 | print(f"Warning: Page {page} returned status {current_status}") 272 | retry_count += 1 273 | if retry_count < max_retries: 274 | time.sleep(request_delay * (2 ** retry_count)) # Exponential 
backoff 275 | continue 276 | 277 | page_results = [] 278 | items = html.css("a.wf-module-item") 279 | 280 | if not items: 281 | print(f"Warning: No match items found on page {page}") 282 | page_success = True # Consider empty page as success 283 | break 284 | 285 | for item in items: 286 | try: 287 | url_path = item.attributes["href"] 288 | eta = item.css_first("div.ml-eta").text() + " ago" 289 | rounds = ( 290 | item.css_first("div.match-item-event-series") 291 | .text() 292 | .replace("\u2013", "-") 293 | .replace("\n", "") 294 | .replace("\t", "") 295 | ) 296 | tourney = ( 297 | item.css_first("div.match-item-event") 298 | .text() 299 | .replace("\t", " ") 300 | .strip() 301 | .split("\n")[1] 302 | .strip() 303 | ) 304 | tourney_icon_url = f"https:{item.css_first('img').attributes['src']}" 305 | 306 | try: 307 | team_array = ( 308 | item.css_first("div.match-item-vs").css_first("div:nth-child(2)").text() 309 | ) 310 | except Exception: 311 | team_array = "TBD" 312 | team_array = ( 313 | team_array.replace("\t", " ") 314 | .replace("\n", " ") 315 | .strip() 316 | .split(" ") 317 | ) 318 | team1 = team_array[0] 319 | score1 = team_array[1].replace(" ", "").strip() 320 | team2 = team_array[4].strip() 321 | score2 = team_array[-1].replace(" ", "").strip() 322 | 323 | flag_list = [ 324 | flag_parent.attributes["class"].replace(" mod-", "_") 325 | for flag_parent in item.css(".flag") 326 | ] 327 | flag1 = flag_list[0] if len(flag_list) > 0 else "" 328 | flag2 = flag_list[1] if len(flag_list) > 1 else "" 329 | 330 | page_results.append( 331 | { 332 | "team1": team1, 333 | "team2": team2, 334 | "score1": score1, 335 | "score2": score2, 336 | "flag1": flag1, 337 | "flag2": flag2, 338 | "time_completed": eta, 339 | "round_info": rounds, 340 | "tournament_name": tourney, 341 | "match_page": url_path, 342 | "tournament_icon": tourney_icon_url, 343 | "page_number": page, # Track which page this came from 344 | } 345 | ) 346 | except Exception as e: 347 | print(f"Warning: Failed to parse match item on page {page}: {str(e)}") 348 | continue 349 | 350 | result.extend(page_results) 351 | print(f"Successfully scraped page {page}: {len(page_results)} matches") 352 | page_success = True 353 | 354 | # Rate limiting between successful requests 355 | if page < end_page: 356 | time.sleep(request_delay) 357 | 358 | except requests.exceptions.Timeout: 359 | retry_count += 1 360 | print(f"Timeout error on page {page}, attempt {retry_count}/{max_retries}") 361 | if retry_count < max_retries: 362 | backoff_time = request_delay * (2 ** retry_count) 363 | print(f"Retrying page {page} in {backoff_time:.1f} seconds...") 364 | time.sleep(backoff_time) 365 | 366 | except requests.exceptions.ConnectionError: 367 | retry_count += 1 368 | print(f"Connection error on page {page}, attempt {retry_count}/{max_retries}") 369 | if retry_count < max_retries: 370 | backoff_time = request_delay * (2 ** retry_count) 371 | print(f"Retrying page {page} in {backoff_time:.1f} seconds...") 372 | time.sleep(backoff_time) 373 | 374 | except Exception as e: 375 | retry_count += 1 376 | print(f"Unexpected error on page {page}: {str(e)}") 377 | if retry_count < max_retries: 378 | backoff_time = request_delay * (2 ** retry_count) 379 | print(f"Retrying page {page} in {backoff_time:.1f} seconds...") 380 | time.sleep(backoff_time) 381 | 382 | if not page_success: 383 | failed_pages.append(page) 384 | print(f"Failed to scrape page {page} after {max_retries} attempts") 385 | 386 | # Close the session 387 | session.close() 388 | 389 | # Report 
results 390 | total_matches = len(result) 391 | successful_pages = total_pages - len(failed_pages) 392 | 393 | print(f"\nScraping completed:") 394 | print(f" Page range: {start_page}-{end_page}") 395 | print(f" Total matches: {total_matches}") 396 | print(f" Successful pages: {successful_pages}/{total_pages}") 397 | 398 | if failed_pages: 399 | print(f" Failed pages: {failed_pages}") 400 | print(f" Consider retrying failed pages or adjusting parameters") 401 | 402 | segments = { 403 | "status": status, 404 | "segments": result, 405 | "meta": { 406 | "page_range": f"{start_page}-{end_page}", 407 | "total_pages_requested": total_pages, 408 | "successful_pages": successful_pages, 409 | "failed_pages": failed_pages, 410 | "total_matches": total_matches 411 | } 412 | } 413 | data = {"data": segments} 414 | 415 | if not result: 416 | raise Exception(f"No data retrieved. Failed pages: {failed_pages}") 417 | 418 | return data --------------------------------------------------------------------------------