├── .gitattributes ├── .gitignore ├── LICENSE ├── package_release.py ├── readme.md ├── release_info ├── README.txt ├── config.yml └── speedrun_rescue.bat ├── requirements.txt ├── speedrunrescue.py ├── srcomapi.py └── twitch_integration.py /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | *.txt text 4 | *.json text 5 | *.yml text 6 | *.py text 7 | *.md text 8 | *.bat text 9 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Default ignored files 2 | /shelf/ 3 | /workspace.xml 4 | # Editor-based HTTP Client requests 5 | /httpRequests/ 6 | # Datasource local storage ignored files 7 | /dataSources/ 8 | /dataSources.local.xml 9 | ### Python template 10 | # Byte-compiled / optimized / DLL files 11 | __pycache__/ 12 | *.py[cod] 13 | *$py.class 14 | 15 | # C extensions 16 | *.so 17 | 18 | # Distribution / packaging 19 | .Python 20 | build/ 21 | develop-eggs/ 22 | dist/ 23 | downloads/ 24 | eggs/ 25 | .eggs/ 26 | lib/ 27 | lib64/ 28 | parts/ 29 | sdist/ 30 | var/ 31 | wheels/ 32 | share/python-wheels/ 33 | *.egg-info/ 34 | .installed.cfg 35 | *.egg 36 | MANIFEST 37 | 38 | # PyInstaller 39 | # Usually these files are written by a python script from a template 40 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 41 | *.manifest 42 | *.spec 43 | 44 | # Installer logs 45 | pip-log.txt 46 | pip-delete-this-directory.txt 47 | 48 | # Unit test / coverage reports 49 | htmlcov/ 50 | .tox/ 51 | .nox/ 52 | .coverage 53 | .coverage.* 54 | .cache 55 | nosetests.xml 56 | coverage.xml 57 | *.cover 58 | *.py,cover 59 | .hypothesis/ 60 | .pytest_cache/ 61 | cover/ 62 | 63 | # Translations 64 | *.mo 65 | *.pot 66 | 67 | # Django stuff: 68 | *.log 69 | local_settings.py 70 | db.sqlite3 71 | db.sqlite3-journal 72 | 73 | # Flask stuff: 74 | instance/ 75 | .webassets-cache 76 | 77 | # Scrapy stuff: 78 | .scrapy 79 | 80 | # Sphinx documentation 81 | docs/_build/ 82 | 83 | # PyBuilder 84 | .pybuilder/ 85 | target/ 86 | 87 | # Jupyter Notebook 88 | .ipynb_checkpoints 89 | 90 | # IPython 91 | profile_default/ 92 | ipython_config.py 93 | 94 | # pyenv 95 | # For a library or package, you might want to ignore these files since the code is 96 | # intended to run in multiple environments; otherwise, check them in: 97 | # .python-version 98 | 99 | # pipenv 100 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 101 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 102 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 103 | # install all needed dependencies. 104 | #Pipfile.lock 105 | 106 | # poetry 107 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 108 | # This is especially recommended for binary packages to ensure reproducibility, and is more 109 | # commonly ignored for libraries. 110 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 111 | #poetry.lock 112 | 113 | # pdm 114 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 115 | #pdm.lock 116 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 117 | # in version control. 118 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control 119 | .pdm.toml 120 | .pdm-python 121 | .pdm-build/ 122 | 123 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 124 | __pypackages__/ 125 | 126 | # Celery stuff 127 | celerybeat-schedule 128 | celerybeat.pid 129 | 130 | # SageMath parsed files 131 | *.sage.py 132 | 133 | # Environments 134 | .env 135 | .venv 136 | env/ 137 | venv/ 138 | ENV/ 139 | env.bak/ 140 | venv.bak/ 141 | 142 | # Spyder project settings 143 | .spyderproject 144 | .spyproject 145 | 146 | # Rope project settings 147 | .ropeproject 148 | 149 | # mkdocs documentation 150 | /site 151 | 152 | # mypy 153 | .mypy_cache/ 154 | .dmypy.json 155 | dmypy.json 156 | 157 | # Pyre type checker 158 | .pyre/ 159 | 160 | # pytype static type analyzer 161 | .pytype/ 162 | 163 | # Cython debug symbols 164 | cython_debug/ 165 | 166 | # PyCharm 167 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 168 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 169 | # and can be added to the global gitignore or merged into this file. For a more nuclear 170 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 171 | .idea/ 172 | 173 | /twitch_highlights.txt 174 | virt/ 175 | remaining_downloads.json 176 | twitch_highlights.json 177 | srcom_cached/ 178 | output/ 179 | twitch_cache.json 180 | config.json 181 | config.yml 182 | release_working/ 183 | !release_info/config.yml 184 | build_options.yml 185 | videos/ -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 Matse007 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /package_release.py: -------------------------------------------------------------------------------- 1 | import yaml 2 | import subprocess 3 | import pathlib 4 | import shutil 5 | import platform 6 | import argparse 7 | import PyInstaller.__main__ 8 | 9 | def main(): 10 | with open("build_options.yml", "r") as f: 11 | options = yaml.safe_load(f) 12 | 13 | print("Building executable!") 14 | PyInstaller.__main__.run(["speedrunrescue.py", "-D", "--noconfirm"]) 15 | 16 | release_name = options["release_name"] 17 | release_dirname = f"release_working/{release_name}" 18 | print(f"Creating release at {release_dirname}!") 19 | release_dirpath = pathlib.Path(release_dirname) 20 | if release_dirpath.is_dir(): 21 | shutil.rmtree(release_dirpath) 22 | 23 | print("Copying over files!") 24 | shutil.copytree("release_info", release_dirpath) 25 | shutil.copytree("dist/speedrunrescue", f"{release_dirname}/bin") 26 | 27 | print("Creating zip archive!") 28 | sevenz_filename = options["sevenz_filename"] 29 | 30 | subprocess.run((sevenz_filename, "a", f"release_working/SpeedrunRescueScript_{release_name}.zip", f"./{release_dirname}/*", "-tzip", "-mx=9", "-mfb=258", "-mpass=3", "-mmt=off"), check=True) 31 | 32 | if __name__ == "__main__": 33 | main() 34 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # Speedrun.com Twitch Highlight Downloader 2 | With the [recent changes to Twitch highlights](https://x.com/twitchsupport/status/1892277199497043994) which limit the total duration of highlights for a channel to 100 hours, there has been significant panic about the loss of Twitch VODs on speedrun.com. This program aims to assist in archiving Twitch VODs, by providing the following functions: 3 | - Finding all runs submitted by a user OR all runs on a game leaderboard, that are hosted on Twitch, and writing that information to a file 4 | - Downloading all runs as described above 5 | - For runs from a game leaderboard, optionally only downloading runs from channels which have exceeded the highlight limit. 6 | 7 | Have any questions? Ask in the [official speedrun.com Discord](https://discord.gg/0h6sul1ZwHVpXJmK). 8 | 9 | For people prefering a video tutorial, here is one that helps you to setup the tool on windows. 10 | 11 | https://www.youtube.com/watch?v=tkgnHt2aVmQ 12 | 13 | # Setup (Executable, Windows only) 14 | 1. Download the latest release [here](https://github.com/Matse007/SpeedrunRescueScript/releases/latest). Be sure to download the file called "SpeedrunRescueScript_v{xxx}.zip", where {xxx} is the version number. 15 | 2. Extract the zip file and open its contents. 16 | 3. Run the program by clicking `speedrun_rescue.bat`, **BUT DON'T DO SO YET**. Read the [configuration options](#configuration) first. 17 | 18 | # Setup (command line) 19 | 20 | ## Prerequisites 21 | Before running the script you need to have the following tools installed: 22 | - Python 3.x which you can download here if you have not yet: https://www.python.org/downloads/ 23 | - Install the required Python packages. The script depends on several external libraries. You can install these using pip and the requirements.txt file you find in this project as well. 24 | 25 | ## Installation Steps 26 | 1. Click the Code button on top of the webpage and press download Zip. If you are an advanced user, clone the repository. 27 | 2. Unpack the zip file or go into the folder and open a command line. If you are on Windows you can do that by clicking into the Link field in windows explorer and typing in cmd. 28 | 3. Install all the dependencies using the following command (copy pasting this into the command prompt) 29 | ```sh 30 | pip install -r requirements.txt 31 | ``` 32 | 4. Make sure to have ffmpeg installed. This script is using yt-dlp which absolutely requires ffmpeg. Look for an installation guide for installing ffmpeg. You can download it here on [their official website](https://ffmpeg.org/download.html) 33 | 34 | To run the script, run `python speedrunrescue.py`. Please read the [configuration options](#configuration) below. 35 | 36 | ## Configuration 37 | Options to the program are provided in a file called `config.yml`, in the same folder as the script (For executable users, do not worry, where config.yml is placed is correct). 38 | 39 | Telling the program what to do is very simple. The program is controlled by options, which dictates one aspect of what the program should do. 40 | 41 | ### Specifying an option 42 | To specify an option: 43 | 1. Find an empty line to place the option. 44 | 2. Place the option name, e.g. `username`, followed by a colon and a space (`: `), followed by the option value, e.g. `luckytyphlosion`. 45 | 46 | For this example, the full option would be: 47 | ``` 48 | username: luckytyphlosion 49 | ``` 50 | 51 | You should not list the same option multiple times. For example, **do not** do this: 52 | ``` 53 | username: luckytyphlosion 54 | username: Matse007 55 | ``` 56 | 57 | This is important as in the pre-made `config.yml` in the executable release, some options have already been specified, so you should not add multiple of the option. 58 | 59 | ### Ignoring an option 60 | Sometimes, you may to ignore an option in your configuration. There are two ways to do this. 61 | 1. Add a `#` at the start of the option. For example, `#username: luckytyphlosion` 62 | 2. Remove the option entirely by deleting the line. 63 | 64 | ## Tasks 65 | There are two ways of editing `config.yml`, depending on your purposes. These two are explained below. 66 | 67 | ### Downloading from a speedrun.com user 68 | 1. [Ignore](#ignoring-an-option) the `game` option if it is there and not ignored already. 69 | 2. [Specify](#specifying-an-option) an option called `username`. The value should be the speedrun.com username for which you want to download runs from. 70 | 3. Optionally, you can [specify](#specifying-an-option) the option `video-folder-name`, which will control the folder where your videos are stored. You can get the folder name by double clicking the address bar in Windows Explorer of the folder you want. Note that you must use forward slashes as path separators, e.g. `D:\speedrunrescuescript\videos` must become `D:/speedrunrescuescript/videos`. If you aren't sure, leave it as `videos`. 71 | 4. [Ignore](#ignoring-an-option) the `app-id` and `app-secret` options if they exist. 72 | 5. [Specify](#specifying-an-option) the `download-videos` option, by putting `true` if you want to fetch information about the user's runs and download the videos, or `false` if you only want to fetch the information. 73 | 6. Optionally, you can [specify](#specifying-an-option) a video quality target using the `video-quality` option. The value should either be the video quality or the video height which you want to target, e.g. `"360p"`, `"720"`, `"1080p"`, `"542"`. It can also be `"best"`, which will just automatically download the highest quality video. The program will default to `"best"` is this option is omitted. **THIS OPTION SHOULD BE IN QUOTES**, i.e. do `"360p"`, not `360p`. In case the specified quality cannot be found, the program will try to find an adjacent quality and download that. Add `>=` before the quality to download the closest higher quality, e.g. `">=480p"`, and `>=` to download the closest lower quality, e.g. `"<=480p"`. If neither are specified, the program assumes `>=` is chosen. For example, if a video has the quality options 360p and 542p, this is the logic of `>=` and `<=`: 74 | - `>=480p`: Will download 542p, as it is the next higher quality 75 | - `<=480p`: Will download 360p, as it is the next lower quality 76 | 77 | Sometimes, the lower quality encodes Twitch produces are greater in size than the lower quality resolutions (e.g. viewing the sizes of [this video](https://www.twitch.tv/videos/1906117644) using [TwitchDownloader](https://github.com/lay295/TwitchDownloader) says that the Source resolution is smaller than 480p). After deciding the desired quality, the program will check if this is the case, and download the Source quality if it is smaller than the initial desired quality. 78 | 7. [Specify](#specifying-an-option) the `ignore-links-in-description` option with `true` if want to ignore video links that are posted in the run description and only check video links in the submission field, and `false` if you want to check links from both the submission field and the description. Not recommended as some people put other parts of the run in the description. 79 | 8. [Specify](#specifying-an-option) the `safe-only-pbs` option with `true` if you want to only want to consider your own pbs or `false` if you want to include obsolete runs. This option does only work on a user as of right now. 80 | 9. Optionally you can [specify](#specifying-an-option) the `concurrent-fragments` option with a postive integer of how many video fragments you want to download concurrently. Note that this will create the specified number of threads so if your system can't handle this leave this at 1. 81 | 82 | Here is an example config that will download twitch runs from [speedrun.com user luckytyphlosion](https://speedrun.com/users/luckytyphlosion). 83 | ```yaml 84 | # Specify either a game or a speedrun.com username 85 | username: "luckytyphlosion" 86 | # The output folder of the videos. Stored on a separate drive in this example 87 | video-folder-name: D:/speedrunrescuescript/videos 88 | # Whether to download the videos or just look at the output 89 | download-videos: true 90 | #specify the desired videoquality ranges from 160 - 1080. Can be left empty, it will default to the best quality. 91 | video-quality: ">=1080p" 92 | #specify if you explicitly want to ignore links that are posted in the run description and only check submission videos. 93 | ignore-links-in-description: false 94 | #specify if you explicitly only want to only look at your personal bests. 95 | safe-only-pbs: true 96 | #amount of concurrent video fragments that you want to download concurrently. Note that this will create the specified number of threads so if your system can't handle this leave this at 1. 97 | concurrent-fragments: 1 98 | ``` 99 | 100 | ### Downloading from a speedrun.com leaderboard 101 | Before you start, you must set up a Twitch API App. You will only need to do this once. Instructions are provided below. You can also read Twitch's official instructions [here](https://dev.twitch.tv/docs/authentication/register-app/). 102 | 103 | #### Setting up a Twitch API App 104 | 1. Enable two-factor authentication (2FA) for your account. This is required in order to create apps. To enable 2FA, navigate to [Security and Privacy](https://www.twitch.tv/settings/security), and follow the steps for enabling 2FA under the Security section. 105 | 2. Log in to the [developer console](https://dev.twitch.tv/console) using your Twitch account. 106 | 3. Select the **Applications** tab on the left side and then click **Register Your Application**. 107 | 4. Set the **Name** of your application to anything (I used "Highlight Limit Detector"). 108 | 5. Set the **OAuth Redirect URLs** to `http://localhost`. Do not click Add. 109 | 6. Set the **Category** of your application to something fitting (I used "Analytics") 110 | 7. Keep the Client Type as Confidential. 111 | 8. Click **Create** to make your app. You may need to solve a Captcha. 112 | 9. Back in the **Applications** tab, locate your app under **Developer Applications**, and click **Manage**. 113 | 10. Scroll down to the **Client ID** and save the text in the textbox (looks like a random string of characters) for later. 114 | 11. Under **Client Secret**, click the **New Secret** button, confirm with **OK**, and then save the text that is shown for later. This will disappear after you leave the page, so be sure to save it somewhere safe. 115 | * **WARNING: DO NOT SHARE THIS CLIENT SECRET**. Letting it become public can lead to people abusing the API with **YOUR** account, and can possibly lead to you getting banned from Twitch. 116 | 117 | #### Setting up the configuration for a speedrun.com leaderboard 118 | 1. [Ignore](#ignoring-an-option) the `username` option if it is there and not ignored already. 119 | 2. [Specify](#specifying-an-option) an option called `game`. The value should be the speedrun.com game abbreviation of the leaderboard you want to download. You can find the abbreviation in the url of a leaderboard, after `speedrun.com`. For example, the abbreviation of https://speedrun.com/sm64 is `sm64`. 120 | 3. Optionally, you can [specify](#specifying-an-option) the option `video-folder-name`, which will control the folder where your videos are stored. You can get the folder name by double clicking the address bar in Windows Explorer of the folder you want. Note that you must use forward slashes as path separators, e.g. `D:\speedrunrescuescript\videos` must become `D:/speedrunrescuescript/videos`. If you aren't sure, leave it as `videos`. 121 | 4. [Specify](#ignoring-an-option) the `app-id` option. The value should be the **Client ID** which you saved earlier. 122 | 5. [Specify](#ignoring-an-option) the `app-secret` option. The value should be the **Client Secret** which you saved earlier. 123 | 6. [Specify](#specifying-an-option) the `download-videos` option, by putting `true` if you want to fetch information about the user's runs and download the videos, or `false` if you only want to fetch the information. 124 | 7. [Specify](#specifying-an-option) the `allow-all` option. This should be `false` if you only want to download videos of channels who have not reached the 100h limit, or `true` if you want to download all runs regardless. 125 | 8. Optionally, you can [specify](#specifying-an-option) a video quality target using the `video-quality` option. The value should either be the video quality or the video height which you want to target, e.g. `"360p"`, `"720"`, `"1080p"`, `"542"`. It can also be `"best"`, which will just automatically download the highest quality video. The program will default to `"best"` is this option is omitted. **THIS OPTION SHOULD BE IN QUOTES**, i.e. do `"360p"`, not `360p`. In case the specified quality cannot be found, the program will try to find an adjacent quality and download that. Add `>=` before the quality to download the closest higher quality, e.g. `">=480p"`, and `>=` to download the closest lower quality, e.g. `"<=480p"`. If neither are specified, the program assumes `>=` is chosen. For example, if a video has the quality options 360p and 542p, this is the logic of `>=` and `<=`: 126 | - `>=480p`: Will download 542p, as it is the next higher quality 127 | - `<=480p`: Will download 360p, as it is the next lower quality 128 | 129 | Sometimes, the lower quality encodes Twitch produces are greater in size than the lower quality resolutions (e.g. viewing the sizes of [this video](https://www.twitch.tv/videos/1906117644) using [TwitchDownloader](https://github.com/lay295/TwitchDownloader) says that the Source resolution is smaller than 480p). After deciding the desired quality, the program will check if this is the case, and download the Source quality if it is smaller than the initial desired quality. 130 | 9. [Specify](#specifying-an-option) the `ignore-links-in-description` option with `true` if want to ignore video links that are posted in the run description and only check video links in the submission field, and `false` if you want to check links from both the submission field and the description. Not recommended as some people put other parts of the run in the description. 131 | 10. [Specify](#specifying-an-option) the `safe-only-pbs` option with `true` if you want to only want to consider your own pbs or `false` if you want to include obsolete runs. This option does only work on a user as of right now. 132 | 11. Optionally you can [specify](#specifying-an-option) the `concurrent-fragments` option with a postive integer of how many video fragments you want to download concurrently. Note that this will create the specified number of threads so if your system can't handle this leave this at 1. 133 | 134 | Here is an example config that will download twitch runs from [the speedrun.com leaderboard for Rockman EXE 4.5: Real Operation](https://speedrun.com/mmbn4.5). 135 | ```yaml 136 | # Specify either a game or a speedrun.com username 137 | game: "mmbn4.5" 138 | # The output folder of the videos. Stored on a separate drive in this example 139 | video-folder-name: D:/speedrunrescuescript/videos 140 | # Whether to download the videos or just look at the output 141 | app-id: e3udyluhnly6q6g2qp5a00nwaz73dj 142 | app-secret: n8p6t5qy6f33lnm3v8jjgwliqazps0 143 | download-videos: false 144 | allow-all: false 145 | #specify the desired videoquality ranges from 160 - 1080. Can be left empty, it will default to the best quality. 146 | video-quality: ">=1080p" 147 | #specify if you explicitly want to ignore links that are posted in the run description and only check submission videos. 148 | ignore-links-in-description: false 149 | #specify if you explicitly only want to only look at your personal bests. 150 | safe-only-pbs: true 151 | #amount of concurrent video fragments that you want to download concurrently. Note that this will create the specified number of threads so if your system can't handle this leave this at 1. 152 | concurrent-fragments: 1 153 | ``` 154 | 155 | ## Additional filtering 156 | If `download-videos` is `false`, you can edit the list of files that would be downloaded. For downloading user runs, the relevant files are in `output/user/`. For downloading leaderboard runs, the relevant files are in `output/game/`. 157 | 158 | You can delete lines in `remaining_downloads.json` to omit downloading certain files. This can be useful if you want to avoid downloading runs which you know have a mirror elsewhere. Note that if you choose not to process the "remaining downloads file", this file will be overwritten, so please keep a backup somewhere. 159 | 160 | ## Errors 161 | Q: I'm getting outdated information from speedrun.com/Twitch. How do I fix this? 162 | 163 | A: To get updated information from speedrun.com, delete the folder named `srcom_cached`. To get updated information from Twitch, delete the file named `twitch_cache.json`. It is recommended to do this infrequently in order to save time by not issuing requests for information which is mostly up-to-date. 164 | -------------------------------------------------------------------------------- /release_info/README.txt: -------------------------------------------------------------------------------- 1 | Please visit https://github.com/matse007/SpeedrunRescueScript for more information. 2 | -------------------------------------------------------------------------------- /release_info/config.yml: -------------------------------------------------------------------------------- 1 | # Specify either a game or a speedrun.com username 2 | game: "" 3 | username: "" 4 | 5 | # Not necessary if you're downloading from a username 6 | #app-id: 7 | #app-secret: 8 | 9 | # Where to store the videos 10 | video-folder-name: videos 11 | 12 | # Whether to download the videos or just look at the output 13 | download-videos: true 14 | 15 | # For games, whether to download all videos irregardless of whether or not the channel has exceeded the 100 hour highlight limit 16 | allow-all: false 17 | 18 | # specify the desired video quality (or height of the video). See the readme for more info. Can be left empty, it will default to the best quality. 19 | video-quality: "best" 20 | 21 | # specify if you explicitly want to ignore links that are posted in the run description and only check submission videos. 22 | ignore-links-in-description: false 23 | 24 | #specify if you explicitly only want to only look at your personal bests. 25 | safe-only-pbs: true 26 | 27 | #amount of concurrent video fragments that you want to download concurrently. Note that this will create the specified number of threads so if your system can't handle this leave this at 1. 28 | concurrent-fragments: 1 -------------------------------------------------------------------------------- /release_info/speedrun_rescue.bat: -------------------------------------------------------------------------------- 1 | call bin\speedrunrescue.exe -cfg config.yml 2 | pause 3 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | requests~=2.32.3 2 | yt-dlp~=2025.2.19 3 | isodate~=0.7.2 4 | twitchAPI~=4.4.0 5 | configargparse~=1.7 6 | PyYAML~=6.0.2 7 | -------------------------------------------------------------------------------- /speedrunrescue.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import re 3 | import time 4 | import requests 5 | from urllib.parse import quote 6 | from isodate import parse_duration 7 | import yt_dlp 8 | import yt_dlp.postprocessor 9 | import json 10 | from datetime import datetime 11 | import srcomapi 12 | import twitch_integration 13 | from twitch_integration import twitch_c_v_url_regex, twitch_current_url_regex 14 | import asyncio 15 | import pathlib 16 | import configargparse 17 | import traceback 18 | import sys 19 | 20 | # Configuration 21 | BASE_URL = "https://www.speedrun.com/api/v1" 22 | RATE_LIMIT = 0.6 # 600ms between requests because rate limits. Something I learned today 23 | DEBUG_FILE = "debug_log.txt" 24 | HIGHLIGHTS_FILE = "twitch_highlights_mmbn5.txt" 25 | HIGHLIGHTS_JSON = "twitch_highlights_mmbn5.json" 26 | DOWNLOADS_REMAINING_FILE = "downloads_remaining_mmbn5.json" 27 | timestamp = time.time() 28 | jsonData ={} 29 | 30 | def get_user_id(username): 31 | #getting the userid first from their username 32 | try: 33 | data = srcomapi.get(f"/users/{quote(username)}") 34 | return data['data']['id'] 35 | except KeyError: 36 | print("Invalid username or API error") 37 | return None 38 | except requests.exceptions.RequestException as e: 39 | print(f"Network error: {e}") 40 | return None 41 | 42 | def get_game_id(game): 43 | data = srcomapi.get(f"/games?abbreviation={game}&max=1&_bulk=yes") 44 | 45 | game_id = data["data"][0]["id"] 46 | return game_id 47 | 48 | def get_personal_bests(user_id): 49 | run_ids = [] 50 | url = f"/users/{user_id}/personal-bests?embed=game,category" 51 | try: 52 | # Fetch all personal bests in a single request 53 | data = srcomapi.get(url) 54 | # Extract the runs from the response 55 | if data and 'data' in data: 56 | for pb in data['data']: 57 | pb = pb['run']['id'] 58 | run_ids.append(pb) 59 | return set(run_ids) 60 | else: 61 | print("No personal bests found or invalid response from the API.") 62 | return [] 63 | except requests.exceptions.RequestException as e: 64 | print(f"Error fetching personal bests: {e}") 65 | return [] 66 | 67 | def get_all_runs(user_id): 68 | #gettign all runs with pagination in mind. 69 | runs = [] 70 | offset = 0 71 | direction = "asc" 72 | last_id = "" 73 | 74 | while True: 75 | url = f"/runs?user={user_id}&max=200&offset={offset}&status=verified&embed=game,category,players&direction={direction}&orderby=date" 76 | try: 77 | data = srcomapi.get(url) 78 | if last_id: 79 | found_duplicate = False 80 | for index, run in enumerate(data['data']): 81 | if run['id'] == last_id: 82 | runs.extend(data['data'][0:index]) 83 | found_duplicate = True 84 | break 85 | if found_duplicate: 86 | break 87 | runs.extend(data['data']) 88 | # Pagination check 89 | if data['pagination']['size'] < 200: 90 | break 91 | offset += 200 92 | if offset >= 10_000: 93 | if not last_id: 94 | last_id = runs[-1]["id"] 95 | direction = "desc" 96 | offset = 0 97 | else: 98 | break 99 | except requests.exceptions.RequestException as e: 100 | print(f"Error fetching runs: {e}") 101 | break 102 | return runs 103 | 104 | def get_all_runs_from_game(game_id): 105 | runs = [] 106 | offset = 0 107 | direction = "asc" 108 | last_id = "" 109 | 110 | while True: 111 | url = f"/runs?game={game_id}&max=200&offset={offset}&status=verified&embed=game,category,players&direction={direction}&orderby=date" 112 | try: 113 | print(f"offset: {offset}") 114 | data = srcomapi.get(url) 115 | if last_id: 116 | found_duplicate = False 117 | for index, run in enumerate(data['data']): 118 | if run['id'] == last_id: 119 | runs.extend(data['data'][0:index]) 120 | found_duplicate = True 121 | break 122 | if found_duplicate: 123 | break 124 | runs.extend(data['data']) 125 | 126 | # Pagination check 127 | if data['pagination']['size'] < 200: 128 | break 129 | offset += 200 130 | if offset >= 10_000: 131 | if not last_id: 132 | last_id = runs[-1]["id"] 133 | direction = "desc" 134 | offset = 0 135 | else: 136 | break 137 | except requests.exceptions.RequestException as e: 138 | print(f"Error fetching runs: {e}") 139 | break 140 | 141 | return runs 142 | 143 | twitch_url_regex = re.compile(r"(https?:\/\/)?(?:\w+\.)?twitch\.tv\/\S*", re.IGNORECASE) 144 | 145 | IS_NOT_TWITCH_URL = 0 146 | IS_TWITCH_URL_BUT_NOT_TWITCH_VIDEO_URL = 1 147 | IS_TWITCH_VIDEO_URL = 2 148 | 149 | def is_twitch_video_url(url): 150 | # Checking with regex if its a twitch highlight 151 | is_base_twitch_url = twitch_url_regex.search(url) 152 | if is_base_twitch_url: 153 | if twitch_current_url_regex.search(url) or twitch_c_v_url_regex.search(url): 154 | return IS_TWITCH_VIDEO_URL 155 | else: 156 | return IS_TWITCH_URL_BUT_NOT_TWITCH_VIDEO_URL 157 | else: 158 | return IS_NOT_TWITCH_URL 159 | 160 | #Checking if a stream is live. Only happens if its an old dead link that redirects to the channel and the channel is live 161 | def filter_live(info): 162 | # If the video is live, return a string indicating the reason for skipping. 163 | if info.get('is_live', False): 164 | return "Skipping live stream" 165 | # Otherwise, return None to allow the video. 166 | return None 167 | 168 | async def process_runs(runs, client, ignore_links_in_description): 169 | #Extract Twitch highlight urls from runs 170 | highlights = [] 171 | all_twitch_urls = [] 172 | for run in runs: 173 | videos = run.get('videos') or {} 174 | links = videos.get('links') or [] 175 | twitch_urls = [] 176 | if ignore_links_in_description and links: 177 | links = [links[-1]] 178 | for video in links: 179 | uri = video.get('uri', '') 180 | result = is_twitch_video_url(uri) 181 | if result == IS_TWITCH_VIDEO_URL: 182 | twitch_urls.append(uri) 183 | elif result == IS_TWITCH_URL_BUT_NOT_TWITCH_VIDEO_URL: 184 | print(f"Skipped non-video twitch url {uri}") 185 | 186 | if len(twitch_urls) != 0: 187 | player_twitch_yt_urls = [] 188 | player_datas = run["players"]["data"] 189 | player_names = [] 190 | for player in player_datas: 191 | if player["rel"] == "guest": 192 | player_names.append(player["name"]) 193 | else: 194 | twitch_info = player.get("twitch") 195 | if twitch_info is not None: 196 | player_twitch_yt_urls.append(twitch_info["uri"]) 197 | 198 | youtube_info = player.get("youtube") 199 | if youtube_info is not None: 200 | player_twitch_yt_urls.append(youtube_info["uri"]) 201 | 202 | player_names.append(player["names"]["international"]) 203 | 204 | highlight = { 205 | 'players': player_names, 206 | 'game': run['game']['data']['names']['international'], 207 | 'abbreviation': run['game']['data']['abbreviation'], 208 | 'category': run['category']['data']['name'], 209 | 'time': run['times']['primary'], 210 | 'urls': twitch_urls, 211 | 'run_id': run['id'], 212 | 'submitted': run.get('submitted', 'Unknown date'), 213 | 'date': run.get('date', 'Unknown date'), 214 | 'comment': run.get('comment', '') 215 | } 216 | 217 | all_twitch_urls.extend(twitch_urls) 218 | if len(player_twitch_yt_urls) != 0: 219 | highlight["vod_sites"] = player_twitch_yt_urls 220 | 221 | highlights.append(highlight) 222 | 223 | if client.twitch is not None: 224 | await client.fetch_info(all_twitch_urls) 225 | client.write_twitch_users_at_risk() 226 | 227 | return highlights 228 | 229 | def format_date_of_submission(dateobj): 230 | try: 231 | formatted_date = datetime.fromisoformat(dateobj).strftime("%B %d, %Y") 232 | except (KeyError, ValueError, TypeError): 233 | formatted_date = "Unknown date" 234 | return formatted_date 235 | 236 | def save_highlights(highlights, client, is_game, highlights_filename, remaining_downloads_filename, highlights_json_filename): 237 | #saving all highlights in a formatted way for the user i guess? My hope is I can automate uploads later 238 | num_at_risk = 0 239 | 240 | for highlight in highlights: 241 | new_twitch_urls = [] 242 | at_risk = False 243 | for twitch_url in highlight["urls"]: 244 | if not is_game: 245 | at_risk = True 246 | else: 247 | at_risk = client.is_video_at_risk(twitch_url) 248 | 249 | if at_risk: 250 | new_twitch_urls.append(f"{twitch_url}*****") 251 | else: 252 | new_twitch_urls.append(twitch_url) 253 | 254 | highlight["urls"] = new_twitch_urls 255 | highlight["at_risk"] = at_risk 256 | if at_risk: 257 | num_at_risk += 1 258 | 259 | print(f"Number of at-risk runs: {num_at_risk}") 260 | 261 | with open(highlights_filename, "w", encoding="utf-8") as f: 262 | for entry in highlights: 263 | #formatting the iso format 264 | 265 | f.write(f"Players: {', '.join(entry['players'])}\n") 266 | f.write(f"Category: {entry['category']}\n") 267 | f.write(f"Time: {str(parse_duration(entry['time']))}\n") 268 | f.write(f"Submitted Date: {format_date_of_submission(entry['submitted'])}\n") 269 | f.write(f"Run Date: {format_date_of_submission(entry['date'])}\n") 270 | f.write(f"URL: {' '.join(entry['urls'])}\n") 271 | f.write(f"SRC Link: https://speedrun.com/{entry['abbreviation']}/runs/{entry['run_id']}\n") 272 | f.write(f"Channel exceeds 100h limit: {entry['at_risk']}\n") 273 | f.write(f"Comment: {entry['comment']}\n") 274 | vod_sites = entry.get("vod_sites") 275 | if vod_sites is not None: 276 | f.write(f"Vod sites: {' '.join(vod_sites)}\n") 277 | 278 | f.write("-" * 50 + "\n") 279 | 280 | urls = [] 281 | for entry in highlights: 282 | src_link = f"https://speedrun.com/{entry['abbreviation']}/runs/{entry['run_id']}" 283 | urls.extend((url, src_link) for url in entry["urls"]) 284 | 285 | with open(remaining_downloads_filename, "w", encoding="utf-8") as f: 286 | json.dump(urls, f, indent=4) 287 | with open(highlights_json_filename, "w", encoding="utf-8") as f: 288 | json.dump(highlights, f, indent=4) 289 | 290 | 291 | def print_exception(e, additional_msg=""): 292 | error_msg = e.args[0] if len(e.args) >= 1 else "(Not provided)" 293 | 294 | output = f"""\ 295 | 296 | 297 | 298 | ================================================================ 299 | ======================== ERROR OCCURRED ======================== 300 | {additional_msg}{error_msg} 301 | ================================================================ 302 | 303 | -- DEBUG INFORMATION -- 304 | Error type: {e.__class__.__name__} 305 | Traceback (most recent call last) 306 | {''.join(traceback.format_tb(e.__traceback__))}""" 307 | 308 | print(output) 309 | 310 | class DesiredQuality: 311 | __slots__ = ("download_best", "desired_height", "fallback_should_increase_quality") 312 | 313 | def __init__(self, download_best, desired_height, fallback_should_increase_quality): 314 | self.download_best = download_best 315 | self.desired_height = desired_height 316 | self.fallback_should_increase_quality = fallback_should_increase_quality 317 | 318 | @classmethod 319 | def from_string(cls, input_str): 320 | input_str = input_str.strip() 321 | if input_str == "best": 322 | return cls(True, 0, False) 323 | 324 | if input_str.startswith("<="): 325 | fallback_should_increase_quality = False 326 | input_str = input_str[2:] 327 | elif input_str.startswith(">="): 328 | fallback_should_increase_quality = True 329 | input_str = input_str[2:] 330 | else: 331 | fallback_should_increase_quality = True 332 | 333 | if input_str[-1] == "p": 334 | input_str = input_str[:-1] 335 | 336 | try: 337 | desired_height = int(input_str) 338 | except ValueError: 339 | raise RuntimeError(f"Invalid format for `video-quality` (got: {input_str}). Please specify the video quality or desired height of the video, e.g. 360p, 720, 1080, 542. You can also add >= or <= before the quality to tell the program whether to download the closest higher quality or closest lower quality, respectively, if the quality does not exist. If you omit >= and <=, it defaults to choosing the closest higher quality.") 340 | 341 | return cls(False, desired_height, fallback_should_increase_quality) 342 | 343 | class QualityPostprocessor(yt_dlp.postprocessor.PostProcessor): 344 | __slots__ = ("desired_height", "fallback_should_increase_quality") 345 | 346 | def __init__(self, desired_quality): 347 | super(QualityPostprocessor, self).__init__(None) 348 | self.desired_height = desired_quality.desired_height 349 | self.fallback_should_increase_quality = desired_quality.fallback_should_increase_quality 350 | 351 | @staticmethod 352 | def is_format_source(quality_format): 353 | # No hard and fast rule, so test multiple things 354 | if "source" in quality_format["format_id"].lower() or "source" in quality_format.get("format_note", "").lower() or "source" in quality_format.get("format", "").lower(): 355 | return True 356 | else: 357 | return False 358 | 359 | def run(self, info): 360 | best_height = 0 361 | best_tbr = 0 362 | best_format_id = None 363 | source_format = None 364 | source_format_id = None 365 | 366 | formats_sorted_by_height = sorted(info["formats"], key=lambda x: x.get("height", 0)) 367 | 368 | #with open("video_info.json", "w+") as f: 369 | # json.dump(info, f, indent=2) 370 | # 371 | #with open("formats_sorted_by_height.json", "w+") as f: 372 | # json.dump(formats_sorted_by_height, f, indent=2) 373 | 374 | #print(f"formats_sorted_by_height: {formats_sorted_by_height}") 375 | for quality_format in formats_sorted_by_height: 376 | if quality_format["vcodec"] == "none": 377 | #print(f"Continued {quality_format}") 378 | continue 379 | 380 | format_id = quality_format["format_id"] 381 | # some videos e.g. https://www.twitch.tv/videos/118628100 382 | # have no height associated with some formats 383 | # not really sure how to integrate this into the current quality filtering logic, so just skip these for now 384 | height = quality_format.get("height") 385 | if height is None: 386 | continue 387 | 388 | tbr = quality_format["tbr"] 389 | is_source = QualityPostprocessor.is_format_source(quality_format) 390 | 391 | if is_source: 392 | source_format = quality_format 393 | 394 | #print(f"best_height: {best_height}, height: {height}, self.desired_height: {self.desired_height}, is_source: {is_source}, quality_format: {quality_format}\n\n\n") 395 | 396 | if best_height == 0 or height < self.desired_height: 397 | best_height = height 398 | best_tbr = tbr 399 | best_format_id = format_id 400 | # edge case for when there are multiple formats with the same height and we have to choose between them 401 | elif height == self.desired_height: 402 | # if the best height isn't even the desired height yet, then set it so 403 | # otherwise, it is, and we need to choose out of the two which to pick 404 | # I think this only happens when one is source quality 405 | 406 | if best_height != self.desired_height or is_source: 407 | best_height = height 408 | best_tbr = tbr 409 | best_format_id = format_id 410 | # only do this logic if we want to fallback to a higher quality 411 | # if the height we chose doesn't match the desired height 412 | elif self.fallback_should_increase_quality: 413 | # if the current best height is less than the desired height, and we want to fallback to quality higher 414 | # edge case to pick the source quality when we meet qualities with the same height 415 | if best_height < self.desired_height or (best_height == height and is_source): 416 | best_height = height 417 | best_tbr = tbr 418 | best_format_id = format_id 419 | 420 | # Sometimes, the source format size can be less than encoded formats at a lower resolution 421 | # if this is true for the best format we picked, then choose the source format 422 | if source_format is not None and source_format.get("tbr") is not None and best_tbr is not None and source_format["tbr"] < best_tbr: 423 | best_format_id = source_format["format_id"] 424 | 425 | # include audio format just in case somehow, the best video format has no audio 426 | new_formats = [quality_format for quality_format in info["formats"] if quality_format["format_id"] == best_format_id or (quality_format["acodec"] != "none" and quality_format["vcodec"] == "none")] 427 | 428 | # if we somehow can't find any formats, then just try to download anything 429 | if len(new_formats) != 0: 430 | info["formats"] = new_formats 431 | 432 | #print(f"Post processor info: {info}") 433 | 434 | return [], info 435 | 436 | def download_videos(remaining_downloads_filename, video_folder_name, downloaded_video_info_filename, download_type_str, game_or_username, allow_all, desired_quality, concurrent_fragments): 437 | #pathlib.Path(download_folder_name).mkdir(parents=True, exist_ok=True) 438 | #downloading videos out of the provided dict using the yt-dlp module. 439 | 440 | download_info_template = """\ 441 | URL: %(original_url)s 442 | speedrun.com URL: {src_url} 443 | Channel: %(uploader_id)s 444 | Title: %(title)s 445 | Date: %(upload_date>%Y-%m-%d)s 446 | Duration: %(duration>%H:%M:%S)s 447 | Description: 448 | %(description)s 449 | ==========================================================""" 450 | 451 | print_to_file_list = [[download_info_template, downloaded_video_info_filename]] 452 | 453 | ydl_options = { 454 | 'format': "bestvideo+bestaudio/best", 455 | 'outtmpl': f'{video_folder_name}/{download_type_str}/{game_or_username}/%(title)s_%(id)s_%(format_id)s.%(ext)s', 456 | 'noplaylist': True, 457 | 'match_filter': filter_live, #uses a function to determine if the dead link now links to a stream and accidentially starts to download this instead. Hopefully should skip livestreams 458 | "print_to_file": {"after_video": print_to_file_list}, 459 | 'verbose': True, # for debugging stuff 460 | 'sleep-interval': 5, #so i dont get insta blacklisted by twitch 461 | 'retries': 1, # Retry a second time a bit later in case there was simply an issue 462 | 'retry-delay': 10, # Wait 10 seconds before retrying 463 | 'concurrent_fragment_downloads': concurrent_fragments, 464 | } 465 | 466 | if desired_quality.download_best: 467 | quality_postprocessor = None 468 | else: 469 | quality_postprocessor = QualityPostprocessor(desired_quality) 470 | 471 | while True: 472 | try: 473 | # Load URLs from JSON file 474 | with open(remaining_downloads_filename, "r", encoding="utf-8") as f: 475 | urls = json.load(f) 476 | 477 | # Stop if no URLs are left 478 | if not urls: 479 | print("All downloads completed!") 480 | break 481 | 482 | url_info = urls[0] 483 | if isinstance(url_info, list): 484 | current_url, src_link = url_info 485 | else: 486 | current_url = url_info 487 | src_link = "N/A" 488 | 489 | sleep_time = 15 490 | if allow_all or current_url.endswith("*****"): 491 | clean_url = current_url.replace("*****", "") # Cleaning up the extraspacing 492 | print(f"Downloading: {clean_url}") 493 | print_to_file_list[0][0] = download_info_template.format(src_url=src_link) 494 | with yt_dlp.YoutubeDL(ydl_options) as ydl: 495 | if quality_postprocessor is not None: 496 | ydl.add_post_processor(quality_postprocessor, when="pre_process") 497 | 498 | try: 499 | ydl.download([clean_url]) 500 | except Exception as e: 501 | error_msg = e.args[0] if len(e.args) >= 1 else "" 502 | # Video does not exist 503 | # video_does_not_exist_regex = re.compile(r"Video \w+ does not exist", flags=re.IGNORECASE) <-- seemed not to work. as a quick fix i disabled it and check manually 504 | if ("does not exist" in error_msg) or ("The channel is not currently live" in error_msg): 505 | print(f"Skipping invalid or dead link: {clean_url}") 506 | with open(downloaded_video_info_filename, "a+") as f: 507 | f.write(f"{clean_url} for {src_link} does not exist\n==========================================================\n") 508 | #sleep_time = 15 509 | 510 | else: 511 | print_exception(e, f"Failed to download {clean_url}: ") 512 | with open(downloaded_video_info_filename, "a+") as f: 513 | f.write(f"Failed to download {clean_url}: {error_msg}\n==========================================================\n") 514 | else: 515 | print(f"Skipping {current_url} (not marked as at-risk)") 516 | sleep_time = 0 517 | 518 | urls.pop(0) 519 | with open(remaining_downloads_filename, "w", encoding="utf-8") as f: 520 | json.dump(urls, f, indent=4) 521 | if sleep_time != 0: 522 | print(f"Waiting {sleep_time} seconds before downloading the next video.") 523 | time.sleep(sleep_time) 524 | except FileNotFoundError: 525 | print("No remaining downloads file found") 526 | break 527 | except json.JSONDecodeError: 528 | print("Error reading JSON file") 529 | break 530 | except KeyboardInterrupt: 531 | print("\nDownload interrupted by user. Progress saved.") 532 | with open(remaining_downloads_filename, "w", encoding="utf-8") as f: 533 | json.dump(urls, f, indent=4) 534 | break 535 | except Exception as e: 536 | print_exception(e, "Unexpected error: ") 537 | print(f"Unexpected error: {e}") 538 | break 539 | 540 | def load_remaining_downloads(remaining_downloads_filename): 541 | try: 542 | with open(remaining_downloads_filename, "r", encoding="utf-8") as f: 543 | urls = json.load(f) 544 | if not urls: 545 | print("No remaining downloads file found") 546 | return None 547 | return urls 548 | except FileNotFoundError: 549 | print("No remaining downloads file found") 550 | except json.JSONDecodeError: 551 | print("Error reading JSON file") 552 | except Exception as e: 553 | print(f"Unexpected error: {e}") 554 | 555 | def convert_bool(value): 556 | value_str_lower = value.lower() 557 | if value_str_lower == "true": 558 | return True 559 | elif value_str_lower == "false": 560 | return False 561 | else: 562 | raise configargparse.ArgumentTypeError(f"Invalid bool type (must be `true` or `false`, got {value})") 563 | 564 | def process_personal_bests(runs, pb_ids): 565 | return [run for run in runs if run["id"] in pb_ids] 566 | 567 | async def main(): 568 | ap = configargparse.ArgumentParser( 569 | allow_abbrev=False, 570 | config_file_parser_class=configargparse.YAMLConfigFileParser, 571 | config_file_open_func=lambda filename: open( 572 | filename, "r", encoding="utf-8" 573 | ) 574 | ) 575 | 576 | ap.add_argument("-cfg", "--config", dest="config", default="config.yml", is_config_file=True, help="Alternative config file to put in command line arguments. Arguments provided on the command line will override arguments provided in the config file, if specified.") 577 | ap.add_argument("--game", dest="game", default=None, help="The game of the leaderboard you want to scrape for Twitch links. Either this or `username:` must be specified") 578 | ap.add_argument("--username", dest="username", default=None, help="The speedrun.com username for the runs you want to scrape for Twitch links. Either this or `game:` must be specified") 579 | ap.add_argument("--app-id", dest="app_id", default=None, help="Name of the Twitch API App ID used for checking if a user has 100 or more hours of highlights. Required for game download. Not necessary for username download.") 580 | ap.add_argument("--app-secret", dest="app_secret", default=None, help="Name of the Twitch API App Secret. See `app-id:` for more info") 581 | ap.add_argument("--video-folder-name", dest="video_folder_name", default="videos", help="Folder where the videos will be stored. Videos will automatically be sorted by game and username. Will be created if it doesn't exist already. Default is a folder \"videos\" in the same directory as the script") 582 | ap.add_argument("--cache-filename", dest="cache_filename", default="twitch_cache.json", help="File containing information about users' videos from the Twitch API (for determining if a user has >= 100 hours of highlights). Default is twitch_cache.json") 583 | ap.add_argument("--download-videos", dest="download_videos", type=convert_bool, help="Whether to download videos after scraping them from speedrun.com", required=True) 584 | ap.add_argument("--allow-all", dest="allow_all", type=convert_bool, help="Whether to download all found videos regardless of whether or not the channel they exist on have reached the >=100h highlight limit.", required=True) 585 | ap.add_argument("--video-quality", dest="video_quality", default="best", help="Desired closest video quality that you want to download. For this option, specify the video quality or desired height of the video, e.g. 360p, 720, 1080, 542. Choosing \"best\" will just download the best quality available. THIS OPTION SHOULD BE IN QUOTES, i.e. do \"360p\", not 360p. You can also add >= or <= before the quality to tell the program whether to download the closest higher quality or closest lower quality, respectively, if the quality does not exist. If you omit >= and <=, it defaults to choosing the closest higher quality. Defaults to \"best\".") 586 | ap.add_argument("--ignore-links-in-description", dest="ignore_links_in_description", type=convert_bool, help="Whether to ignore twitch links that are in the video description or not. By default this is disabled.", required=True) 587 | ap.add_argument("--concurrent-fragments", dest="concurrent_fragments", type=int, help="How many concurrent fragments to download of a video. By default this is 1.") 588 | ap.add_argument("--safe-only-pbs", dest="save_only_pbs", type=convert_bool,help="If set to true, only the PBs of the runner or all PBs on the leaderboard are being saved.",required=True) 589 | args = ap.parse_args() 590 | 591 | desired_quality = DesiredQuality.from_string(args.video_quality) 592 | 593 | print(f"Using quality: {args.video_quality}") 594 | 595 | if args.game and args.username: 596 | raise RuntimeError("Only one of `username:` or `game:` must be specified in config.yml!") 597 | 598 | game = args.game 599 | username = args.username 600 | if game: 601 | download_type_str = "game" 602 | game_or_username = game 603 | is_game = True 604 | elif not username: 605 | raise RuntimeError("One of `username:` or `game:` must be specified in config.yml!") 606 | else: 607 | download_type_str = "user" 608 | game_or_username = username 609 | is_game = False 610 | 611 | base_output_dirpath = pathlib.Path(f"output/{download_type_str}/{game_or_username}") 612 | base_output_dirpath.mkdir(parents=True, exist_ok=True) 613 | 614 | highlights_filename = f"{base_output_dirpath}/twitch_highlights.txt" 615 | highlights_json_filename = f"{base_output_dirpath}/twitch_highlights.json" 616 | remaining_downloads_filename = f"{base_output_dirpath}/remaining_downloads.json" 617 | downloaded_video_info_filename = f"{base_output_dirpath}/download_info.txt" 618 | 619 | concurrent_fragments = args.concurrent_fragments or 1 620 | 621 | #Check if there are remaining Downloads left. 622 | remaininDownloads = load_remaining_downloads(remaining_downloads_filename) 623 | if remaininDownloads and input("A remaining downloads file has been found. Do you want to continue the download? (y/n): ").lower().startswith("y"): 624 | download_videos(remaining_downloads_filename, args.video_folder_name, downloaded_video_info_filename, download_type_str, game_or_username, args.allow_all, desired_quality, concurrent_fragments) 625 | return 626 | 627 | if is_game: 628 | print(f"Searching for {game}...") 629 | game_id = get_game_id(game) 630 | print(f"Getting all runs") 631 | runs = get_all_runs_from_game(game_id) 632 | else: 633 | print(f"Searching for {username}...") 634 | # Getting the user id first from the username. 635 | user_id = get_user_id(username) 636 | if not user_id: 637 | print("User not found") 638 | return 639 | 640 | # Fetch all runs from user 641 | print("Fetching runs...") 642 | runs = get_all_runs(user_id) 643 | if args.save_only_pbs: 644 | pb_ids = get_personal_bests(user_id) 645 | runs = process_personal_bests(runs, pb_ids) 646 | 647 | print(f"Found {len(runs)} verified runs") 648 | 649 | if (args.app_id is None or args.app_secret is None) and is_game: 650 | raise RuntimeError("Twitch integration must be present if you are requesting a game to be downloaded") 651 | client = await twitch_integration.TwitchClient.init(args) 652 | # Checking for highlights 653 | highlights = await process_runs(runs, client, args.ignore_links_in_description) 654 | print(f"Found {len(highlights)} Twitch highlights") 655 | 656 | # Save highlights 657 | save_highlights(highlights, client, is_game, highlights_filename, remaining_downloads_filename, highlights_json_filename) 658 | print(f"Saved highlights to {highlights_filename}") 659 | 660 | # Download prompt for users and downloading videos 661 | if highlights and args.download_videos: 662 | download_videos(remaining_downloads_filename, args.video_folder_name, downloaded_video_info_filename, download_type_str, game_or_username, args.allow_all, desired_quality, concurrent_fragments) 663 | print("Download completed") 664 | 665 | if __name__ == "__main__": 666 | try: 667 | asyncio.run(main()) 668 | except Exception as e: 669 | print_exception(e) 670 | sys.exit(1) 671 | -------------------------------------------------------------------------------- /srcomapi.py: -------------------------------------------------------------------------------- 1 | import traceback 2 | import requests 3 | import urllib 4 | import pathlib 5 | import json 6 | import time 7 | import re 8 | import sys 9 | 10 | class CacheSettings: 11 | __slots__ = ("read_cache", "write_cache", "cache_dirname", "rate_limit", "retry_on_empty") 12 | 13 | def __init__(self, read_cache, write_cache, cache_dirname, rate_limit): 14 | self.read_cache = read_cache 15 | self.write_cache = write_cache 16 | self.cache_dirname = cache_dirname 17 | self.rate_limit = rate_limit 18 | 19 | default_cache_settings = CacheSettings(True, True, "srcom_cached", True) 20 | 21 | def get_cached_endpoint_filepath(endpoint, params, cache_settings): 22 | endpoint_as_pathname = f"{cache_settings.cache_dirname}/{urllib.parse.quote(endpoint, safe='')}_q_{urllib.parse.urlencode(params, doseq=True)}.json" 23 | 24 | return pathlib.Path(endpoint_as_pathname) 25 | 26 | API_URL = "https://www.speedrun.com/api/v1" 27 | 28 | def get(endpoint, params=None, cache_settings=None, require_success=False): 29 | exception_sleep_time = 15 30 | 31 | while True: 32 | try: 33 | return get_in_loop_code(endpoint, params, cache_settings)[0] 34 | except ConnectionError as e: 35 | print(f"Exception occurred: {e}\n{''.join(traceback.format_tb(e.__traceback__))}\nSleeping for {exception_sleep_time} seconds now.") 36 | time.sleep(exception_sleep_time) 37 | exception_sleep_time *= 2 38 | if exception_sleep_time > 1000: 39 | exception_sleep_time = 1000 40 | 41 | def get_in_loop_code(endpoint, params, cache_settings): 42 | if params is None: 43 | params = {} 44 | 45 | if cache_settings is None: 46 | cache_settings = default_cache_settings 47 | 48 | endpoint_as_path = get_cached_endpoint_filepath(endpoint, params, cache_settings) 49 | if cache_settings.read_cache and endpoint_as_path.is_file(): 50 | error_code = None 51 | 52 | endpoint_as_path_size = endpoint_as_path.stat().st_size 53 | if endpoint_as_path_size == 0: 54 | return {}, 404 55 | 56 | #print(f"endpoint_as_path: {endpoint_as_path}") 57 | with open(endpoint_as_path, "r", encoding="utf-8") as f: 58 | data = json.load(f) 59 | 60 | if error_code is None: 61 | return data, 200 62 | 63 | url = f"{API_URL}{endpoint}" 64 | print(f"url: {url}?{urllib.parse.urlencode(params, doseq=True)}") 65 | start_time = time.time() 66 | r = requests.get(url, params=params) 67 | end_time = time.time() 68 | print(f"Request took {end_time - start_time}.") 69 | 70 | if cache_settings.write_cache: 71 | endpoint_as_path.parent.mkdir(parents=True, exist_ok=True) 72 | 73 | if r.status_code != 200: 74 | if r.status_code >= 400 and r.status_code < 500: 75 | raise RuntimeError(f"API returned {r.status_code}: {r.reason}") 76 | 77 | raise ConnectionError(f"Got status code {r.status_code}!") 78 | #return r.reason, r.status_code 79 | #if r.status_code != 404: 80 | # raise ConnectionError(f"Got status code {r.status_code}!") 81 | # 82 | #if cache_settings.write_cache: 83 | # if r.status_code == 404: 84 | # endpoint_as_path.touch() 85 | # else: 86 | # print(f"Got non-404 error code: {r.status_code}") 87 | # with open(endpoint_as_path, "w+") as f: 88 | # f.write(str(r.status_code)) 89 | # 90 | #return r.reason, r.status_code 91 | 92 | data = r.json() 93 | 94 | if cache_settings.write_cache: 95 | endpoint_as_path.parent.mkdir(parents=True, exist_ok=True) 96 | data_as_str = json.dumps(data, separators=(",", ":")) 97 | exit_after_write = False 98 | while True: 99 | try: 100 | with open(endpoint_as_path, "w+", encoding="utf-8") as f: 101 | f.write(data_as_str) 102 | break 103 | except KeyboardInterrupt: 104 | print("Saving speedrun.com API cache, please stop Ctrl-C'ing") 105 | exit_after_write = True 106 | 107 | if exit_after_write: 108 | sys.exit(1) 109 | 110 | if cache_settings.rate_limit: 111 | time.sleep(1) 112 | 113 | return data, r.status_code 114 | -------------------------------------------------------------------------------- /twitch_integration.py: -------------------------------------------------------------------------------- 1 | from twitchAPI.twitch import Twitch 2 | from twitchAPI.helper import first 3 | import asyncio 4 | import json 5 | import pathlib 6 | import itertools 7 | import re 8 | import sys 9 | 10 | twitch_c_v_url_regex = re.compile(r"(?:https?:\/\/)?(?:\w+\.)?twitch\.tv\/(\w+)\/([cv])\/(\d+)", re.IGNORECASE) 11 | twitch_current_url_regex = re.compile(r"(?:https?:\/\/)?(?:\w+\.)?twitch\.tv\/videos/(\d+)", re.IGNORECASE) 12 | 13 | def grouper(iterable, n): 14 | it = iter(iterable) 15 | while True: 16 | chunk = list(itertools.islice(it, n)) 17 | if not chunk: # Stop when no more elements are left 18 | break 19 | yield chunk 20 | 21 | duration_regex = re.compile(r"^(?:([0-9]+)h)?(?:([0-9]+)m)?(?:([0-9]+)s?)?$") 22 | 23 | def parse_duration(duration): 24 | match_obj = duration_regex.match(duration.strip()) 25 | if match_obj: 26 | hours = match_obj.group(1) 27 | minutes = match_obj.group(2) 28 | seconds = match_obj.group(3) 29 | if hours is None and minutes is None and seconds is None: 30 | raise RuntimeError(f"Invalid duration \"{expiry_time}\" provided for expiry time!") 31 | 32 | if hours is None: 33 | hours = 0 34 | if minutes is None: 35 | minutes = 0 36 | if seconds is None: 37 | seconds = 0 38 | 39 | try: 40 | duration_as_seconds = int(hours) * 3600 + int(minutes) * 60 + int(seconds) 41 | except ValueError: 42 | raise RuntimeError(f"At least one of hours, seconds, and minutes not an integer!") 43 | else: 44 | raise RuntimeError(f"Invalid duration \"{expiry_time}\" provided for expiry time!") 45 | 46 | return duration_as_seconds 47 | 48 | class UserCache: 49 | __slots__ = ("cache_filename", "cache_info") 50 | 51 | def __init__(self, cache_filename): 52 | cache_filepath = pathlib.Path(cache_filename) 53 | if cache_filepath.is_file(): 54 | with open(cache_filename, "r") as f: 55 | cache_info = json.load(f) 56 | else: 57 | cache_info = { 58 | "video_infos": {}, 59 | "user_infos": {}, 60 | "total_duration": -1 61 | } 62 | 63 | self.cache_info = cache_info 64 | self.cache_filename = cache_filename 65 | 66 | def parse_valid_video_id(self, video_url, update_c=False): 67 | match_obj = twitch_c_v_url_regex.match(video_url) 68 | if match_obj: 69 | url_type = match_obj.group(2) 70 | if url_type == "c": 71 | if update_c: 72 | user_info = self.get_user_info(match_obj.group(1)) 73 | user_info["c_video_urls"].append(video_url) 74 | print(f"Skipped c-type url {video_url}") 75 | video_id = None 76 | else: 77 | video_id = match_obj.group(3) 78 | else: 79 | match_obj = twitch_current_url_regex.match(video_url) 80 | if match_obj: 81 | video_id = match_obj.group(1) 82 | else: 83 | print(f"Skipped non-video url {video_url}") 84 | video_id = None 85 | 86 | return video_id 87 | 88 | async def update_video_infos_from_video_urls(self, twitch, video_urls): 89 | valid_nonfound_video_ids = [] 90 | print("Finding valid video ids!") 91 | for video_url in video_urls: 92 | video_id = self.parse_valid_video_id(video_url, update_c=True) 93 | if video_id is not None: 94 | video_info = self.cache_info["video_infos"].get(video_id) 95 | if video_info is None: 96 | valid_nonfound_video_ids.append(video_id) 97 | 98 | if len(valid_nonfound_video_ids) != 0: 99 | print(f"Fetching video info from {len(valid_nonfound_video_ids)} valid video ids!") 100 | for i, valid_nonfound_video_ids_chunk in enumerate(grouper(valid_nonfound_video_ids, 100)): 101 | print(f"video_ids_chunk: {valid_nonfound_video_ids_chunk}") 102 | print(f"Parsing chunk {100*i}") 103 | async for video_info_obj in twitch.get_videos(ids=valid_nonfound_video_ids_chunk, first=100): 104 | video_info = video_info_obj.to_dict() 105 | self.cache_info["video_infos"][video_info["id"]] = video_info 106 | 107 | valid_nonfound_video_ids_as_set = frozenset(valid_nonfound_video_ids) 108 | found_video_info_ids = frozenset(self.cache_info["video_infos"].keys()) 109 | missing_video_ids = valid_nonfound_video_ids_as_set - found_video_info_ids 110 | 111 | for missing_video_id in missing_video_ids: 112 | self.cache_info["video_infos"][missing_video_id] = {"missing": True} 113 | 114 | self.save_cache() 115 | 116 | async def update_user_infos_from_video_infos(self, twitch): 117 | for video_id, video_info in self.cache_info["video_infos"].items(): 118 | if video_info.get("missing"): 119 | continue 120 | 121 | username = video_info["user_login"] 122 | user_info = self.get_user_info(username) 123 | if len(user_info["videos"]) == 0: 124 | print(f"Downloading video info for {username}!") 125 | user_id = video_info["user_id"] 126 | num_video_infos = 0 127 | async for user_video_info_obj in twitch.get_videos(user_id=user_id, first=100): 128 | user_video_info = user_video_info_obj.to_dict() 129 | user_info["videos"][user_video_info["id"]] = user_video_info 130 | num_video_infos += 1 131 | 132 | print(f"num_video_infos: {num_video_infos}") 133 | self.save_cache() 134 | 135 | def determine_at_risk_users(self): 136 | print(f"Determining at risk users!") 137 | for username, user_info in self.cache_info["user_infos"].items(): 138 | total_duration = 0 139 | for video_id, user_video_info in user_info["videos"].items(): 140 | if user_video_info["type"] == "highlight": 141 | total_duration += parse_duration(user_video_info["duration"]) 142 | 143 | user_info["total_duration"] = total_duration 144 | 145 | self.save_cache() 146 | 147 | def is_video_at_risk(self, video_url): 148 | video_id = self.parse_valid_video_id(video_url) 149 | if video_id is None: 150 | return False 151 | 152 | video_info = self.cache_info["video_infos"].get(video_id) 153 | if video_info is None or video_info.get("missing"): 154 | # Want to report missing videos via yt-dlp 155 | return True 156 | 157 | username = video_info["user_login"] 158 | user_info = self.cache_info["user_infos"].get(username) 159 | if user_info is None: 160 | # Be safe and download the video if for some reason the username doesn't exist 161 | return True 162 | 163 | if user_info["total_duration"] >= 360000: 164 | return True 165 | else: 166 | return False 167 | 168 | def write_twitch_users_at_risk(self): 169 | twitch_users_sorted_by_total_duration = sorted(self.cache_info["user_infos"].items(), key=lambda x: x[1]["total_duration"], reverse=True) 170 | output = "".join(f"{username}: {user_info['total_duration']}\n" for username, user_info in twitch_users_sorted_by_total_duration) 171 | 172 | with open("output/twitch_users_sorted_by_total_duration.txt", "w+") as f: 173 | f.write(output) 174 | 175 | def get_user_info(self, username): 176 | user_info = self.cache_info["user_infos"].get(username) 177 | if user_info is None: 178 | user_info = { 179 | "c_video_urls": [], 180 | "videos": {} 181 | } 182 | self.cache_info["user_infos"][username] = user_info 183 | return user_info 184 | 185 | def save_cache(self): 186 | exit_after_write = False 187 | cache_info_as_str = json.dumps(self.cache_info, indent=2) 188 | while True: 189 | try: 190 | with open(self.cache_filename, "w+") as f: 191 | f.write(cache_info_as_str) 192 | 193 | break 194 | except KeyboardInterrupt: 195 | print("Saving Twitch cache, please stop Ctrl-C'ing") 196 | exit_after_write = True 197 | 198 | if exit_after_write: 199 | sys.exit(1) 200 | 201 | class TwitchClient: 202 | __slots__ = ("twitch", "user_cache") 203 | 204 | def __init__(self, args, twitch): 205 | self.twitch = twitch 206 | self.user_cache = UserCache(args.cache_filename) 207 | 208 | @classmethod 209 | async def init(cls, args): 210 | app_id = args.app_id 211 | app_secret = args.app_secret 212 | if app_id is None or app_secret is None: 213 | twitch = None 214 | else: 215 | twitch = await Twitch(app_id, app_secret) 216 | 217 | return cls(args, twitch) 218 | 219 | async def fetch_info(self, video_urls): 220 | await self.user_cache.update_video_infos_from_video_urls(self.twitch, video_urls) 221 | await self.user_cache.update_user_infos_from_video_infos(self.twitch) 222 | self.user_cache.determine_at_risk_users() 223 | 224 | def is_video_at_risk(self, video_url): 225 | return self.user_cache.is_video_at_risk(video_url) 226 | 227 | def write_twitch_users_at_risk(self): 228 | self.user_cache.write_twitch_users_at_risk() 229 | --------------------------------------------------------------------------------