├── .gitattributes
├── .gitignore
├── LICENSE
├── package_release.py
├── readme.md
├── release_info
    ├── README.txt
    ├── config.yml
    └── speedrun_rescue.bat
├── requirements.txt
├── speedrunrescue.py
├── srcomapi.py
└── twitch_integration.py


/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 | *.txt text
4 | *.json text
5 | *.yml text
6 | *.py text
7 | *.md text
8 | *.bat text
9 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Default ignored files
  2 | /shelf/
  3 | /workspace.xml
  4 | # Editor-based HTTP Client requests
  5 | /httpRequests/
  6 | # Datasource local storage ignored files
  7 | /dataSources/
  8 | /dataSources.local.xml
  9 | ### Python template
 10 | # Byte-compiled / optimized / DLL files
 11 | __pycache__/
 12 | *.py[cod]
 13 | *$py.class
 14 | 
 15 | # C extensions
 16 | *.so
 17 | 
 18 | # Distribution / packaging
 19 | .Python
 20 | build/
 21 | develop-eggs/
 22 | dist/
 23 | downloads/
 24 | eggs/
 25 | .eggs/
 26 | lib/
 27 | lib64/
 28 | parts/
 29 | sdist/
 30 | var/
 31 | wheels/
 32 | share/python-wheels/
 33 | *.egg-info/
 34 | .installed.cfg
 35 | *.egg
 36 | MANIFEST
 37 | 
 38 | # PyInstaller
 39 | #  Usually these files are written by a python script from a template
 40 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 41 | *.manifest
 42 | *.spec
 43 | 
 44 | # Installer logs
 45 | pip-log.txt
 46 | pip-delete-this-directory.txt
 47 | 
 48 | # Unit test / coverage reports
 49 | htmlcov/
 50 | .tox/
 51 | .nox/
 52 | .coverage
 53 | .coverage.*
 54 | .cache
 55 | nosetests.xml
 56 | coverage.xml
 57 | *.cover
 58 | *.py,cover
 59 | .hypothesis/
 60 | .pytest_cache/
 61 | cover/
 62 | 
 63 | # Translations
 64 | *.mo
 65 | *.pot
 66 | 
 67 | # Django stuff:
 68 | *.log
 69 | local_settings.py
 70 | db.sqlite3
 71 | db.sqlite3-journal
 72 | 
 73 | # Flask stuff:
 74 | instance/
 75 | .webassets-cache
 76 | 
 77 | # Scrapy stuff:
 78 | .scrapy
 79 | 
 80 | # Sphinx documentation
 81 | docs/_build/
 82 | 
 83 | # PyBuilder
 84 | .pybuilder/
 85 | target/
 86 | 
 87 | # Jupyter Notebook
 88 | .ipynb_checkpoints
 89 | 
 90 | # IPython
 91 | profile_default/
 92 | ipython_config.py
 93 | 
 94 | # pyenv
 95 | #   For a library or package, you might want to ignore these files since the code is
 96 | #   intended to run in multiple environments; otherwise, check them in:
 97 | # .python-version
 98 | 
 99 | # pipenv
100 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
101 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
102 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
103 | #   install all needed dependencies.
104 | #Pipfile.lock
105 | 
106 | # poetry
107 | #   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
108 | #   This is especially recommended for binary packages to ensure reproducibility, and is more
109 | #   commonly ignored for libraries.
110 | #   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
111 | #poetry.lock
112 | 
113 | # pdm
114 | #   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
115 | #pdm.lock
116 | #   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
117 | #   in version control.
118 | #   https://pdm.fming.dev/latest/usage/project/#working-with-version-control
119 | .pdm.toml
120 | .pdm-python
121 | .pdm-build/
122 | 
123 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
124 | __pypackages__/
125 | 
126 | # Celery stuff
127 | celerybeat-schedule
128 | celerybeat.pid
129 | 
130 | # SageMath parsed files
131 | *.sage.py
132 | 
133 | # Environments
134 | .env
135 | .venv
136 | env/
137 | venv/
138 | ENV/
139 | env.bak/
140 | venv.bak/
141 | 
142 | # Spyder project settings
143 | .spyderproject
144 | .spyproject
145 | 
146 | # Rope project settings
147 | .ropeproject
148 | 
149 | # mkdocs documentation
150 | /site
151 | 
152 | # mypy
153 | .mypy_cache/
154 | .dmypy.json
155 | dmypy.json
156 | 
157 | # Pyre type checker
158 | .pyre/
159 | 
160 | # pytype static type analyzer
161 | .pytype/
162 | 
163 | # Cython debug symbols
164 | cython_debug/
165 | 
166 | # PyCharm
167 | #  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
168 | #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
169 | #  and can be added to the global gitignore or merged into this file.  For a more nuclear
170 | #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
171 | .idea/
172 | 
173 | /twitch_highlights.txt
174 | virt/
175 | remaining_downloads.json
176 | twitch_highlights.json
177 | srcom_cached/
178 | output/
179 | twitch_cache.json
180 | config.json
181 | config.yml
182 | release_working/
183 | !release_info/config.yml
184 | build_options.yml
185 | videos/


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2025 Matse007
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/package_release.py:
--------------------------------------------------------------------------------
 1 | import yaml
 2 | import subprocess
 3 | import pathlib
 4 | import shutil
 5 | import platform
 6 | import argparse
 7 | import PyInstaller.__main__
 8 | 
 9 | def main():
10 |     with open("build_options.yml", "r") as f:
11 |         options = yaml.safe_load(f)
12 | 
13 |     print("Building executable!")
14 |     PyInstaller.__main__.run(["speedrunrescue.py", "-D", "--noconfirm"])
15 | 
16 |     release_name = options["release_name"]
17 |     release_dirname = f"release_working/{release_name}"
18 |     print(f"Creating release at {release_dirname}!")
19 |     release_dirpath = pathlib.Path(release_dirname)
20 |     if release_dirpath.is_dir():
21 |         shutil.rmtree(release_dirpath)
22 | 
23 |     print("Copying over files!")
24 |     shutil.copytree("release_info", release_dirpath)
25 |     shutil.copytree("dist/speedrunrescue", f"{release_dirname}/bin")
26 | 
27 |     print("Creating zip archive!")
28 |     sevenz_filename = options["sevenz_filename"]
29 |     
30 |     subprocess.run((sevenz_filename, "a", f"release_working/SpeedrunRescueScript_{release_name}.zip", f"./{release_dirname}/*", "-tzip", "-mx=9", "-mfb=258", "-mpass=3", "-mmt=off"), check=True)
31 | 
32 | if __name__ == "__main__":
33 |     main()
34 | 


--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
  1 | # Speedrun.com Twitch Highlight Downloader
  2 | With the [recent changes to Twitch highlights](https://x.com/twitchsupport/status/1892277199497043994) which limit the total duration of highlights for a channel to 100 hours, there has been significant panic about the loss of Twitch VODs on speedrun.com. This program aims to assist in archiving Twitch VODs, by providing the following functions:
  3 | - Finding all runs submitted by a user OR all runs on a game leaderboard, that are hosted on Twitch, and writing that information to a file
  4 | - Downloading all runs as described above
  5 |   - For runs from a game leaderboard, optionally only downloading runs from channels which have exceeded the highlight limit.
  6 | 
  7 | Have any questions? Ask in the [official speedrun.com Discord](https://discord.gg/0h6sul1ZwHVpXJmK).
  8 | 
  9 | For people prefering a video tutorial, here is one that helps you to setup the tool on windows.
 10 | 
 11 | https://www.youtube.com/watch?v=tkgnHt2aVmQ
 12 | 
 13 | # Setup (Executable, Windows only)
 14 | 1. Download the latest release [here](https://github.com/Matse007/SpeedrunRescueScript/releases/latest). Be sure to download the file called "SpeedrunRescueScript_v{xxx}.zip", where {xxx} is the version number.
 15 | 2. Extract the zip file and open its contents.
 16 | 3. Run the program by clicking `speedrun_rescue.bat`, **BUT DON'T DO SO YET**. Read the [configuration options](#configuration) first.
 17 | 
 18 | # Setup (command line)
 19 | 
 20 | ## Prerequisites
 21 | Before running the script you need to have the following tools installed:  
 22 | - Python 3.x which you can download here if you have not yet: https://www.python.org/downloads/
 23 | - Install the required Python packages. The script depends on several external libraries. You can install these using pip and the requirements.txt file you find in this project as well.
 24 | 
 25 | ## Installation Steps
 26 | 1. Click the Code button on top of the webpage and press download Zip. If you are an advanced user, clone the repository.
 27 | 2. Unpack the zip file or go into the folder and open a command line. If you are on Windows you can do that by clicking into the Link field in windows explorer and typing in cmd.
 28 | 3. Install all the dependencies using the following command (copy pasting this into the command prompt)
 29 | ```sh
 30 | pip install -r requirements.txt
 31 | ```
 32 | 4. Make sure to have ffmpeg installed. This script is using yt-dlp which absolutely requires ffmpeg. Look for an installation guide for installing ffmpeg. You can download it here on [their official website](https://ffmpeg.org/download.html)
 33 | 
 34 | To run the script, run `python speedrunrescue.py`. Please read the [configuration options](#configuration) below.
 35 | 
 36 | ## Configuration
 37 | Options to the program are provided in a file called `config.yml`, in the same folder as the script (For executable users, do not worry, where config.yml is placed is correct).
 38 | 
 39 | Telling the program what to do is very simple. The program is controlled by options, which dictates one aspect of what the program should do.
 40 | 
 41 | ### Specifying an option
 42 | To specify an option:
 43 | 1. Find an empty line to place the option.
 44 | 2. Place the option name, e.g. `username`, followed by a colon and a space (`: `), followed by the option value, e.g. `luckytyphlosion`.
 45 | 
 46 | For this example, the full option would be:
 47 | ```
 48 | username: luckytyphlosion
 49 | ```
 50 | 
 51 | You should not list the same option multiple times. For example, **do not** do this:
 52 | ```
 53 | username: luckytyphlosion
 54 | username: Matse007
 55 | ```
 56 | 
 57 | This is important as in the pre-made `config.yml` in the executable release, some options have already been specified, so you should not add multiple of the option.
 58 | 
 59 | ### Ignoring an option
 60 | Sometimes, you may to ignore an option in your configuration. There are two ways to do this.
 61 | 1. Add a `#` at the start of the option. For example, `#username: luckytyphlosion`
 62 | 2. Remove the option entirely by deleting the line.
 63 | 
 64 | ## Tasks
 65 | There are two ways of editing `config.yml`, depending on your purposes. These two are explained below.
 66 | 
 67 | ### Downloading from a speedrun.com user
 68 | 1. [Ignore](#ignoring-an-option) the `game` option if it is there and not ignored already. 
 69 | 2. [Specify](#specifying-an-option) an option called `username`. The value should be the speedrun.com username for which you want to download runs from.
 70 | 3. Optionally, you can [specify](#specifying-an-option) the option `video-folder-name`, which will control the folder where your videos are stored. You can get the folder name by double clicking the address bar in Windows Explorer of the folder you want. Note that you must use forward slashes as path separators, e.g. `D:\speedrunrescuescript\videos` must become `D:/speedrunrescuescript/videos`. If you aren't sure, leave it as `videos`.
 71 | 4. [Ignore](#ignoring-an-option) the `app-id` and `app-secret` options if they exist.
 72 | 5. [Specify](#specifying-an-option) the `download-videos` option, by putting `true` if you want to fetch information about the user's runs and download the videos, or `false` if you only want to fetch the information.
 73 | 6. Optionally, you can [specify](#specifying-an-option) a video quality target using the `video-quality` option. The value should either be the video quality or the video height which you want to target, e.g. `"360p"`, `"720"`, `"1080p"`, `"542"`. It can also be `"best"`, which will just automatically download the highest quality video. The program will default to `"best"` is this option is omitted. **THIS OPTION SHOULD BE IN QUOTES**, i.e. do `"360p"`, not `360p`. In case the specified quality cannot be found, the program will try to find an adjacent quality and download that. Add `>=` before the quality to download the closest higher quality, e.g. `">=480p"`, and `>=` to download the closest lower quality, e.g. `"<=480p"`. If neither are specified, the program assumes `>=` is chosen. For example, if a video has the quality options 360p and 542p, this is the logic of `>=` and `<=`:
 74 |     - `>=480p`: Will download 542p, as it is the next higher quality
 75 |     - `<=480p`: Will download 360p, as it is the next lower quality
 76 | 
 77 |     Sometimes, the lower quality encodes Twitch produces are greater in size than the lower quality resolutions (e.g. viewing the sizes of [this video](https://www.twitch.tv/videos/1906117644) using [TwitchDownloader](https://github.com/lay295/TwitchDownloader) says that the Source resolution is smaller than 480p). After deciding the desired quality, the program will check if this is the case, and download the Source quality if it is smaller than the initial desired quality.
 78 | 7. [Specify](#specifying-an-option) the `ignore-links-in-description` option with `true` if want to ignore video links that are posted in the run description and only check video links in the submission field, and `false` if you want to check links from both the submission field and the description. Not recommended as some people put other parts of the run in the description.
 79 | 8. [Specify](#specifying-an-option) the `safe-only-pbs` option with `true` if you want to only want to consider your own pbs or `false` if you want to include obsolete runs. This option does only work on a user as of right now.
 80 | 9. Optionally you can [specify](#specifying-an-option) the `concurrent-fragments` option with a postive integer of how many video fragments you want to download concurrently. Note that this will create the specified number of threads so if your system can't handle this leave this at 1.
 81 | 
 82 | Here is an example config that will download twitch runs from [speedrun.com user luckytyphlosion](https://speedrun.com/users/luckytyphlosion).
 83 | ```yaml
 84 | # Specify either a game or a speedrun.com username
 85 | username: "luckytyphlosion"
 86 | # The output folder of the videos. Stored on a separate drive in this example
 87 | video-folder-name: D:/speedrunrescuescript/videos
 88 | # Whether to download the videos or just look at the output
 89 | download-videos: true
 90 | #specify the desired videoquality ranges from 160 - 1080. Can be left empty, it will default to the best quality.
 91 | video-quality: ">=1080p"
 92 | #specify if you explicitly want to ignore links that are posted in the run description and only check submission videos.
 93 | ignore-links-in-description: false
 94 | #specify if you explicitly only want to only look at your personal bests.
 95 | safe-only-pbs: true
 96 | #amount of concurrent video fragments that you want to download concurrently. Note that this will create the specified number of threads so if your system can't handle this leave this at 1.
 97 | concurrent-fragments: 1
 98 | ```
 99 | 
100 | ### Downloading from a speedrun.com leaderboard
101 | Before you start, you must set up a Twitch API App. You will only need to do this once. Instructions are provided below. You can also read Twitch's official instructions [here](https://dev.twitch.tv/docs/authentication/register-app/).
102 | 
103 | #### Setting up a Twitch API App
104 | 1. Enable two-factor authentication (2FA) for your account. This is required in order to create apps. To enable 2FA, navigate to [Security and Privacy](https://www.twitch.tv/settings/security), and follow the steps for enabling 2FA under the Security section.
105 | 2. Log in to the [developer console](https://dev.twitch.tv/console) using your Twitch account.
106 | 3. Select the **Applications** tab on the left side and then click **Register Your Application**.
107 | 4. Set the **Name** of your application to anything (I used "Highlight Limit Detector").
108 | 5. Set the **OAuth Redirect URLs** to `http://localhost`. Do not click Add.
109 | 6. Set the **Category** of your application to something fitting (I used "Analytics")
110 | 7. Keep the Client Type as Confidential.
111 | 8. Click **Create** to make your app. You may need to solve a Captcha.
112 | 9. Back in the **Applications** tab, locate your app under **Developer Applications**, and click **Manage**.
113 | 10. Scroll down to the **Client ID** and save the text in the textbox (looks like a random string of characters) for later.
114 | 11. Under **Client Secret**, click the **New Secret** button, confirm with **OK**, and then save the text that is shown for later. This will disappear after you leave the page, so be sure to save it somewhere safe.
115 |     * **WARNING: DO NOT SHARE THIS CLIENT SECRET**. Letting it become public can lead to people abusing the API with **YOUR** account, and can possibly lead to you getting banned from Twitch.
116 | 
117 | #### Setting up the configuration for a speedrun.com leaderboard
118 | 1. [Ignore](#ignoring-an-option) the `username` option if it is there and not ignored already. 
119 | 2. [Specify](#specifying-an-option) an option called `game`. The value should be the speedrun.com game abbreviation of the leaderboard you want to download. You can find the abbreviation in the url of a leaderboard, after `speedrun.com`. For example, the abbreviation of https://speedrun.com/sm64 is `sm64`.
120 | 3. Optionally, you can [specify](#specifying-an-option) the option `video-folder-name`, which will control the folder where your videos are stored. You can get the folder name by double clicking the address bar in Windows Explorer of the folder you want. Note that you must use forward slashes as path separators, e.g. `D:\speedrunrescuescript\videos` must become `D:/speedrunrescuescript/videos`. If you aren't sure, leave it as `videos`.
121 | 4. [Specify](#ignoring-an-option) the `app-id` option. The value should be the **Client ID** which you saved earlier.
122 | 5. [Specify](#ignoring-an-option) the `app-secret` option. The value should be the **Client Secret** which you saved earlier.
123 | 6. [Specify](#specifying-an-option) the `download-videos` option, by putting `true` if you want to fetch information about the user's runs and download the videos, or `false` if you only want to fetch the information.
124 | 7. [Specify](#specifying-an-option) the `allow-all` option. This should be `false` if you only want to download videos of channels who have not reached the 100h limit, or `true` if you want to download all runs regardless.
125 | 8. Optionally, you can [specify](#specifying-an-option) a video quality target using the `video-quality` option. The value should either be the video quality or the video height which you want to target, e.g. `"360p"`, `"720"`, `"1080p"`, `"542"`. It can also be `"best"`, which will just automatically download the highest quality video. The program will default to `"best"` is this option is omitted. **THIS OPTION SHOULD BE IN QUOTES**, i.e. do `"360p"`, not `360p`. In case the specified quality cannot be found, the program will try to find an adjacent quality and download that. Add `>=` before the quality to download the closest higher quality, e.g. `">=480p"`, and `>=` to download the closest lower quality, e.g. `"<=480p"`. If neither are specified, the program assumes `>=` is chosen. For example, if a video has the quality options 360p and 542p, this is the logic of `>=` and `<=`:
126 |     - `>=480p`: Will download 542p, as it is the next higher quality
127 |     - `<=480p`: Will download 360p, as it is the next lower quality
128 | 
129 |     Sometimes, the lower quality encodes Twitch produces are greater in size than the lower quality resolutions (e.g. viewing the sizes of [this video](https://www.twitch.tv/videos/1906117644) using [TwitchDownloader](https://github.com/lay295/TwitchDownloader) says that the Source resolution is smaller than 480p). After deciding the desired quality, the program will check if this is the case, and download the Source quality if it is smaller than the initial desired quality.
130 | 9. [Specify](#specifying-an-option) the `ignore-links-in-description` option with `true` if want to ignore video links that are posted in the run description and only check video links in the submission field, and `false` if you want to check links from both the submission field and the description. Not recommended as some people put other parts of the run in the description.
131 | 10. [Specify](#specifying-an-option) the `safe-only-pbs` option with `true` if you want to only want to consider your own pbs or `false` if you want to include obsolete runs. This option does only work on a user as of right now. 
132 | 11. Optionally you can [specify](#specifying-an-option) the `concurrent-fragments` option with a postive integer of how many video fragments you want to download concurrently. Note that this will create the specified number of threads so if your system can't handle this leave this at 1.
133 | 
134 | Here is an example config that will download twitch runs from [the speedrun.com leaderboard for Rockman EXE 4.5: Real Operation](https://speedrun.com/mmbn4.5).
135 | ```yaml
136 | # Specify either a game or a speedrun.com username
137 | game: "mmbn4.5"
138 | # The output folder of the videos. Stored on a separate drive in this example
139 | video-folder-name: D:/speedrunrescuescript/videos
140 | # Whether to download the videos or just look at the output
141 | app-id: e3udyluhnly6q6g2qp5a00nwaz73dj
142 | app-secret: n8p6t5qy6f33lnm3v8jjgwliqazps0
143 | download-videos: false
144 | allow-all: false
145 | #specify the desired videoquality ranges from 160 - 1080. Can be left empty, it will default to the best quality.
146 | video-quality: ">=1080p"
147 | #specify if you explicitly want to ignore links that are posted in the run description and only check submission videos.
148 | ignore-links-in-description: false
149 | #specify if you explicitly only want to only look at your personal bests.
150 | safe-only-pbs: true
151 | #amount of concurrent video fragments that you want to download concurrently. Note that this will create the specified number of threads so if your system can't handle this leave this at 1.
152 | concurrent-fragments: 1
153 | ```
154 | 
155 | ## Additional filtering
156 | If `download-videos` is `false`, you can edit the list of files that would be downloaded. For downloading user runs, the relevant files are in `output/user/<username>`. For downloading leaderboard runs, the relevant files are in `output/game/<game>`.
157 | 
158 | You can delete lines in `remaining_downloads.json` to omit downloading certain files. This can be useful if you want to avoid downloading runs which you know have a mirror elsewhere. Note that if you choose not to process the "remaining downloads file", this file will be overwritten, so please keep a backup somewhere.
159 | 
160 | ## Errors
161 | Q: I'm getting outdated information from speedrun.com/Twitch. How do I fix this?
162 | 
163 | A: To get updated information from speedrun.com, delete the folder named `srcom_cached`. To get updated information from Twitch, delete the file named `twitch_cache.json`. It is recommended to do this infrequently in order to save time by not issuing requests for information which is mostly up-to-date.
164 | 


--------------------------------------------------------------------------------
/release_info/README.txt:
--------------------------------------------------------------------------------
1 | Please visit https://github.com/matse007/SpeedrunRescueScript for more information.
2 | 


--------------------------------------------------------------------------------
/release_info/config.yml:
--------------------------------------------------------------------------------
 1 | # Specify either a game or a speedrun.com username
 2 | game: ""
 3 | username: ""
 4 | 
 5 | # Not necessary if you're downloading from a username
 6 | #app-id: <your app id>
 7 | #app-secret: <your app secret>
 8 | 
 9 | # Where to store the videos
10 | video-folder-name: videos
11 | 
12 | # Whether to download the videos or just look at the output
13 | download-videos: true
14 | 
15 | # For games, whether to download all videos irregardless of whether or not the channel has exceeded the 100 hour highlight limit
16 | allow-all: false
17 | 
18 | # specify the desired video quality (or height of the video). See the readme for more info. Can be left empty, it will default to the best quality.
19 | video-quality: "best"
20 | 
21 | # specify if you explicitly want to ignore links that are posted in the run description and only check submission videos.
22 | ignore-links-in-description: false
23 | 
24 | #specify if you explicitly only want to only look at your personal bests.
25 | safe-only-pbs: true
26 | 
27 | #amount of concurrent video fragments that you want to download concurrently. Note that this will create the specified number of threads so if your system can't handle this leave this at 1.
28 | concurrent-fragments: 1


--------------------------------------------------------------------------------
/release_info/speedrun_rescue.bat:
--------------------------------------------------------------------------------
1 | call bin\speedrunrescue.exe -cfg config.yml
2 | pause
3 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | requests~=2.32.3
2 | yt-dlp~=2025.2.19
3 | isodate~=0.7.2
4 | twitchAPI~=4.4.0
5 | configargparse~=1.7
6 | PyYAML~=6.0.2
7 | 


--------------------------------------------------------------------------------
/speedrunrescue.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | import re
  3 | import time
  4 | import requests
  5 | from urllib.parse import quote
  6 | from isodate import parse_duration
  7 | import yt_dlp
  8 | import yt_dlp.postprocessor
  9 | import json
 10 | from datetime import datetime
 11 | import srcomapi
 12 | import twitch_integration
 13 | from twitch_integration import twitch_c_v_url_regex, twitch_current_url_regex
 14 | import asyncio
 15 | import pathlib
 16 | import configargparse
 17 | import traceback
 18 | import sys
 19 | 
 20 | # Configuration
 21 | BASE_URL = "https://www.speedrun.com/api/v1"
 22 | RATE_LIMIT = 0.6  # 600ms between requests because rate limits. Something I learned today
 23 | DEBUG_FILE = "debug_log.txt"
 24 | HIGHLIGHTS_FILE = "twitch_highlights_mmbn5.txt"
 25 | HIGHLIGHTS_JSON = "twitch_highlights_mmbn5.json"
 26 | DOWNLOADS_REMAINING_FILE = "downloads_remaining_mmbn5.json"
 27 | timestamp = time.time()
 28 | jsonData ={}
 29 | 
 30 | def get_user_id(username):
 31 |     #getting the userid first from their username
 32 |     try:
 33 |         data = srcomapi.get(f"/users/{quote(username)}")
 34 |         return data['data']['id']
 35 |     except KeyError:
 36 |         print("Invalid username or API error")
 37 |         return None
 38 |     except requests.exceptions.RequestException as e:
 39 |         print(f"Network error: {e}")
 40 |         return None
 41 | 
 42 | def get_game_id(game):
 43 |     data = srcomapi.get(f"/games?abbreviation={game}&max=1&_bulk=yes")
 44 | 
 45 |     game_id = data["data"][0]["id"]
 46 |     return game_id
 47 | 
 48 | def get_personal_bests(user_id):
 49 |     run_ids = []
 50 |     url = f"/users/{user_id}/personal-bests?embed=game,category"
 51 |     try:
 52 |         # Fetch all personal bests in a single request
 53 |         data = srcomapi.get(url)
 54 |         # Extract the runs from the response
 55 |         if data and 'data' in data:
 56 |             for pb in data['data']:
 57 |                 pb = pb['run']['id']
 58 |                 run_ids.append(pb)
 59 |             return set(run_ids)
 60 |         else:
 61 |             print("No personal bests found or invalid response from the API.")
 62 |             return []
 63 |     except requests.exceptions.RequestException as e:
 64 |         print(f"Error fetching personal bests: {e}")
 65 |         return []
 66 | 
 67 | def get_all_runs(user_id):
 68 |     #gettign all runs with pagination in mind.
 69 |     runs = []
 70 |     offset = 0
 71 |     direction = "asc"
 72 |     last_id = ""
 73 | 
 74 |     while True:
 75 |         url = f"/runs?user={user_id}&max=200&offset={offset}&status=verified&embed=game,category,players&direction={direction}&orderby=date"
 76 |         try:
 77 |             data = srcomapi.get(url)
 78 |             if last_id:
 79 |                 found_duplicate = False
 80 |                 for index, run in enumerate(data['data']):
 81 |                     if run['id'] == last_id:
 82 |                         runs.extend(data['data'][0:index])
 83 |                         found_duplicate = True
 84 |                         break
 85 |                 if found_duplicate:
 86 |                     break
 87 |             runs.extend(data['data'])
 88 |             # Pagination check
 89 |             if data['pagination']['size'] < 200:
 90 |                 break
 91 |             offset += 200
 92 |             if offset >= 10_000:
 93 |                 if not last_id:
 94 |                     last_id = runs[-1]["id"]
 95 |                     direction = "desc"
 96 |                     offset = 0
 97 |                 else:
 98 |                     break
 99 |         except requests.exceptions.RequestException as e:
100 |             print(f"Error fetching runs: {e}")
101 |             break
102 |     return runs
103 | 
104 | def get_all_runs_from_game(game_id):
105 |     runs = []
106 |     offset = 0
107 |     direction = "asc"
108 |     last_id = ""
109 | 
110 |     while True:
111 |         url = f"/runs?game={game_id}&max=200&offset={offset}&status=verified&embed=game,category,players&direction={direction}&orderby=date"
112 |         try:
113 |             print(f"offset: {offset}")
114 |             data = srcomapi.get(url)
115 |             if last_id:
116 |                 found_duplicate = False
117 |                 for index, run in enumerate(data['data']):
118 |                     if run['id'] == last_id:
119 |                         runs.extend(data['data'][0:index])
120 |                         found_duplicate = True
121 |                         break
122 |                 if found_duplicate:
123 |                     break
124 |             runs.extend(data['data'])
125 | 
126 |             # Pagination check
127 |             if data['pagination']['size'] < 200:
128 |                 break
129 |             offset += 200
130 |             if offset >= 10_000:
131 |                 if not last_id:
132 |                     last_id = runs[-1]["id"]
133 |                     direction = "desc"
134 |                     offset = 0
135 |                 else:
136 |                     break
137 |         except requests.exceptions.RequestException as e:
138 |             print(f"Error fetching runs: {e}")
139 |             break
140 | 
141 |     return runs
142 | 
143 | twitch_url_regex = re.compile(r"(https?:\/\/)?(?:\w+\.)?twitch\.tv\/\S*", re.IGNORECASE)
144 | 
145 | IS_NOT_TWITCH_URL = 0
146 | IS_TWITCH_URL_BUT_NOT_TWITCH_VIDEO_URL = 1
147 | IS_TWITCH_VIDEO_URL = 2
148 | 
149 | def is_twitch_video_url(url):
150 |     # Checking with regex if its a twitch highlight
151 |     is_base_twitch_url = twitch_url_regex.search(url)
152 |     if is_base_twitch_url:
153 |         if twitch_current_url_regex.search(url) or twitch_c_v_url_regex.search(url):
154 |             return IS_TWITCH_VIDEO_URL
155 |         else:
156 |             return IS_TWITCH_URL_BUT_NOT_TWITCH_VIDEO_URL
157 |     else:
158 |         return IS_NOT_TWITCH_URL
159 | 
160 | #Checking if a stream is live. Only happens if its an old dead link that redirects to the channel and the channel is live
161 | def filter_live(info):
162 |     # If the video is live, return a string indicating the reason for skipping.
163 |     if info.get('is_live', False):
164 |         return "Skipping live stream"
165 |     # Otherwise, return None to allow the video.
166 |     return None
167 | 
168 | async def process_runs(runs, client, ignore_links_in_description):
169 |     #Extract Twitch highlight urls from runs
170 |     highlights = []
171 |     all_twitch_urls = []
172 |     for run in runs:
173 |         videos = run.get('videos') or {}
174 |         links = videos.get('links') or []
175 |         twitch_urls = []
176 |         if ignore_links_in_description and links:
177 |             links = [links[-1]]
178 |         for video in links:
179 |             uri = video.get('uri', '')
180 |             result = is_twitch_video_url(uri)
181 |             if result == IS_TWITCH_VIDEO_URL:
182 |                 twitch_urls.append(uri)
183 |             elif result == IS_TWITCH_URL_BUT_NOT_TWITCH_VIDEO_URL:
184 |                 print(f"Skipped non-video twitch url {uri}")
185 | 
186 |         if len(twitch_urls) != 0:
187 |             player_twitch_yt_urls = []
188 |             player_datas = run["players"]["data"]
189 |             player_names = []
190 |             for player in player_datas:
191 |                 if player["rel"] == "guest":
192 |                     player_names.append(player["name"])
193 |                 else:
194 |                     twitch_info = player.get("twitch")
195 |                     if twitch_info is not None:
196 |                         player_twitch_yt_urls.append(twitch_info["uri"])
197 | 
198 |                     youtube_info = player.get("youtube")
199 |                     if youtube_info is not None:
200 |                         player_twitch_yt_urls.append(youtube_info["uri"])
201 | 
202 |                     player_names.append(player["names"]["international"])
203 | 
204 |             highlight = {
205 |                 'players': player_names,
206 |                 'game': run['game']['data']['names']['international'],
207 |                 'abbreviation': run['game']['data']['abbreviation'],
208 |                 'category': run['category']['data']['name'],
209 |                 'time': run['times']['primary'],
210 |                 'urls': twitch_urls,
211 |                 'run_id': run['id'],
212 |                 'submitted': run.get('submitted', 'Unknown date'),
213 |                 'date': run.get('date', 'Unknown date'),
214 |                 'comment': run.get('comment', '')
215 |             }
216 | 
217 |             all_twitch_urls.extend(twitch_urls)
218 |             if len(player_twitch_yt_urls) != 0:
219 |                 highlight["vod_sites"] = player_twitch_yt_urls
220 | 
221 |             highlights.append(highlight)
222 | 
223 |     if client.twitch is not None:
224 |         await client.fetch_info(all_twitch_urls)
225 |         client.write_twitch_users_at_risk()
226 | 
227 |     return highlights
228 | 
229 | def format_date_of_submission(dateobj):
230 |     try:
231 |         formatted_date = datetime.fromisoformat(dateobj).strftime("%B %d, %Y")
232 |     except (KeyError, ValueError, TypeError):
233 |         formatted_date = "Unknown date"
234 |     return formatted_date
235 | 
236 | def save_highlights(highlights, client, is_game, highlights_filename, remaining_downloads_filename, highlights_json_filename):
237 |     #saving all highlights in a formatted way for the user i guess? My hope is I can automate uploads later
238 |     num_at_risk = 0
239 | 
240 |     for highlight in highlights:
241 |         new_twitch_urls = []
242 |         at_risk = False
243 |         for twitch_url in highlight["urls"]:
244 |             if not is_game:
245 |                 at_risk = True
246 |             else:
247 |                 at_risk = client.is_video_at_risk(twitch_url)
248 | 
249 |             if at_risk:
250 |                 new_twitch_urls.append(f"{twitch_url}*****")
251 |             else:
252 |                 new_twitch_urls.append(twitch_url)
253 | 
254 |         highlight["urls"] = new_twitch_urls
255 |         highlight["at_risk"] = at_risk
256 |         if at_risk:
257 |             num_at_risk += 1
258 | 
259 |     print(f"Number of at-risk runs: {num_at_risk}")
260 | 
261 |     with open(highlights_filename, "w", encoding="utf-8") as f:
262 |         for entry in highlights:
263 |             #formatting the iso format
264 | 
265 |             f.write(f"Players: {', '.join(entry['players'])}\n")
266 |             f.write(f"Category: {entry['category']}\n")
267 |             f.write(f"Time: {str(parse_duration(entry['time']))}\n")
268 |             f.write(f"Submitted Date: {format_date_of_submission(entry['submitted'])}\n")
269 |             f.write(f"Run Date: {format_date_of_submission(entry['date'])}\n")
270 |             f.write(f"URL: {' '.join(entry['urls'])}\n")
271 |             f.write(f"SRC Link: https://speedrun.com/{entry['abbreviation']}/runs/{entry['run_id']}\n")
272 |             f.write(f"Channel exceeds 100h limit: {entry['at_risk']}\n")
273 |             f.write(f"Comment: {entry['comment']}\n")
274 |             vod_sites = entry.get("vod_sites")
275 |             if vod_sites is not None:
276 |                 f.write(f"Vod sites: {' '.join(vod_sites)}\n")
277 | 
278 |             f.write("-" * 50 + "\n")
279 | 
280 |     urls = []
281 |     for entry in highlights:
282 |         src_link = f"https://speedrun.com/{entry['abbreviation']}/runs/{entry['run_id']}"
283 |         urls.extend((url, src_link) for url in entry["urls"])
284 | 
285 |     with open(remaining_downloads_filename, "w", encoding="utf-8") as f:
286 |         json.dump(urls, f, indent=4)
287 |     with open(highlights_json_filename, "w", encoding="utf-8") as f:
288 |         json.dump(highlights, f, indent=4)
289 | 
290 | 
291 | def print_exception(e, additional_msg=""):
292 |     error_msg = e.args[0] if len(e.args) >= 1 else "(Not provided)"
293 | 
294 |     output = f"""\
295 | 
296 | 
297 | 
298 | ================================================================
299 | ======================== ERROR OCCURRED ========================
300 | {additional_msg}{error_msg}
301 | ================================================================
302 | 
303 | -- DEBUG INFORMATION --
304 | Error type: {e.__class__.__name__}
305 | Traceback (most recent call last)
306 | {''.join(traceback.format_tb(e.__traceback__))}"""
307 | 
308 |     print(output)
309 | 
310 | class DesiredQuality:
311 |     __slots__ = ("download_best", "desired_height", "fallback_should_increase_quality")
312 | 
313 |     def __init__(self, download_best, desired_height, fallback_should_increase_quality):
314 |         self.download_best = download_best
315 |         self.desired_height = desired_height
316 |         self.fallback_should_increase_quality = fallback_should_increase_quality
317 | 
318 |     @classmethod
319 |     def from_string(cls, input_str):
320 |         input_str = input_str.strip()
321 |         if input_str == "best":
322 |             return cls(True, 0, False)
323 | 
324 |         if input_str.startswith("<="):
325 |             fallback_should_increase_quality = False
326 |             input_str = input_str[2:]
327 |         elif input_str.startswith(">="):
328 |             fallback_should_increase_quality = True
329 |             input_str = input_str[2:]
330 |         else:
331 |             fallback_should_increase_quality = True
332 | 
333 |         if input_str[-1] == "p":
334 |             input_str = input_str[:-1]
335 | 
336 |         try:
337 |             desired_height = int(input_str)
338 |         except ValueError:
339 |             raise RuntimeError(f"Invalid format for `video-quality` (got: {input_str}). Please specify the video quality or desired height of the video, e.g. 360p, 720, 1080, 542. You can also add >= or <= before the quality to tell the program whether to download the closest higher quality or closest lower quality, respectively, if the quality does not exist. If you omit >= and <=, it defaults to choosing the closest higher quality.")
340 | 
341 |         return cls(False, desired_height, fallback_should_increase_quality)
342 | 
343 | class QualityPostprocessor(yt_dlp.postprocessor.PostProcessor):
344 |     __slots__ = ("desired_height", "fallback_should_increase_quality")
345 | 
346 |     def __init__(self, desired_quality):
347 |         super(QualityPostprocessor, self).__init__(None)
348 |         self.desired_height = desired_quality.desired_height
349 |         self.fallback_should_increase_quality = desired_quality.fallback_should_increase_quality
350 | 
351 |     @staticmethod
352 |     def is_format_source(quality_format):
353 |         # No hard and fast rule, so test multiple things
354 |         if "source" in quality_format["format_id"].lower() or "source" in quality_format.get("format_note", "").lower() or "source" in quality_format.get("format", "").lower():
355 |             return True
356 |         else:
357 |             return False
358 | 
359 |     def run(self, info):
360 |         best_height = 0
361 |         best_tbr = 0
362 |         best_format_id = None
363 |         source_format = None
364 |         source_format_id = None
365 | 
366 |         formats_sorted_by_height = sorted(info["formats"], key=lambda x: x.get("height", 0))
367 | 
368 |         #with open("video_info.json", "w+") as f:
369 |         #    json.dump(info, f, indent=2)
370 |         #
371 |         #with open("formats_sorted_by_height.json", "w+") as f:
372 |         #    json.dump(formats_sorted_by_height, f, indent=2)
373 | 
374 |         #print(f"formats_sorted_by_height: {formats_sorted_by_height}")
375 |         for quality_format in formats_sorted_by_height:
376 |             if quality_format["vcodec"] == "none":
377 |                 #print(f"Continued {quality_format}")
378 |                 continue
379 | 
380 |             format_id = quality_format["format_id"]
381 |             # some videos e.g. https://www.twitch.tv/videos/118628100
382 |             # have no height associated with some formats
383 |             # not really sure how to integrate this into the current quality filtering logic, so just skip these for now
384 |             height = quality_format.get("height")
385 |             if height is None:
386 |                 continue
387 | 
388 |             tbr = quality_format["tbr"]
389 |             is_source = QualityPostprocessor.is_format_source(quality_format)
390 | 
391 |             if is_source:
392 |                 source_format = quality_format
393 | 
394 |             #print(f"best_height: {best_height}, height: {height}, self.desired_height: {self.desired_height}, is_source: {is_source}, quality_format: {quality_format}\n\n\n")
395 | 
396 |             if best_height == 0 or height < self.desired_height:
397 |                 best_height = height
398 |                 best_tbr = tbr
399 |                 best_format_id = format_id
400 |             # edge case for when there are multiple formats with the same height and we have to choose between them
401 |             elif height == self.desired_height:
402 |                 # if the best height isn't even the desired height yet, then set it so
403 |                 # otherwise, it is, and we need to choose out of the two which to pick
404 |                 # I think this only happens when one is source quality
405 | 
406 |                 if best_height != self.desired_height or is_source:
407 |                     best_height = height
408 |                     best_tbr = tbr
409 |                     best_format_id = format_id
410 |             # only do this logic if we want to fallback to a higher quality
411 |             # if the height we chose doesn't match the desired height
412 |             elif self.fallback_should_increase_quality:
413 |                 # if the current best height is less than the desired height, and we want to fallback to quality higher
414 |                 # edge case to pick the source quality when we meet qualities with the same height
415 |                 if best_height < self.desired_height or (best_height == height and is_source):
416 |                     best_height = height
417 |                     best_tbr = tbr
418 |                     best_format_id = format_id
419 | 
420 |         # Sometimes, the source format size can be less than encoded formats at a lower resolution
421 |         # if this is true for the best format we picked, then choose the source format
422 |         if source_format is not None and source_format.get("tbr") is not None and best_tbr is not None and source_format["tbr"] < best_tbr:
423 |             best_format_id = source_format["format_id"]
424 | 
425 |         # include audio format just in case somehow, the best video format has no audio
426 |         new_formats = [quality_format for quality_format in info["formats"] if quality_format["format_id"] == best_format_id or (quality_format["acodec"] != "none" and quality_format["vcodec"] == "none")]
427 | 
428 |         # if we somehow can't find any formats, then just try to download anything
429 |         if len(new_formats) != 0:
430 |             info["formats"] = new_formats
431 | 
432 |         #print(f"Post processor info: {info}")
433 | 
434 |         return [], info
435 | 
436 | def download_videos(remaining_downloads_filename, video_folder_name, downloaded_video_info_filename, download_type_str, game_or_username, allow_all, desired_quality, concurrent_fragments):
437 |     #pathlib.Path(download_folder_name).mkdir(parents=True, exist_ok=True)
438 |     #downloading videos out of the provided dict using the yt-dlp module.
439 | 
440 |     download_info_template = """\
441 | URL: %(original_url)s
442 | speedrun.com URL: {src_url}
443 | Channel: %(uploader_id)s
444 | Title: %(title)s
445 | Date: %(upload_date>%Y-%m-%d)s
446 | Duration: %(duration>%H:%M:%S)s
447 | Description:
448 | %(description)s
449 | =========================================================="""
450 | 
451 |     print_to_file_list = [[download_info_template, downloaded_video_info_filename]]
452 | 
453 |     ydl_options = {
454 |         'format': "bestvideo+bestaudio/best",
455 |         'outtmpl': f'{video_folder_name}/{download_type_str}/{game_or_username}/%(title)s_%(id)s_%(format_id)s.%(ext)s',
456 |         'noplaylist': True,
457 |         'match_filter': filter_live, #uses a function to determine if the dead link now links to a stream and accidentially starts to download this instead. Hopefully should skip livestreams
458 |         "print_to_file": {"after_video": print_to_file_list},
459 |         'verbose': True, # for debugging stuff
460 |         'sleep-interval': 5, #so i dont get insta blacklisted by twitch
461 |         'retries': 1,  # Retry a second time a bit later in case there was simply an issue
462 |         'retry-delay': 10,  # Wait 10 seconds before retrying
463 |         'concurrent_fragment_downloads': concurrent_fragments,
464 |     }
465 | 
466 |     if desired_quality.download_best:
467 |         quality_postprocessor = None
468 |     else:
469 |         quality_postprocessor = QualityPostprocessor(desired_quality)
470 | 
471 |     while True:
472 |         try:
473 |             # Load URLs from JSON file
474 |             with open(remaining_downloads_filename, "r", encoding="utf-8") as f:
475 |                 urls = json.load(f)
476 | 
477 |             # Stop if no URLs are left
478 |             if not urls:
479 |                 print("All downloads completed!")
480 |                 break
481 | 
482 |             url_info = urls[0]
483 |             if isinstance(url_info, list):
484 |                 current_url, src_link = url_info
485 |             else:
486 |                 current_url = url_info
487 |                 src_link = "N/A"
488 | 
489 |             sleep_time = 15
490 |             if allow_all or current_url.endswith("*****"):
491 |                 clean_url = current_url.replace("*****", "") # Cleaning up the extraspacing
492 |                 print(f"Downloading: {clean_url}")
493 |                 print_to_file_list[0][0] = download_info_template.format(src_url=src_link)
494 |                 with yt_dlp.YoutubeDL(ydl_options) as ydl:
495 |                     if quality_postprocessor is not None:
496 |                         ydl.add_post_processor(quality_postprocessor, when="pre_process")
497 | 
498 |                     try:
499 |                         ydl.download([clean_url])
500 |                     except Exception as e:
501 |                         error_msg = e.args[0] if len(e.args) >= 1 else ""
502 |                         # Video does not exist
503 |                         # video_does_not_exist_regex = re.compile(r"Video \w+ does not exist", flags=re.IGNORECASE) <-- seemed not to work. as a quick fix i disabled it and check manually
504 |                         if ("does not exist" in error_msg) or ("The channel is not currently live" in error_msg):
505 |                             print(f"Skipping invalid or dead link: {clean_url}")
506 |                             with open(downloaded_video_info_filename, "a+") as f:
507 |                                 f.write(f"{clean_url} for {src_link} does not exist\n==========================================================\n")
508 |                             #sleep_time = 15
509 | 
510 |                         else:
511 |                             print_exception(e, f"Failed to download {clean_url}: ")
512 |                             with open(downloaded_video_info_filename, "a+") as f:
513 |                                 f.write(f"Failed to download {clean_url}: {error_msg}\n==========================================================\n")
514 |             else:
515 |                 print(f"Skipping {current_url} (not marked as at-risk)")
516 |                 sleep_time = 0
517 | 
518 |             urls.pop(0)
519 |             with open(remaining_downloads_filename, "w", encoding="utf-8") as f:
520 |                 json.dump(urls, f, indent=4)
521 |             if sleep_time != 0:
522 |                 print(f"Waiting {sleep_time} seconds before downloading the next video.")
523 |                 time.sleep(sleep_time)
524 |         except FileNotFoundError:
525 |             print("No remaining downloads file found")
526 |             break
527 |         except json.JSONDecodeError:
528 |             print("Error reading JSON file")
529 |             break
530 |         except KeyboardInterrupt:
531 |             print("\nDownload interrupted by user. Progress saved.")
532 |             with open(remaining_downloads_filename, "w", encoding="utf-8") as f:
533 |                 json.dump(urls, f, indent=4)
534 |             break
535 |         except Exception as e:
536 |             print_exception(e, "Unexpected error: ")
537 |             print(f"Unexpected error: {e}")
538 |             break
539 | 
540 | def load_remaining_downloads(remaining_downloads_filename):
541 |     try:
542 |         with open(remaining_downloads_filename, "r", encoding="utf-8") as f:
543 |             urls = json.load(f)
544 |         if not urls:
545 |             print("No remaining downloads file found")
546 |             return None
547 |         return urls
548 |     except FileNotFoundError:
549 |         print("No remaining downloads file found")
550 |     except json.JSONDecodeError:
551 |         print("Error reading JSON file")
552 |     except Exception as e:
553 |         print(f"Unexpected error: {e}")
554 | 
555 | def convert_bool(value):
556 |     value_str_lower = value.lower()
557 |     if value_str_lower == "true":
558 |         return True
559 |     elif value_str_lower == "false":
560 |         return False
561 |     else:
562 |         raise configargparse.ArgumentTypeError(f"Invalid bool type (must be `true` or `false`, got {value})")
563 | 
564 | def process_personal_bests(runs, pb_ids):
565 |     return [run for run in runs if run["id"] in pb_ids]
566 | 
567 | async def main():
568 |     ap = configargparse.ArgumentParser(
569 |         allow_abbrev=False,
570 |         config_file_parser_class=configargparse.YAMLConfigFileParser,
571 |         config_file_open_func=lambda filename: open(
572 |             filename, "r", encoding="utf-8"
573 |         )
574 |     )
575 | 
576 |     ap.add_argument("-cfg", "--config", dest="config", default="config.yml", is_config_file=True, help="Alternative config file to put in command line arguments. Arguments provided on the command line will override arguments provided in the config file, if specified.")
577 |     ap.add_argument("--game", dest="game", default=None, help="The game of the leaderboard you want to scrape for Twitch links. Either this or `username:` must be specified")
578 |     ap.add_argument("--username", dest="username", default=None, help="The speedrun.com username for the runs you want to scrape for Twitch links. Either this or `game:` must be specified")
579 |     ap.add_argument("--app-id", dest="app_id", default=None, help="Name of the Twitch API App ID used for checking if a user has 100 or more hours of highlights. Required for game download. Not necessary for username download.")
580 |     ap.add_argument("--app-secret", dest="app_secret", default=None, help="Name of the Twitch API App Secret. See `app-id:` for more info")
581 |     ap.add_argument("--video-folder-name", dest="video_folder_name", default="videos", help="Folder where the videos will be stored. Videos will automatically be sorted by game and username. Will be created if it doesn't exist already. Default is a folder \"videos\" in the same directory as the script")
582 |     ap.add_argument("--cache-filename", dest="cache_filename", default="twitch_cache.json", help="File containing information about users' videos from the Twitch API (for determining if a user has >= 100 hours of highlights). Default is twitch_cache.json")
583 |     ap.add_argument("--download-videos", dest="download_videos", type=convert_bool, help="Whether to download videos after scraping them from speedrun.com", required=True)
584 |     ap.add_argument("--allow-all", dest="allow_all", type=convert_bool, help="Whether to download all found videos regardless of whether or not the channel they exist on have reached the >=100h highlight limit.", required=True)
585 |     ap.add_argument("--video-quality", dest="video_quality", default="best", help="Desired closest video quality that you want to download. For this option, specify the video quality or desired height of the video, e.g. 360p, 720, 1080, 542. Choosing \"best\" will just download the best quality available. THIS OPTION SHOULD BE IN QUOTES, i.e. do \"360p\", not 360p. You can also add >= or <= before the quality to tell the program whether to download the closest higher quality or closest lower quality, respectively, if the quality does not exist. If you omit >= and <=, it defaults to choosing the closest higher quality. Defaults to \"best\".")
586 |     ap.add_argument("--ignore-links-in-description", dest="ignore_links_in_description", type=convert_bool, help="Whether to ignore twitch links that are in the video description or not. By default this is disabled.", required=True)
587 |     ap.add_argument("--concurrent-fragments", dest="concurrent_fragments", type=int, help="How many concurrent fragments to download of a video. By default this is 1.")
588 |     ap.add_argument("--safe-only-pbs", dest="save_only_pbs", type=convert_bool,help="If set to true, only the PBs of the runner or all PBs on the leaderboard are being saved.",required=True)
589 |     args = ap.parse_args()
590 | 
591 |     desired_quality = DesiredQuality.from_string(args.video_quality)
592 | 
593 |     print(f"Using quality: {args.video_quality}")
594 | 
595 |     if args.game and args.username:
596 |         raise RuntimeError("Only one of `username:` or `game:` must be specified in config.yml!")
597 | 
598 |     game = args.game
599 |     username = args.username
600 |     if game:
601 |         download_type_str = "game"
602 |         game_or_username = game
603 |         is_game = True
604 |     elif not username:
605 |         raise RuntimeError("One of `username:` or `game:` must be specified in config.yml!")
606 |     else:
607 |         download_type_str = "user"
608 |         game_or_username = username
609 |         is_game = False
610 | 
611 |     base_output_dirpath = pathlib.Path(f"output/{download_type_str}/{game_or_username}")
612 |     base_output_dirpath.mkdir(parents=True, exist_ok=True)
613 | 
614 |     highlights_filename = f"{base_output_dirpath}/twitch_highlights.txt"
615 |     highlights_json_filename = f"{base_output_dirpath}/twitch_highlights.json"
616 |     remaining_downloads_filename = f"{base_output_dirpath}/remaining_downloads.json"
617 |     downloaded_video_info_filename = f"{base_output_dirpath}/download_info.txt"
618 | 
619 |     concurrent_fragments = args.concurrent_fragments or 1
620 | 
621 |     #Check if there are remaining Downloads left.
622 |     remaininDownloads = load_remaining_downloads(remaining_downloads_filename)
623 |     if remaininDownloads and input("A remaining downloads file has been found. Do you want to continue the download? (y/n): ").lower().startswith("y"):
624 |         download_videos(remaining_downloads_filename, args.video_folder_name, downloaded_video_info_filename, download_type_str, game_or_username, args.allow_all, desired_quality, concurrent_fragments)
625 |         return
626 | 
627 |     if is_game:
628 |         print(f"Searching for {game}...")
629 |         game_id = get_game_id(game)
630 |         print(f"Getting all runs")
631 |         runs = get_all_runs_from_game(game_id)
632 |     else:
633 |         print(f"Searching for {username}...")
634 |         # Getting the user id first from the username.
635 |         user_id = get_user_id(username)
636 |         if not user_id:
637 |             print("User not found")
638 |             return
639 | 
640 |         # Fetch all runs from user
641 |         print("Fetching runs...")
642 |         runs = get_all_runs(user_id)
643 |         if args.save_only_pbs:
644 |             pb_ids = get_personal_bests(user_id)
645 |             runs = process_personal_bests(runs, pb_ids)
646 | 
647 |     print(f"Found {len(runs)} verified runs")
648 | 
649 |     if (args.app_id is None or args.app_secret is None) and is_game:
650 |         raise RuntimeError("Twitch integration must be present if you are requesting a game to be downloaded")
651 |     client = await twitch_integration.TwitchClient.init(args)
652 |     # Checking for highlights
653 |     highlights = await process_runs(runs, client, args.ignore_links_in_description)
654 |     print(f"Found {len(highlights)} Twitch highlights")
655 | 
656 |     # Save highlights
657 |     save_highlights(highlights, client, is_game, highlights_filename, remaining_downloads_filename, highlights_json_filename)
658 |     print(f"Saved highlights to {highlights_filename}")
659 | 
660 |     # Download prompt for users and downloading videos
661 |     if highlights and args.download_videos:
662 |         download_videos(remaining_downloads_filename, args.video_folder_name, downloaded_video_info_filename, download_type_str, game_or_username, args.allow_all, desired_quality, concurrent_fragments)
663 |         print("Download completed")
664 | 
665 | if __name__ == "__main__":
666 |     try:
667 |         asyncio.run(main())
668 |     except Exception as e:
669 |         print_exception(e)
670 |         sys.exit(1)
671 | 


--------------------------------------------------------------------------------
/srcomapi.py:
--------------------------------------------------------------------------------
  1 | import traceback
  2 | import requests
  3 | import urllib
  4 | import pathlib
  5 | import json
  6 | import time
  7 | import re
  8 | import sys
  9 | 
 10 | class CacheSettings:
 11 |     __slots__ = ("read_cache", "write_cache", "cache_dirname", "rate_limit", "retry_on_empty")
 12 | 
 13 |     def __init__(self, read_cache, write_cache, cache_dirname, rate_limit):
 14 |         self.read_cache = read_cache
 15 |         self.write_cache = write_cache
 16 |         self.cache_dirname = cache_dirname
 17 |         self.rate_limit = rate_limit
 18 | 
 19 | default_cache_settings = CacheSettings(True, True, "srcom_cached", True)
 20 | 
 21 | def get_cached_endpoint_filepath(endpoint, params, cache_settings):
 22 |     endpoint_as_pathname = f"{cache_settings.cache_dirname}/{urllib.parse.quote(endpoint, safe='')}_q_{urllib.parse.urlencode(params, doseq=True)}.json"
 23 | 
 24 |     return pathlib.Path(endpoint_as_pathname)
 25 | 
 26 | API_URL = "https://www.speedrun.com/api/v1"
 27 | 
 28 | def get(endpoint, params=None, cache_settings=None, require_success=False):
 29 |     exception_sleep_time = 15
 30 | 
 31 |     while True:
 32 |         try:
 33 |             return get_in_loop_code(endpoint, params, cache_settings)[0]
 34 |         except ConnectionError as e:
 35 |             print(f"Exception occurred: {e}\n{''.join(traceback.format_tb(e.__traceback__))}\nSleeping for {exception_sleep_time} seconds now.")
 36 |             time.sleep(exception_sleep_time)
 37 |             exception_sleep_time *= 2
 38 |             if exception_sleep_time > 1000:
 39 |                 exception_sleep_time = 1000
 40 | 
 41 | def get_in_loop_code(endpoint, params, cache_settings):
 42 |     if params is None:
 43 |         params = {}
 44 | 
 45 |     if cache_settings is None:
 46 |         cache_settings = default_cache_settings
 47 | 
 48 |     endpoint_as_path = get_cached_endpoint_filepath(endpoint, params, cache_settings)
 49 |     if cache_settings.read_cache and endpoint_as_path.is_file():
 50 |         error_code = None
 51 | 
 52 |         endpoint_as_path_size = endpoint_as_path.stat().st_size
 53 |         if endpoint_as_path_size == 0:
 54 |             return {}, 404
 55 | 
 56 |         #print(f"endpoint_as_path: {endpoint_as_path}")
 57 |         with open(endpoint_as_path, "r", encoding="utf-8") as f:
 58 |             data = json.load(f)
 59 | 
 60 |         if error_code is None:
 61 |             return data, 200
 62 | 
 63 |     url = f"{API_URL}{endpoint}"
 64 |     print(f"url: {url}?{urllib.parse.urlencode(params, doseq=True)}")
 65 |     start_time = time.time()
 66 |     r = requests.get(url, params=params)
 67 |     end_time = time.time()
 68 |     print(f"Request took {end_time - start_time}.")
 69 | 
 70 |     if cache_settings.write_cache:
 71 |         endpoint_as_path.parent.mkdir(parents=True, exist_ok=True)
 72 | 
 73 |     if r.status_code != 200:
 74 |         if r.status_code >= 400 and r.status_code < 500:
 75 |             raise RuntimeError(f"API returned {r.status_code}: {r.reason}")
 76 | 
 77 |         raise ConnectionError(f"Got status code {r.status_code}!")
 78 |         #return r.reason, r.status_code
 79 |         #if r.status_code != 404:
 80 |         #    raise ConnectionError(f"Got status code {r.status_code}!")
 81 |         #
 82 |         #if cache_settings.write_cache:
 83 |         #    if r.status_code == 404:
 84 |         #        endpoint_as_path.touch()
 85 |         #    else:
 86 |         #        print(f"Got non-404 error code: {r.status_code}")
 87 |         #        with open(endpoint_as_path, "w+") as f:
 88 |         #            f.write(str(r.status_code))
 89 |         #
 90 |         #return r.reason, r.status_code
 91 | 
 92 |     data = r.json()
 93 | 
 94 |     if cache_settings.write_cache:
 95 |         endpoint_as_path.parent.mkdir(parents=True, exist_ok=True)
 96 |         data_as_str = json.dumps(data, separators=(",", ":"))
 97 |         exit_after_write = False
 98 |         while True:
 99 |             try:
100 |                 with open(endpoint_as_path, "w+", encoding="utf-8") as f:
101 |                     f.write(data_as_str)
102 |                 break
103 |             except KeyboardInterrupt:
104 |                 print("Saving speedrun.com API cache, please stop Ctrl-C'ing")
105 |                 exit_after_write = True
106 | 
107 |         if exit_after_write:
108 |             sys.exit(1)
109 | 
110 |     if cache_settings.rate_limit:
111 |         time.sleep(1)
112 | 
113 |     return data, r.status_code
114 | 


--------------------------------------------------------------------------------
/twitch_integration.py:
--------------------------------------------------------------------------------
  1 | from twitchAPI.twitch import Twitch
  2 | from twitchAPI.helper import first
  3 | import asyncio
  4 | import json
  5 | import pathlib
  6 | import itertools
  7 | import re
  8 | import sys
  9 | 
 10 | twitch_c_v_url_regex = re.compile(r"(?:https?:\/\/)?(?:\w+\.)?twitch\.tv\/(\w+)\/([cv])\/(\d+)", re.IGNORECASE)
 11 | twitch_current_url_regex = re.compile(r"(?:https?:\/\/)?(?:\w+\.)?twitch\.tv\/videos/(\d+)", re.IGNORECASE)
 12 | 
 13 | def grouper(iterable, n):
 14 |     it = iter(iterable)
 15 |     while True:
 16 |         chunk = list(itertools.islice(it, n))
 17 |         if not chunk:  # Stop when no more elements are left
 18 |             break
 19 |         yield chunk
 20 | 
 21 | duration_regex = re.compile(r"^(?:([0-9]+)h)?(?:([0-9]+)m)?(?:([0-9]+)s?)?$")
 22 | 
 23 | def parse_duration(duration):
 24 |     match_obj = duration_regex.match(duration.strip())
 25 |     if match_obj:
 26 |         hours = match_obj.group(1)
 27 |         minutes = match_obj.group(2)
 28 |         seconds = match_obj.group(3)
 29 |         if hours is None and minutes is None and seconds is None:
 30 |             raise RuntimeError(f"Invalid duration \"{expiry_time}\" provided for expiry time!")
 31 | 
 32 |         if hours is None:
 33 |             hours = 0
 34 |         if minutes is None:
 35 |             minutes = 0
 36 |         if seconds is None:
 37 |             seconds = 0
 38 | 
 39 |         try:
 40 |             duration_as_seconds = int(hours) * 3600 + int(minutes) * 60 + int(seconds)
 41 |         except ValueError:
 42 |             raise RuntimeError(f"At least one of hours, seconds, and minutes not an integer!")
 43 |     else:
 44 |         raise RuntimeError(f"Invalid duration \"{expiry_time}\" provided for expiry time!")
 45 | 
 46 |     return duration_as_seconds
 47 | 
 48 | class UserCache:
 49 |     __slots__ = ("cache_filename", "cache_info")
 50 | 
 51 |     def __init__(self, cache_filename):
 52 |         cache_filepath = pathlib.Path(cache_filename)
 53 |         if cache_filepath.is_file():
 54 |             with open(cache_filename, "r") as f:
 55 |                 cache_info = json.load(f)
 56 |         else:
 57 |             cache_info = {
 58 |                 "video_infos": {},
 59 |                 "user_infos": {},
 60 |                 "total_duration": -1
 61 |             }
 62 | 
 63 |         self.cache_info = cache_info
 64 |         self.cache_filename = cache_filename
 65 | 
 66 |     def parse_valid_video_id(self, video_url, update_c=False):
 67 |         match_obj = twitch_c_v_url_regex.match(video_url)
 68 |         if match_obj:
 69 |             url_type = match_obj.group(2)
 70 |             if url_type == "c":
 71 |                 if update_c:
 72 |                     user_info = self.get_user_info(match_obj.group(1))
 73 |                     user_info["c_video_urls"].append(video_url)
 74 |                     print(f"Skipped c-type url {video_url}")
 75 |                 video_id = None
 76 |             else:
 77 |                 video_id = match_obj.group(3)
 78 |         else:
 79 |             match_obj = twitch_current_url_regex.match(video_url)
 80 |             if match_obj:
 81 |                 video_id = match_obj.group(1)
 82 |             else:
 83 |                 print(f"Skipped non-video url {video_url}")
 84 |                 video_id = None
 85 | 
 86 |         return video_id
 87 | 
 88 |     async def update_video_infos_from_video_urls(self, twitch, video_urls):
 89 |         valid_nonfound_video_ids = []
 90 |         print("Finding valid video ids!")
 91 |         for video_url in video_urls:
 92 |             video_id = self.parse_valid_video_id(video_url, update_c=True)
 93 |             if video_id is not None:
 94 |                 video_info = self.cache_info["video_infos"].get(video_id)
 95 |                 if video_info is None:
 96 |                     valid_nonfound_video_ids.append(video_id)
 97 | 
 98 |         if len(valid_nonfound_video_ids) != 0:
 99 |             print(f"Fetching video info from {len(valid_nonfound_video_ids)} valid video ids!")
100 |             for i, valid_nonfound_video_ids_chunk in enumerate(grouper(valid_nonfound_video_ids, 100)):
101 |                 print(f"video_ids_chunk: {valid_nonfound_video_ids_chunk}")
102 |                 print(f"Parsing chunk {100*i}")
103 |                 async for video_info_obj in twitch.get_videos(ids=valid_nonfound_video_ids_chunk, first=100):
104 |                     video_info = video_info_obj.to_dict()
105 |                     self.cache_info["video_infos"][video_info["id"]] = video_info
106 |             
107 |             valid_nonfound_video_ids_as_set = frozenset(valid_nonfound_video_ids)
108 |             found_video_info_ids = frozenset(self.cache_info["video_infos"].keys())
109 |             missing_video_ids = valid_nonfound_video_ids_as_set - found_video_info_ids
110 | 
111 |             for missing_video_id in missing_video_ids:
112 |                 self.cache_info["video_infos"][missing_video_id] = {"missing": True}
113 | 
114 |         self.save_cache()
115 | 
116 |     async def update_user_infos_from_video_infos(self, twitch):
117 |         for video_id, video_info in self.cache_info["video_infos"].items():
118 |             if video_info.get("missing"):
119 |                 continue
120 | 
121 |             username = video_info["user_login"]
122 |             user_info = self.get_user_info(username)
123 |             if len(user_info["videos"]) == 0:
124 |                 print(f"Downloading video info for {username}!")
125 |                 user_id = video_info["user_id"]
126 |                 num_video_infos = 0
127 |                 async for user_video_info_obj in twitch.get_videos(user_id=user_id, first=100):
128 |                     user_video_info = user_video_info_obj.to_dict()
129 |                     user_info["videos"][user_video_info["id"]] = user_video_info
130 |                     num_video_infos += 1
131 |                 
132 |                 print(f"num_video_infos: {num_video_infos}")
133 |                 self.save_cache()
134 | 
135 |     def determine_at_risk_users(self):
136 |         print(f"Determining at risk users!")
137 |         for username, user_info in self.cache_info["user_infos"].items():
138 |             total_duration = 0
139 |             for video_id, user_video_info in user_info["videos"].items():
140 |                 if user_video_info["type"] == "highlight":
141 |                     total_duration += parse_duration(user_video_info["duration"])
142 | 
143 |             user_info["total_duration"] = total_duration
144 | 
145 |         self.save_cache()
146 | 
147 |     def is_video_at_risk(self, video_url):
148 |         video_id = self.parse_valid_video_id(video_url)
149 |         if video_id is None:
150 |             return False
151 |     
152 |         video_info = self.cache_info["video_infos"].get(video_id)
153 |         if video_info is None or video_info.get("missing"):
154 |             # Want to report missing videos via yt-dlp
155 |             return True
156 | 
157 |         username = video_info["user_login"]
158 |         user_info = self.cache_info["user_infos"].get(username)
159 |         if user_info is None:
160 |             # Be safe and download the video if for some reason the username doesn't exist
161 |             return True
162 | 
163 |         if user_info["total_duration"] >= 360000:
164 |             return True
165 |         else:
166 |             return False
167 | 
168 |     def write_twitch_users_at_risk(self):
169 |         twitch_users_sorted_by_total_duration = sorted(self.cache_info["user_infos"].items(), key=lambda x: x[1]["total_duration"], reverse=True)
170 |         output = "".join(f"{username}: {user_info['total_duration']}\n" for username, user_info in twitch_users_sorted_by_total_duration)
171 | 
172 |         with open("output/twitch_users_sorted_by_total_duration.txt", "w+") as f:
173 |             f.write(output)
174 | 
175 |     def get_user_info(self, username):
176 |         user_info = self.cache_info["user_infos"].get(username)
177 |         if user_info is None:
178 |             user_info = {
179 |                 "c_video_urls": [],
180 |                 "videos": {}
181 |             }
182 |             self.cache_info["user_infos"][username] = user_info
183 |         return user_info
184 | 
185 |     def save_cache(self):
186 |         exit_after_write = False
187 |         cache_info_as_str = json.dumps(self.cache_info, indent=2)
188 |         while True:
189 |             try:
190 |                 with open(self.cache_filename, "w+") as f:
191 |                     f.write(cache_info_as_str)
192 | 
193 |                 break
194 |             except KeyboardInterrupt:
195 |                 print("Saving Twitch cache, please stop Ctrl-C'ing")
196 |                 exit_after_write = True
197 | 
198 |         if exit_after_write:
199 |             sys.exit(1)
200 | 
201 | class TwitchClient:
202 |     __slots__ = ("twitch", "user_cache")
203 | 
204 |     def __init__(self, args, twitch):
205 |         self.twitch = twitch
206 |         self.user_cache = UserCache(args.cache_filename)
207 | 
208 |     @classmethod
209 |     async def init(cls, args):
210 |         app_id = args.app_id
211 |         app_secret = args.app_secret
212 |         if app_id is None or app_secret is None:
213 |             twitch = None
214 |         else:
215 |             twitch = await Twitch(app_id, app_secret)
216 | 
217 |         return cls(args, twitch)
218 | 
219 |     async def fetch_info(self, video_urls):
220 |         await self.user_cache.update_video_infos_from_video_urls(self.twitch, video_urls)
221 |         await self.user_cache.update_user_infos_from_video_infos(self.twitch)
222 |         self.user_cache.determine_at_risk_users()
223 | 
224 |     def is_video_at_risk(self, video_url):
225 |         return self.user_cache.is_video_at_risk(video_url)    
226 | 
227 |     def write_twitch_users_at_risk(self):
228 |         self.user_cache.write_twitch_users_at_risk()
229 | 


--------------------------------------------------------------------------------