├── .github
    ├── FUNDING.yml
    └── ISSUE_TEMPLATE
    │   ├── 1_issue_report.yml
    │   ├── 2_feature_request.yml
    │   ├── 3_question.yml
    │   └── config.yml
├── .gitignore
├── CHANGELOG.md
├── LICENSE
├── README.md
├── example-config.toml
├── isubrip
    ├── __init__.py
    ├── __main__.py
    ├── cli.py
    ├── commands
    │   ├── __init__.py
    │   └── download.py
    ├── config.py
    ├── constants.py
    ├── data_structures.py
    ├── logger.py
    ├── scrapers
    │   ├── __init__.py
    │   ├── appletv_scraper.py
    │   ├── itunes_scraper.py
    │   └── scraper.py
    ├── subtitle_formats
    │   ├── __init__.py
    │   ├── subrip.py
    │   ├── subtitles.py
    │   └── webvtt.py
    └── utils.py
├── pyproject.toml
└── uv.lock


/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | github: MichaelYochpaz


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/1_issue_report.yml:
--------------------------------------------------------------------------------
  1 | name: Bug / Issue Report
  2 | description: Report a bug or an issue.
  3 | title: "[Issue]: "
  4 | labels: [bug]
  5 | body:
  6 |   - type: markdown
  7 |     attributes:
  8 |       value: >
  9 |         **Before opening an issue, please make sure you are running the latest version of iSubRip,
 10 |         and that there isn't an already-existing open issue for your issue under the [issues tab](https://github.com/MichaelYochpaz/iSubRip/labels/bug).**
 11 |   - type: checkboxes
 12 |     id: check-confirmation
 13 |     attributes:
 14 |       label: Confirmations
 15 |       options:
 16 |         - label: "I have checked the issues tab, and couldn't find an existing open issue for the issue I want to report."
 17 |           required: true
 18 |   - type: dropdown
 19 |     id: os-type
 20 |     attributes:
 21 |       label: OS Type
 22 |       description: The operation system that's being used to run iSubRip.
 23 |       options:
 24 |         - Windows
 25 |         - MacOS
 26 |         - Linux
 27 |     validations:
 28 |       required: true
 29 |   - type: input
 30 |     id: python-version
 31 |     attributes:
 32 |       label: Python Version
 33 |       description: |
 34 |         The Python version that's being used to run iSubRip.
 35 |         Can be checked by running `python --version`.
 36 |       placeholder: |
 37 |         Example: "3.10.6"
 38 |     validations:
 39 |       required: true
 40 |   - type: input
 41 |     id: version
 42 |     attributes:
 43 |       label: Package Version
 44 |       description: |
 45 |         iSubRip's version that's being used.
 46 |         Can be checked by running `python -m pip show isubrip`.
 47 |       placeholder: |
 48 |         Example: "2.3.2"
 49 |     validations:
 50 |       required: true
 51 |   - type: textarea
 52 |     id: description
 53 |     attributes:
 54 |       label: Description
 55 |       description: |
 56 |         A summary of the issue.
 57 |         Include as much information as possible, and steps to reproduce (if they're known).
 58 |         Log files (see README for more information) can be attached by clicking the area to highlight it, and then dragging & dropping files in.
 59 |     validations:
 60 |       required: true
 61 |   - type: textarea
 62 |     id: output-log
 63 |     attributes:
 64 |       label: Output Log
 65 |       description: |
 66 |         iSubRip's output when the issue occurred.
 67 |         Please include the command that was used to run iSubRip.
 68 |       render: Text
 69 |       placeholder: |
 70 |         Example:
 71 | 
 72 |         isubrip https://itunes.apple.com/us/movie/can-you-hear-us-now/id1617191490
 73 |         Scraping https://itunes.apple.com/us/movie/can-you-hear-us-now/id1617191490...
 74 |         Found movie: Can You Hear Us Now?
 75 |         Traceback (most recent call last):
 76 |           File "%appdata%\local\programs\python\python38-32\lib\runpy.py", line 193, in _run_module_as_main
 77 |             return _run_code(code, main_globals, None,
 78 |           File "%appdata%\local\programs\python\python38-32\lib\runpy.py", line 86, in _run_code
 79 |             exec(code, run_globals)
 80 |           File "%appdata%\local\programs\python\python38-32\scripts\isubrip.exe\__main__.py", line 7, in <module>
 81 |           File "%appdata%\local\programs\python\python38-32\lib\site-packages\isubrip\__main__.py", line 91, in main
 82 |             os.makedirs(current_download_path, exist_ok=True)
 83 |           File "%appdata%\local\programs\python\python38-32\lib\os.py", line 221, in makedirs
 84 |             mkdir(name, mode)
 85 |         OSError: [WinError 123] The filename, directory name, or volume label syntax is incorrect: 'C:\\%appdata%\\Local\\Temp\\iSubRip\\Can.You.Hear.Us.Now?.iT.WEB'
 86 |     validations:
 87 |       required: true
 88 |   - type: textarea
 89 |     id: config
 90 |     attributes:
 91 |       label: Config
 92 |       description: |
 93 |         The iSubRip config file you are using.
 94 |         **Leave empty only if there is no config file in use.**
 95 |       render: TOML
 96 |       placeholder: |
 97 |         Example:
 98 | 
 99 |         [downloads]
100 |         folder = "C:\\Subtitles\\iTunes"
101 |         languages = ["en-US", "fr-FR", "he"]
102 |         zip = false
103 | 
104 |         [subtitles]
105 |         convert-to-srt = true
106 |         fix-rtl = true
107 |     validations:
108 |       required: false


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/2_feature_request.yml:
--------------------------------------------------------------------------------
 1 | name: Feature Request
 2 | description: Request a new feature or improvement to an existing one.
 3 | title: "[Feature Request]: "
 4 | labels: [feature-request]
 5 | body:
 6 |   - type: markdown
 7 |     attributes:
 8 |       value: >
 9 |         **Before opening an issue, please make sure there  isn't an already-existing issue, open or closed,
10 |         for this feature request under the [issues tab](https://github.com/MichaelYochpaz/iSubRip/issues?q=label%3Afeature-request).**
11 |   - type: checkboxes
12 |     id: check-confirmation
13 |     attributes:
14 |       label: Confirmations
15 |       options:
16 |         - label: "I have checked the issues tab, and couldn't find an existing issue with my feature request."
17 |           required: true
18 |   - type: textarea
19 |     id: description
20 |     attributes:
21 |       label: Description
22 |       description: A summary of the feature you want to request.
23 |     validations:
24 |       required: true


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/3_question.yml:
--------------------------------------------------------------------------------
 1 | name: Ask a question
 2 | description: Ask a question regarding iSubRip.
 3 | title: "[Question]: "
 4 | labels: [question]
 5 | body:
 6 |   - type: markdown
 7 |     attributes:
 8 |       value: |
 9 |         **Please use this template only for questions.
10 |         For issue / bug reports and feature requests, use one of the other templates.**
11 |   - type: textarea
12 |     id: description
13 |     attributes:
14 |       label: Question
15 |       description: The question you want to ask.
16 |     validations:
17 |       required: true


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/config.yml:
--------------------------------------------------------------------------------
1 | blank_issues_enabled: false


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # From: https://github.com/github/gitignore/blob/main/Python.gitignore
  2 | 
  3 | # Byte-compiled / optimized / DLL files
  4 | __pycache__/
  5 | *.py[cod]
  6 | *$py.class
  7 | 
  8 | # C extensions
  9 | *.so
 10 | 
 11 | # Distribution / packaging
 12 | .Python
 13 | build/
 14 | develop-eggs/
 15 | dist/
 16 | downloads/
 17 | eggs/
 18 | .eggs/
 19 | lib/
 20 | lib64/
 21 | parts/
 22 | sdist/
 23 | var/
 24 | wheels/
 25 | share/python-wheels/
 26 | *.egg-info/
 27 | .installed.cfg
 28 | *.egg
 29 | MANIFEST
 30 | 
 31 | # PyInstaller
 32 | #  Usually these files are written by a python script from a template
 33 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 34 | *.manifest
 35 | *.spec
 36 | 
 37 | # Installer logs
 38 | pip-log.txt
 39 | pip-delete-this-directory.txt
 40 | 
 41 | # Unit test / coverage reports
 42 | htmlcov/
 43 | .tox/
 44 | .nox/
 45 | .coverage
 46 | .coverage.*
 47 | .cache
 48 | nosetests.xml
 49 | coverage.xml
 50 | *.cover
 51 | *.py,cover
 52 | .hypothesis/
 53 | .pytest_cache/
 54 | cover/
 55 | 
 56 | # Translations
 57 | *.mo
 58 | *.pot
 59 | 
 60 | # Django stuff:
 61 | *.log
 62 | local_settings.py
 63 | db.sqlite3
 64 | db.sqlite3-journal
 65 | 
 66 | # Flask stuff:
 67 | instance/
 68 | .webassets-cache
 69 | 
 70 | # Scrapy stuff:
 71 | .scrapy
 72 | 
 73 | # Sphinx documentation
 74 | docs/_build/
 75 | 
 76 | # PyBuilder
 77 | .pybuilder/
 78 | target/
 79 | 
 80 | # Jupyter Notebook
 81 | .ipynb_checkpoints
 82 | 
 83 | # IPython
 84 | profile_default/
 85 | ipython_config.py
 86 | 
 87 | # pyenv
 88 | #   For a library or package, you might want to ignore these files since the code is
 89 | #   intended to run in multiple environments; otherwise, check them in:
 90 | # .python-version
 91 | 
 92 | # pipenv
 93 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 94 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 95 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 96 | #   install all needed dependencies.
 97 | #Pipfile.lock
 98 | 
 99 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
100 | __pypackages__/
101 | 
102 | # Celery stuff
103 | celerybeat-schedule
104 | celerybeat.pid
105 | 
106 | # SageMath parsed files
107 | *.sage.py
108 | 
109 | # Environments
110 | .env
111 | .venv
112 | env/
113 | venv/
114 | ENV/
115 | env.bak/
116 | venv.bak/
117 | 
118 | # Spyder project settings
119 | .spyderproject
120 | .spyproject
121 | 
122 | # Rope project settings
123 | .ropeproject
124 | 
125 | # mkdocs documentation
126 | /site
127 | 
128 | # mypy
129 | .mypy_cache/
130 | .dmypy.json
131 | dmypy.json
132 | 
133 | # Pyre type checker
134 | .pyre/
135 | 
136 | # pytype static type analyzer
137 | .pytype/
138 | 
139 | # Cython debug symbols
140 | cython_debug/
141 | 
142 | # PyCharm
143 | #  JetBrains specific template is maintainted in a separate JetBrains.gitignore that can
144 | #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
145 | #  and can be added to the global gitignore or merged into this file.  For a more nuclear
146 | #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
147 | #.idea/
148 | 


--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
  1 | # Changelog
  2 | ## 2.6.3 [2025-03-17]
  3 | ### Bug Fixes:
  4 | * Fixed an issue where logs containing the percentage character (`%`) would raise an error. ([Issue #82](https://github.com/MichaelYochpaz/iSubRip/issues/82))
  5 | ---
  6 | ## 2.6.2 [2025-02-04]
  7 | ### Bug Fixes:
  8 | * Fixed an issue where AppleTV API calls would fail due to changes on AppleTV requiring a missing `utsk` parameter. ([Issue #80](https://github.com/MichaelYochpaz/iSubRip/issues/80))
  9 | * Fixed an issue where iTunes URLs would not work due to iTunes no longer redirecting to AppleTV. A different method will be used now to find corresponding AppleTV URLs. Also added a retry mechanism as it appears to be a bit unreliable at times. (thanks @yonatand1230 for suggesting this method!). ([Issue #78](https://github.com/MichaelYochpaz/iSubRip/issues/78))
 10 | * Removed progress bar when where there are no matching subtitles to download (previously, it would just show 0/0 with 0% progress).
 11 | ---
 12 | ## 2.6.1 [2025-01-31]
 13 | ### Bug Fixes:
 14 | * Fixed a backwards compatibility issue in code, which would cause errors when running on Python versions lower than 3.12. ([Issue #78](https://github.com/MichaelYochpaz/iSubRip/issues/78))
 15 | ---
 16 | ## 2.6.0 [2025-01-28]
 17 | **The following update contains breaking changes to the config file.  
 18 | If you are using one, please update your config file accordingly.**
 19 | 
 20 | ### Added:
 21 | * Added a new `general.log-level` config setting, the log level of stdout (console) output. Set to `info` by default. Can be changed to `debug`, `warning`, or `error`. See the updated [example config](https://github.com/MichaelYochpaz/iSubRip/blob/main/example-config.toml) for an example.
 22 | 
 23 | ### Changes:
 24 | * Console output has been overhauled and improved, with colorful interactive output.
 25 | * Config file is now parsed and validated in a more reliable and efficient manner. Configuration errors will now be more readable and descriptive.
 26 | * **Breaking config changes** - the `scrapers` config category has been updated. Settings that should apply for all scrapers are now under the `scrapers.default` category instead of straight under `scrapers`. See the updated [example config](https://github.com/MichaelYochpaz/iSubRip/blob/main/example-config.toml) for examples.
 27 | * Updated AppleTV scraper request parameters.
 28 | * Minor improvements to logs.
 29 | * Python 3.8 is no longer supported. Minimum supported version has been updated to 3.9.
 30 | 
 31 | ### Bug Fixes:
 32 | * Fixed an issue where if `verify-ssl` is set to `false`, and the `urllib3` package (which isn't a dependency of iSubRip) is not installed, an error could be thrown.
 33 | ---
 34 | ## 2.5.6 [2024-07-07]
 35 | ### Bug Fixes:
 36 | * Fixed an issue where the update message from version `2.5.4` to `2.5.5` would still appear after updating. ([Issue #73](https://github.com/MichaelYochpaz/iSubRip/issues/73))
 37 | ---
 38 | ## 2.5.5 [2024-07-06]
 39 | ### Added:
 40 | * Added new `timeout` setting to the config file, for the option to change the timeout for all / specific scrapers. See the updated [example config](https://github.com/MichaelYochpaz/iSubRip/blob/main/example-config.toml) for usage examples. ([Issue #71](https://github.com/MichaelYochpaz/iSubRip/issues/71))
 41 | 
 42 | ### Changes:
 43 | * Default timeout for requests has been updated from 5 seconds to 10 seconds. ([Issue #71](https://github.com/MichaelYochpaz/iSubRip/issues/71))
 44 | ---
 45 | ## 2.5.4 [2024-04-28]
 46 | ### Bug Fixes:
 47 | * Fixed an issue where if the `logs` directory does not exist, the folder isn't created, causing an error. ([Issue #67](https://github.com/MichaelYochpaz/iSubRip/issues/67))
 48 | * Fixed an issue where the summary log of successful and failed download would not account for failed downloads. ([Issue #68](https://github.com/MichaelYochpaz/iSubRip/issues/68))
 49 | ---
 50 | ## 2.5.3 [2024-04-09]
 51 | ### Added:
 52 | * Added new `proxy` and `verify-ssl` settings to the config file, for allowing the usage of a proxy when making requests, and disabling SSL verification. See the updated [example config](https://github.com/MichaelYochpaz/iSubRip/blob/main/example-config.toml) for usage examples.
 53 | 
 54 | ### Changes:
 55 | * `subtitles.rtl-languages` config setting is no longer supported, and its values are now hardcoded and can't be modified.
 56 | 
 57 | ### Bug Fixes:
 58 | * Fixed an issue where in some cases, `STYLE` blocks would repeat throughout the subtitles file, and cause inaccurate cue count. ([Issue #63](https://github.com/MichaelYochpaz/iSubRip/issues/63))
 59 | * Fixed an issue where the WebVTT Style blocks would have their `STYLE` tag replaced with a `REGION` tag in downloaded subtitles.
 60 | * Fixed an issue where an empty playlist (with a size of 0 bytes) would be reported as a valid playlist with no matching subtitles. ([Issue #65](https://github.com/MichaelYochpaz/iSubRip/issues/65))
 61 | ---
 62 | ## 2.5.2 [2024-01-06]
 63 | ### Bug Fixes:
 64 | * Fixed an issue where errors would not be handled gracefully, and cause an unexpected crash. ([Issue #55](https://github.com/MichaelYochpaz/iSubRip/issues/55))
 65 | ---
 66 | ## 2.5.1 [2023-12-23]
 67 | ### Bug Fixes:
 68 | * Fixed an issue where source abbreviation was missing from file names of downloaded subtitles files. ([Issue #53](https://github.com/MichaelYochpaz/iSubRip/issues/53))
 69 | ---
 70 | ## 2.5.0 [2023-12-16]
 71 | ### Added:
 72 | * Added logs. See the new [Logs section in the README](https://github.com/MichaelYochpaz/iSubRip#logs) for more information.
 73 | * Added a new `subtitles.webvtt.subrip-alignment-conversion` config setting (which is off by default), which if set to true, will add the `{\an8}` tag at the start of lines that are annotated at the top (with the `line:0.00%` WebVTT setting) when converting to SubRip. ([Issue #35](https://github.com/MichaelYochpaz/iSubRip/issues/35))
 74 | * Implemented caching for AppleTV's storefront configuration data, which should reduce the amount of requests used when scraping multiple AppleTV URLs from the same storefront.
 75 | 
 76 | ### Changes:
 77 | * Big backend changes to the structure of the code, mostly to improve modularity and allow for easier development in the future, and improve performance.
 78 | * Updated the CLI output to utilize logs and print with colors according to log-level.
 79 | * Improved error handling in some cases where an invalid URL is used.
 80 | 
 81 | ### Bug Fixes:
 82 | * Fixed an issue where if a movie is a pre-order with a set release date, a message with availability date wouldn't be printed in some cases.
 83 | ---
 84 | ## 2.4.3 [2023-06-18]
 85 | ### Bug Fixes:
 86 | * Fixed an issue where some AppleTV URLs (or iTunes links that refer to such URLs) would not be matched in some cases, resulting in a "No matching scraper was found..." error. ([Issue #46](https://github.com/MichaelYochpaz/iSubRip/issues/46))
 87 | ---
 88 | ## 2.4.2 [2023-06-02]
 89 | ### Changes:
 90 | * Improved error handling for subtitles downloads. ([Issue #44](https://github.com/MichaelYochpaz/iSubRip/issues/44))
 91 | 
 92 | ### Bug Fixes:
 93 | * Fixed an issue where using a ZIP file, and saving to a different drive than the OS drive would fail. ([Issue #43](https://github.com/MichaelYochpaz/iSubRip/issues/43))
 94 | ---
 95 | ## 2.4.1 [2023-05-25]
 96 | ### Bug Fixes:
 97 | * Fixed an issue where saving subtitles to a different drive than the OS drive would fail. ([Issue #41](https://github.com/MichaelYochpaz/iSubRip/issues/41))
 98 | * Fixed AppleTV URLs with multiple iTunes playlists causing an error. ([Issue #42](https://github.com/MichaelYochpaz/iSubRip/issues/42))
 99 | ---
100 | ## 2.4.0 [2023-05-23]
101 | ### Added:
102 | - iTunes links will now redirect to AppleTV and scrape metadata from there, as AppleTV has additional and more accurate metadata.
103 | - Improved error messages to be more informative and case-specific:
104 |   - If a movie is a pre-order and has no available playlist, a proper error message will be printed with its release date (if available).
105 |   - If trying to scrape AppleTV+ content or series (which aren't currently supported), a proper error will be printed.
106 | 
107 | ### Changes:
108 | - A major refactor to the code, to make it more modular and allow for easier development of new features in the future.
109 | - Multiple changes (with some breaking changes) to the config file:
110 |   - The `downloads.format` setting is deprecated, and replaced by the `subtitles.convert-to-srt` setting.
111 |   - The `downloads.merge-playlists` setting is deprecated, with no replacement.  
112 |     If an AppleTV link has multiple playlists, they will be downloaded separately.
113 |   - The `downloads.user-agent` setting is deprecated, with no replacement.
114 |     The user-agent used by the scraper, will be used for downloads as well.
115 |   - The `scraping` config category no longer exists, and is replaced by a `scrapers` category, which has a sub-category with settings for each scraper (for example, a `scrapers.itunes` sub-category).
116 | - Old config paths that were previously deprecated are no longer supported and will no longer work.
117 |   The updated config settings can be found in the [example config](https://github.com/MichaelYochpaz/iSubRip/blob/main/example-config.toml).
118 | 
119 | ### Notes:
120 | * This release includes a major rewrite of the code, which may have introduced new bugs to some core features. If you encountered one, [please report it](https://github.com/MichaelYochpaz/iSubRip/issues/new/choose).
121 | * Minimum supported Python version bumped to 3.8.
122 | * `beautifulsoup4` and `lxml` packages are no longer required or used.
123 | ---
124 | ## 2.3.3 [2022-10-09]
125 | ### Changes:
126 | * Added release year to zip file names. ([Issue #31](https://github.com/MichaelYochpaz/iSubRip/issues/31))
127 | * If the generated path for a zip file is already taken, a number will be appended at the end of the file's name to avoid overwriting. ([Issue #34](https://github.com/MichaelYochpaz/iSubRip/issues/34))
128 | 
129 | ### Bug Fixes:
130 | * Fixed an exception being thrown if the path to downloads folder on the config is invalid.
131 | * Fixed AppleTV URLs without a movie title not working. ([Issue #29](https://github.com/MichaelYochpaz/iSubRip/issues/29))
132 | * Fixed issues for movies with specific characters (`/`, `:`), and Windows reserved names in their title. ([Issue #30](https://github.com/MichaelYochpaz/iSubRip/issues/30))
133 | ---
134 | ## 2.3.2 [2022-08-06]
135 | ### Changes:
136 | * Changed config paths to the following locations:  
137 | Windows: `%USERPROFILE%\.isubrip\config.json`  
138 | Linux / macOS: `$HOME/.isubrip/config.json`  
139 | More info under Notes (and examples on the [README](https://github.com/MichaelYochpaz/iSubRip#configuration) file).
140 | 
141 | ### Bug Fixes:
142 | * Fixed an error with AppleTV links for movies released before 1970 (Epoch time). ([Issue #21](https://github.com/MichaelYochpaz/iSubRip/issues/21))
143 | * Fixed config file not being loaded on macOS. ([Issue #22](https://github.com/MichaelYochpaz/iSubRip/issues/22))
144 | * Fixed AppleTV scraping from the same storefront. ([Issue #24](https://github.com/MichaelYochpaz/iSubRip/issues/24))
145 | 
146 | ### Notes:
147 | * Running iSubRip with a config file in the previous locations will still work, but support for them will be dropped in the future.  
148 | * `xdg` package is no longer required or used.
149 | ---
150 | ## 2.3.1 [2022-07-15]
151 | ### Changes:
152 | * Improved AppleTV scraping to utilize AppleTV's API instead of scraping HTML.
153 | 
154 | ### Bug Fixes:
155 | * Fixed HTML escaped (for non-English) characters not matching AppleTV's URL RegEx. ([Issue #15](https://github.com/MichaelYochpaz/iSubRip/issues/15))
156 | ---
157 | ## 2.3.0 [2022-06-23]
158 | ### Added:
159 | * AppleTV movie URLs are now supported.
160 | * Added a `merge-playlists` config option to treat multiple playlists that can be found on AppleTV pages as one (more info on the example config).
161 | 
162 | ### Changes:
163 | * Improved subtitles parser to perserve additional WebVTT data.
164 | * The config value `user-agent` under `scraping` is now separated to 2 different values: `itunes-user-agent`, and `appletv-user-agent`.
165 | 
166 | ### Bug Fixes:
167 | * Fixed movie titles with invalid Windows file-name characters (example: '?') causing a crash. ([Issue #14](https://github.com/MichaelYochpaz/iSubRip/issues/14))
168 | * Fixed iTunes store URLs without a movie title not working. ([Issue #13](https://github.com/MichaelYochpaz/iSubRip/issues/13))
169 | ---
170 | ## 2.2.0 [2022-04-25]
171 | ### Added:
172 | * Replaced FFmpeg usage for parsing with a native subtitles parser (downloads are much faster now).
173 | * Added a `remove-duplicates` configuration remove duplicate paragraphs. (Was previously automatically fixed by FFmpeg.)
174 | * Added `fix-rtl` and `rtl-languages` configuration to fix RTL in RTL-languaged subtitles (has to be enabled in the config).
175 | 
176 | ### Changes:
177 | * FFmpeg is no longer required or used, and all FFmpeg-related settings are deprecated.
178 | 
179 | ### Notes:
180 | * `fix-rtl` is off by default and has to be enabled on the config. Check the `config.toml` example file for more info.
181 | * Minimum supported Python version bumped to 3.7.
182 | ---
183 | ## 2.1.2 [2022-04-03]
184 | ### Bug Fixes:
185 | * Fixed subtitles being downloaded twice, which causes long (doubled) download times.
186 | ---
187 | ## 2.1.1 [2022-03-28]
188 | ### Bug Fixes:
189 | * Fixed a compatibility issue with Python versions that are lower than 3.10.
190 | * Fixed downloading subtitles to an archive file not working properly.
191 | * Fixed a bug where the code continues to run if subtitles download failed, as if the download was successful.
192 | ---
193 | ## 2.1.0 [2022-03-19]
194 | ### Added:
195 | * A note will be printed if a newer version is available on PyPI (can be disabled on the config).
196 | * Config will now be checked for errors before running.
197 | 
198 | ### Changes:
199 | * Big improvements to scraping, which is now far more reliable.
200 | * Added release year to subtitles file names.
201 | * Config structure slightly changed.
202 | 
203 | ### Notes:
204 | * If you use a user-config, it might need to be updated to match the new config structure.
205 |   Example of an updated valid structure can be found [here](https://github.com/MichaelYochpaz/iSubRip/blob/main/example-config.toml).
206 | ---
207 | ## 2.0.0 [2022-01-30]
208 | The script is now a Python package that can be installed using pip.
209 | 
210 | ### Added:
211 | * Added a config file for changing configurations. (Example can be found [here](https://github.com/MichaelYochpaz/iSubRip/blob/main/example-config.toml))
212 | * Added an option to choose subtitles format (vtt / srt).
213 | * Added an option to choose whether to zip subtitles files or not.
214 | * Multiple links can be passed for downloading subtitles for multiple movies one after another.
215 | * Temporary files are automatically removed if the script stops unexpectedly.
216 | 
217 | ### Changes:
218 | * A complete code overhaul from a single python script file to a package, while utilizing OOP and classes.
219 | * Improved scraping algorithm for faster playlist scraping.
220 | * FFmpeg will now automatically overwrite existing subtitles with the same file name.
221 | 
222 | ### Bug Fixes:
223 | * Fixed a bug where in some cases, no subtitles were found since the title has HTML escaped characters, which causes bad matching when checking if a valid playlist was found.
224 | ---
225 | ## 1.0.6 [2021-07-23]
226 | ### Bug Fixes:
227 | * Fixed an issue where in some cases subtitles won't download when using `DOWNLOAD_FILTER` because of letter casing not matching.
228 | * Fixed and improved error handling, and added more descriptive error messages. ([Issue #9](https://github.com/MichaelYochpaz/iSubRip/issues/9))
229 | ---
230 | ## 1.0.5 [2021-05-27]
231 | ### Bug Fixes:
232 | * Fixed subtitles for some movies not being found after previous release. ([Issue #8](https://github.com/MichaelYochpaz/iSubRip/issues/8))
233 | ---
234 | ## 1.0.4 [2021-05-25]
235 | ### Bug Fixes:
236 | * Fixed the script not working after iTunes webpage data orientation slightly changed. ([Issue #6](https://github.com/MichaelYochpaz/iSubRip/issues/6) , [Issue #7](https://github.com/MichaelYochpaz/iSubRip/issues/7))
237 | ---
238 | ## 1.0.3 [2021-04-30]
239 | ### Bug Fixes:
240 | * Fixed a bug where subtitles for suggested movies are being downloaded if movie's main playlist is not found. ([Issue #2](https://github.com/MichaelYochpaz/iSubRip/issues/2))
241 | * Added a "cc" tag to closed-caption subtitles' filename to avoid a collision with non-cc subtitles. ([Issue #3](https://github.com/MichaelYochpaz/iSubRip/issues/3))
242 | ---
243 | ## 1.0.2 [2021-04-15]
244 | ### Added:
245 | * Added a User-Agent for sessions to avoid being blocked.
246 | 
247 | ### Changes:
248 | * `DOWNLOAD_FILTER` is no longer case-sensitive.
249 | * Added `lxml` to `requirements.txt`. ([Issue #1](https://github.com/MichaelYochpaz/iSubRip/issues/1))
250 | 
251 | ### Bug Fixes:
252 | * Fixed the script not working after iTunes webpage data orientation slightly changed. ([Issue #1](https://github.com/MichaelYochpaz/iSubRip/issues/1))
253 | ---
254 | ## 1.0.1 [2020-12-13]
255 | ### Changes:
256 | * Improved error handling.
257 |   
258 | ### Bug Fixes:
259 | * Fixed file name formatting.
260 | ---
261 | ## 1.0.0 [2020-11-02]
262 | * Initial release.


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2022 Michael Yochpaz
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # iSubRip
  2 | **iSubRip** is a Python command-line tool for scraping and downloading subtitles from AppleTV and iTunes movie pages.
  3 | 
  4 | <div align="center">
  5 |   <a href="https://python.org/pypi/isubrip"><img alt="Python Version" src="https://img.shields.io/pypi/pyversions/isubrip"></a>
  6 |   <a href="https://python.org/pypi/isubrip"><img alt="PyPI Version" src="https://img.shields.io/pypi/v/isubrip"></a>
  7 |   <a href="https://github.com/MichaelYochpaz/iSubRip/blob/main/LICENSE"><img alt="License" src="https://img.shields.io/github/license/MichaelYochpaz/iSubRip"></a>
  8 | 
  9 |   <a href="https://python.org/pypi/isubrip"><img alt="Monthly Downloads" src="https://pepy.tech/badge/isubrip/month"></a>
 10 |   <a href="https://python.org/pypi/isubrip"><img alt="Total Downloads" src="https://pepy.tech/badge/isubrip"></a>
 11 |   <a href="https://github.com/MichaelYochpaz/iSubRip"><img alt="Repo Stars" src="https://img.shields.io/github/stars/MichaelYochpaz/iSubRip?style=flat&color=gold"></a>
 12 |   <a href="https://github.com/MichaelYochpaz/iSubRip/issues"><img alt="Issues" src="https://img.shields.io/github/issues/MichaelYochpaz/iSubRip?color=red"></a>
 13 | </div>
 14 | 
 15 | <br/>
 16 | 
 17 | <div align="center">
 18 |   <img src="https://github.com/user-attachments/assets/ffdbb366-8ad0-427d-af00-9b70cc0d6b01" width="800">
 19 | </div>
 20 | 
 21 | 
 22 | ---
 23 | 
 24 | ## ✨ Features
 25 | - Scrape subtitles from AppleTV and iTunes movies without needing a purchase or account.
 26 | - Retrieve the expected streaming release date (if available) for unreleased movies.
 27 | - Utilize asynchronous downloading to speed up the download of chunked subtitles.
 28 | - Automatically convert subtitles to SubRip (SRT) format.
 29 | - Fix right-to-left (RTL) alignment in RTL language subtitles automatically.
 30 | - Configure settings such as download folder, preferred languages, and toggling features.
 31 | 
 32 | ## 🚀 Quick Start
 33 | ### Installation
 34 | ```shell
 35 | pip install isubrip
 36 | ```
 37 | 
 38 | ### Usage
 39 | ```shell
 40 | isubrip <URL> [URL...]
 41 | ```
 42 | <sub>(URL can be either an AppleTV or iTunes movie URL)</sub>
 43 | 
 44 | <br/>
 45 | 
 46 | > [!WARNING]
 47 | > iSubRip is not recommended for use as a library in other projects.  
 48 | > The API frequently changes, and breaking changes to the API are common, even in minor versions.
 49 | >
 50 | > Support will not be provided for issues arising from using this package as a library.
 51 | ## 🛠 Configuration
 52 | A [TOML](https://toml.io) configuration file can be created to customize various options and features.
 53 | 
 54 | The configuration file will be searched for in one of the following paths based on your operating system:
 55 | 
 56 | - **Windows**: `%USERPROFILE%\.isubrip\config.toml`
 57 | - **Linux / macOS**: `$HOME/.isubrip/config.toml`
 58 | 
 59 | ### Path Examples
 60 | - **Windows**: `C:\Users\Michael\.isubrip\config.toml`
 61 | - **Linux**: `/home/Michael/.isubrip/config.toml`
 62 | - **macOS**: `/Users/Michael/.isubrip/config.toml`
 63 | 
 64 | 
 65 | ### Example Configuration
 66 | ```toml
 67 | [downloads]
 68 | folder = "C:\\Subtitles\\iTunes"
 69 | languages = ["en-US", "fr-FR", "he"]
 70 | zip = false
 71 | 
 72 | [subtitles]
 73 | convert-to-srt = true
 74 | fix-rtl = true
 75 | 
 76 | [subtitles.webvtt]
 77 | subrip-alignment-conversion = true
 78 | ```
 79 | 
 80 | An example config with details and explanations for all available settings can be found [here](https://github.com/MichaelYochpaz/iSubRip/blob/main/example-config.toml).
 81 | 
 82 | ## 📜 Logs
 83 | Log files are created for each run in the following paths, depending on your operating system:
 84 | 
 85 | **Windows**: `%USERPROFILE%\.isubrip\logs`  
 86 | **Linux / macOS**: `$HOME/.isubrip/logs`  
 87 | 
 88 | Log rotation (deletion of old files once a certain number of files is reached) can be configured in the configuration file using the `general.log-rotation-size` setting. The default value is `15`.
 89 | 
 90 | For more details, see the [example configuration](https://github.com/MichaelYochpaz/iSubRip/blob/main/example-config.toml).
 91 | 
 92 | 
 93 | ## 📓 Changelog
 94 | The changelog for the latest, and all previous versions, can be found [here](https://github.com/MichaelYochpaz/iSubRip/blob/main/CHANGELOG.md).
 95 | 
 96 | ## 👨🏽‍💻 Contributing
 97 | This project is open-source but currently lacks the infrastructure to fully support external contributions.
 98 | 
 99 | If you wish to contribute, please open an issue first to discuss your proposed changes to avoid working on something that might not be accepted.
100 | 
101 | ## 🙏🏽 Support
102 | If you find this project helpful, please consider supporting it by:
103 | - 🌟 Starring the repository
104 | - 💖 [Sponsoring the project](https://github.com/sponsors/MichaelYochpaz)
105 | 
106 | ## ❤️ Special Thanks
107 | Thanks to **JetBrains** for generously providing a free open-source [PyCharm](https://www.jetbrains.com/pycharm/) license to help work on this project, through their [Open Source Support Program](https://www.jetbrains.com/community/opensource/).
108 | 
109 | [![PyCharm Logo](https://resources.jetbrains.com/storage/products/company/brand/logos/PyCharm_icon.svg)](https://www.jetbrains.com/community/opensource/#support)
110 | 
111 | ## 📝 End User License Agreement
112 | By using iSubRip, you agree to the following terms:
113 | 
114 | 1. **Disclaimer of Affiliation**: iSubRip is an independent, open-source project. It is not affiliated with, endorsed by, or in any way officially connected to Apple Inc., iTunes, or AppleTV.
115 | 2. **Educational Purpose**: This tool is developed and provided for educational and research purposes only. It demonstrates techniques for accessing and processing publicly available, unencrypted subtitle data from HLS playlists.
116 | 3. **User Responsibility and Compliance**: Any use of iSubRip is solely at the user's own risk and discretion. Users are responsible for ensuring that their use of the tool complies with all applicable laws, regulations, and terms of service of the content providers. This includes adhering to local, state, national, and international laws and regulations.
117 | 4. **Limitation of Liability**: The developers of iSubRip shall not be held responsible for any legal consequences arising from the use of this tool. This includes, but is not limited to, claims of copyright infringement, intellectual property violations, or breaches of terms of service of content providers. Users assume all risks associated with acquiring and using subtitle data through this tool.
118 | 
119 | By using iSubRip, you acknowledge that you have read, understood, and agree to be bound by this agreement's terms and conditions.
120 | 
121 | ## ⚖️ License
122 | This project is licensed under the MIT License. For more details, see the [LICENSE file](https://github.com/MichaelYochpaz/iSubRip/blob/main/LICENSE).
123 | 


--------------------------------------------------------------------------------
/example-config.toml:
--------------------------------------------------------------------------------
  1 | # ---------------- ⚠️ IMPORTANT - READ BEFORE USING ⚠️ ----------------
  2 | # This is an example config file with all available settings and their default values (if they have one).
  3 | # All settings are optional, and setting them in the config file will override their default values.
  4 | #
  5 | # In your config file, set only settings you wish to change from their default values.
  6 | # Do NOT copy this file and use it as your config, as it will override ALL settings with the values specified here.
  7 | # Use this file only as a reference to understand what different settings do,
  8 | # and to decide which settings you should use in your config.
  9 | #
 10 | # Your config file should be saved in the following path (according to OS):
 11 | #   - Windows: %USERPROFILE%\.isubrip\config.toml
 12 | #   - Linux / macOS: $HOME/.isubrip/config.toml
 13 | # ---------------------------------------------------------------------
 14 | 
 15 | [general]
 16 | # Check for updates before running, and show a note if a new version exists.
 17 | # Value can be either 'true' or 'false'.
 18 | check-for-updates = true
 19 | 
 20 | # Maximum number of log files to keep in the logs folder.
 21 | # Once the maximum number is reached, the oldest logs files will be deleted in rotation
 22 | # until the number of files equals the maximum.
 23 | log-rotation-size = 15
 24 | 
 25 | # Log level to use for stdout (console) output.
 26 | # Value can be one of: "debug", "info", "error", "warning", "critical".
 27 | log-level = "info"
 28 | 
 29 | 
 30 | [downloads]
 31 | # Folder to downloads files to.
 32 | # The default "." value means it will download to the same folder the script ran from.
 33 | # Use double backslashes in path to avoid escaping characters. Example: "C:\\Users\\<username>\\Downloads\\"
 34 | folder = "."
 35 | 
 36 | # A list of iTunes language codes to download.
 37 | # An empty array (like the one currently being used) will result in downloading all of the available subtitles.
 38 | # Example: ["en-US", "fr-FR", "he"]
 39 | languages = []
 40 | 
 41 | # Whether to overwrite existing subtitles files.
 42 | # If set to false, names of existing subtitles will have a number appended to them to avoid overwriting.
 43 | # Value can be either 'true' or 'false'.
 44 | overwrite-existing = false
 45 | 
 46 | # Save files into a zip archive if there is more than one matching subtitles.
 47 | # Value can be either 'true' or 'false'.
 48 | zip = false
 49 | 
 50 | 
 51 | [subtitles]
 52 | # Fix RTL for RTL languages (Arabic & Hebrew).
 53 | # Value can be either 'true' or 'false'.
 54 | #
 55 | # NOTE: This is off by default as some subtitles use other methods to fix RTL (like writing punctuations backwards).
 56 | #       Using this option on these type of subtitles can break the already-fixed RTL issues.
 57 | fix-rtl = false
 58 | 
 59 | # Remove duplicate paragraphs (same text and timestamps).
 60 | # Value can be either 'true' or 'false'.
 61 | remove-duplicates = true
 62 | 
 63 | # Whether to convert subtitles to SRT format.
 64 | # NOTE: This can cause loss of subtitles metadata that is not supported by SRT format.
 65 | convert-to-srt = false
 66 | 
 67 | [subtitles.webvtt]
 68 | # Whether to add a '{\an8}' tag to lines that are aligned at the top when converting format from WebVTT to SubRip.
 69 | # Relevant only if 'subtitles.convert-to-srt' is set to 'true'.
 70 | # Value can be either 'true' or 'false'.
 71 | subrip-alignment-conversion = false
 72 | 
 73 | 
 74 | [scrapers.default]
 75 | # A subcategory to set default values for all scrapers.
 76 | # These settings will be overridden by scraper-specific configuration, if set,
 77 | # These settings will not apply if the scraper has a different specific default value.
 78 | 
 79 | # Timeout in seconds for requests sent by all scrapers.
 80 | timeout = 10
 81 | 
 82 | # User-Agent to use by default for requests sent by all scrapers.
 83 | user-agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36"
 84 | 
 85 | # Proxy to use by default for requests sent by all scrapers.
 86 | proxy = "http://127.0.0.1:8080"
 87 | 
 88 | # Whether to verify SSL certificates when making requests for all scrapers.
 89 | # Value can be either 'true' or 'false'.
 90 | verify-ssl = true
 91 | 
 92 | 
 93 | [scrapers.scraper-name]
 94 | # Scraper-specific settings (set for each scraper separately).
 95 | # Will override any default values previously set.
 96 | # Replace 'scraper-name' with the name of the scraper to configure.
 97 | # Available scrapers: itunes, appletv
 98 | 
 99 | # Timeout in seconds for requests sent by the scraper.
100 | timeout = 10
101 | 
102 | # User-Agent to use for requests sent by the scraper.
103 | user-agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36"
104 | 
105 | # Proxy to use for requests sent by the scraper.
106 | proxy = "http://127.0.0.1:8080"
107 | 
108 | # Whether to verify SSL certificates when making requests for the scraper.
109 | # Value can be either 'true' or 'false'.
110 | verify-ssl = true
111 | 


--------------------------------------------------------------------------------
/isubrip/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MichaelYochpaz/iSubRip/d7d7d1e1e4eaa93d49564b74d0ea0b25e07777b0/isubrip/__init__.py


--------------------------------------------------------------------------------
/isubrip/__main__.py:
--------------------------------------------------------------------------------
  1 | from __future__ import annotations
  2 | 
  3 | import asyncio
  4 | import logging
  5 | import sys
  6 | from typing import TYPE_CHECKING
  7 | 
  8 | import httpx
  9 | from pydantic import ValidationError
 10 | 
 11 | from isubrip.cli import console
 12 | from isubrip.commands.download import download
 13 | from isubrip.config import Config
 14 | from isubrip.constants import (
 15 |     DATA_FOLDER_PATH,
 16 |     EVENT_LOOP,
 17 |     LOG_FILES_PATH,
 18 |     PACKAGE_NAME,
 19 |     PACKAGE_VERSION,
 20 |     USER_CONFIG_FILE_PATH,
 21 | )
 22 | from isubrip.logger import logger, setup_loggers
 23 | from isubrip.scrapers.scraper import Scraper, ScraperFactory
 24 | from isubrip.subtitle_formats.webvtt import WebVTTCaptionBlock
 25 | from isubrip.utils import (
 26 |     TempDirGenerator,
 27 |     convert_log_level,
 28 |     format_config_validation_error,
 29 |     get_model_field,
 30 |     raise_for_status,
 31 |     single_string_to_list,
 32 | )
 33 | 
 34 | if sys.version_info >= (3, 11):
 35 |     import tomllib
 36 | else:
 37 |     import tomli as tomllib
 38 | 
 39 | if TYPE_CHECKING:
 40 |     from pathlib import Path
 41 | 
 42 | log_rotation_size: int = 15  # Default size, before being updated by the config file.
 43 | 
 44 | 
 45 | def main() -> None:
 46 |     """A wrapper for the actual main function that handles exceptions and cleanup."""
 47 |     try:
 48 |         _main()
 49 | 
 50 |     except Exception as ex:
 51 |         logger.error(f"Error: {ex}")
 52 |         logger.debug("Debug information:", exc_info=True)
 53 |         exit(1)
 54 |     
 55 |     except KeyboardInterrupt:
 56 |         logger.debug("Keyboard interrupt detected, exiting...")
 57 |         exit(0)
 58 | 
 59 |     finally:
 60 |         if log_rotation_size > 0:
 61 |             handle_log_rotation(rotation_size=log_rotation_size)
 62 | 
 63 |         # NOTE: This will only close scrapers initialized using the ScraperFactory.
 64 |         async_cleanup_coroutines = []
 65 |         for scraper in ScraperFactory.get_initialized_scrapers():
 66 |             logger.debug(f"Requests count for '{scraper.name}' scraper: {scraper.requests_count}")
 67 |             scraper.close()
 68 |             async_cleanup_coroutines.append(scraper.async_close())
 69 | 
 70 |         EVENT_LOOP.run_until_complete(asyncio.gather(*async_cleanup_coroutines))
 71 |         TempDirGenerator.cleanup()
 72 | 
 73 | def _main() -> None:
 74 |     # Assure at least one argument was passed
 75 |     if len(sys.argv) < 2:
 76 |         logger.info(f"Usage: {PACKAGE_NAME} <iTunes movie URL> [iTunes movie URL...]")
 77 |         exit(0)
 78 | 
 79 |     # Generate the data folder if it doesn't previously exist
 80 |     if not DATA_FOLDER_PATH.is_dir():
 81 |         DATA_FOLDER_PATH.mkdir(parents=True)
 82 | 
 83 |     config = parse_config(config_file_location=USER_CONFIG_FILE_PATH)
 84 | 
 85 |     setup_loggers(
 86 |         stdout_loglevel=convert_log_level(log_level=config.general.log_level),
 87 |         stdout_console=console,
 88 |         logfile_output=True,
 89 |         logfile_loglevel=logging.DEBUG,
 90 |     )
 91 | 
 92 |     cli_args = " ".join(sys.argv[1:])
 93 |     logger.debug(f"CLI Command: {PACKAGE_NAME} {cli_args}")
 94 |     logger.debug(f"Python version: {sys.version}")
 95 |     logger.debug(f"Package version: {PACKAGE_VERSION}")
 96 |     logger.debug(f"OS: {sys.platform}")
 97 | 
 98 |     update_settings(config=config)
 99 | 
100 |     if config.general.check_for_updates:
101 |         check_for_updates(current_package_version=PACKAGE_VERSION)
102 | 
103 |     EVENT_LOOP.run_until_complete(download(
104 |         *single_string_to_list(item=sys.argv[1:]),
105 |         download_path=config.downloads.folder,
106 |         language_filter=config.downloads.languages,
107 |         convert_to_srt=config.subtitles.convert_to_srt,
108 |         overwrite_existing=config.downloads.overwrite_existing,
109 |         zip=config.downloads.zip,
110 |     ))
111 | 
112 | 
113 | def check_for_updates(current_package_version: str) -> None:
114 |     """
115 |     Check and print if a newer version of the package is available, and log accordingly.
116 | 
117 |     Args:
118 |         current_package_version (str): The current version of the package.
119 |     """
120 |     api_url = f"https://pypi.org/pypi/{PACKAGE_NAME}/json"
121 |     logger.debug("Checking for package updates on PyPI...")
122 |     try:
123 |         response = httpx.get(
124 |             url=api_url,
125 |             headers={"Accept": "application/json"},
126 |             timeout=5,
127 |         )
128 |         raise_for_status(response)
129 |         response_data = response.json()
130 | 
131 |         pypi_latest_version = response_data["info"]["version"]
132 | 
133 |         if pypi_latest_version != current_package_version:
134 |             logger.warning(f"You are currently using version '{current_package_version}' of '{PACKAGE_NAME}', "
135 |                            f"however version '{pypi_latest_version}' is available."
136 |                            f'\nConsider upgrading by running "pip install --upgrade {PACKAGE_NAME}"')
137 | 
138 |         else:
139 |             logger.debug(f"Latest version of '{PACKAGE_NAME}' ({current_package_version}) is currently installed.")
140 | 
141 |     except Exception as e:
142 |         logger.warning(f"Update check failed: {e}")
143 |         logger.debug("Debug information:", exc_info=True)
144 |         return
145 | 
146 | 
147 | def handle_log_rotation(rotation_size: int) -> None:
148 |     """
149 |     Handle log rotation and remove old log files if needed.
150 | 
151 |     Args:
152 |         rotation_size (int): Maximum amount of log files to keep.
153 |     """
154 |     sorted_log_files = sorted(LOG_FILES_PATH.glob("*.log"), key=lambda file: file.stat().st_mtime, reverse=True)
155 | 
156 |     if len(sorted_log_files) > rotation_size:
157 |         for log_file in sorted_log_files[rotation_size:]:
158 |             log_file.unlink()
159 | 
160 | 
161 | def parse_config(config_file_location: Path) -> Config:
162 |     """
163 |     Parse the configuration file and return a Config instance.
164 |     Exit the program (with code 1) if an error occurs while parsing the configuration file.
165 | 
166 |     Args:
167 |         config_file_location (Path): The location of the configuration file.
168 | 
169 |     Returns:
170 |         Config: An instance of the Config.
171 |     """
172 |     try:
173 |         with config_file_location.open('rb') as file:
174 |             config_data = tomllib.load(file)
175 | 
176 |         return Config.model_validate(config_data)
177 | 
178 |     except ValidationError as e:
179 |         logger.error("Invalid configuration - the following errors were found in the configuration file:\n" +
180 |                      format_config_validation_error(exc=e) +
181 |                      "\nPlease update your configuration to resolve this issue.")
182 |         logger.debug("Debug information:", exc_info=True)
183 |         exit(1)
184 | 
185 | 
186 |     except tomllib.TOMLDecodeError as e:
187 |         logger.error(f"Error parsing config file: {e}")
188 |         logger.debug("Debug information:", exc_info=True)
189 |         exit(1)
190 | 
191 | 
192 |     except Exception as e:
193 |         logger.error(f"Error loading configuration: {e}")
194 |         logger.debug("Debug information:", exc_info=True)
195 |         exit(1)
196 | 
197 | 
198 | def update_settings(config: Config) -> None:
199 |     """
200 |     Update settings according to config.
201 | 
202 |     Args:
203 |         config (Config): An instance of a config to set settings according to.
204 |     """
205 |     if config.general.log_level.casefold() == "debug":
206 |         console.is_interactive = False
207 | 
208 |     Scraper.subtitles_fix_rtl = config.subtitles.fix_rtl
209 |     Scraper.subtitles_remove_duplicates = config.subtitles.remove_duplicates
210 | 
211 |     Scraper.default_timeout = config.scrapers.default.timeout
212 |     Scraper.default_user_agent = config.scrapers.default.user_agent
213 |     Scraper.default_proxy = config.scrapers.default.proxy
214 |     Scraper.default_verify_ssl = config.scrapers.default.verify_ssl
215 | 
216 |     for scraper in ScraperFactory.get_scraper_classes():
217 |         if scraper_config := get_model_field(model=config.scrapers, field=scraper.id):
218 |             scraper.config = scraper_config
219 | 
220 |     WebVTTCaptionBlock.subrip_alignment_conversion = (
221 |         config.subtitles.webvtt.subrip_alignment_conversion
222 |     )
223 | 
224 |     if config.general.log_rotation_size:
225 |         global log_rotation_size
226 |         log_rotation_size = config.general.log_rotation_size
227 | 
228 | 
229 | if __name__ == "__main__":
230 |     main()
231 | 


--------------------------------------------------------------------------------
/isubrip/cli.py:
--------------------------------------------------------------------------------
 1 | from collections.abc import Iterator
 2 | from contextlib import contextmanager
 3 | from typing import Any, Optional
 4 | 
 5 | from rich.console import Console
 6 | from rich.live import Live
 7 | 
 8 | console = Console(
 9 |     highlight=False,
10 | )
11 | 
12 | @contextmanager 
13 | def conditional_live(renderable: Any) -> Iterator[Optional[Live]]:
14 |     """
15 |     A context manager that conditionally enables Rich's Live display based on console interactivity.
16 |     
17 |     When console.is_interactive is True, this behaves like Rich's Live display.
18 |     When console.is_interactive is False, live updates are disabled.
19 | 
20 |     Args:
21 |         renderable: The Rich renderable object to display in live mode.
22 | 
23 |     Yields:
24 |         Optional[Live]: The Live display object if console is interactive, None otherwise.
25 | 
26 |     Example:
27 |         ```python
28 |         with conditional_live(progress) as live:
29 |             # Your code here
30 |             if live:  # Optional: Check if live display is active
31 |                 live.update(...)
32 |         ```
33 |     """
34 |     if console.is_interactive:
35 |         with Live(renderable, console=console) as live:
36 |             yield live
37 |     else:
38 |         yield None
39 | 


--------------------------------------------------------------------------------
/isubrip/commands/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MichaelYochpaz/iSubRip/d7d7d1e1e4eaa93d49564b74d0ea0b25e07777b0/isubrip/commands/__init__.py


--------------------------------------------------------------------------------
/isubrip/commands/download.py:
--------------------------------------------------------------------------------
  1 | from __future__ import annotations
  2 | 
  3 | from pathlib import Path
  4 | import shutil
  5 | 
  6 | from rich.console import Group
  7 | from rich.progress import BarColumn, Progress, SpinnerColumn, TextColumn
  8 | from rich.text import Text
  9 | 
 10 | from isubrip.cli import conditional_live, console
 11 | from isubrip.data_structures import (
 12 |     Episode,
 13 |     MediaData,
 14 |     Movie,
 15 |     ScrapedMediaResponse,
 16 |     Season,
 17 |     Series,
 18 |     SubtitlesData,
 19 |     SubtitlesDownloadResults,
 20 | )
 21 | from isubrip.logger import logger
 22 | from isubrip.scrapers.scraper import PlaylistLoadError, Scraper, ScraperError, ScraperFactory, SubtitlesDownloadError
 23 | from isubrip.utils import (
 24 |     TempDirGenerator,
 25 |     download_subtitles_to_file,
 26 |     format_list,
 27 |     format_media_description,
 28 |     generate_media_folder_name,
 29 |     generate_non_conflicting_path,
 30 | )
 31 | 
 32 | 
 33 | async def download(*urls: str,
 34 |                    download_path: Path,
 35 |                    language_filter: list[str] | None = None,
 36 |                    convert_to_srt: bool = False,
 37 |                    overwrite_existing: bool = True,
 38 |                    zip: bool = False) -> None:
 39 |     """
 40 |     Download subtitles from given URLs.
 41 | 
 42 |     Args:
 43 |         urls (list[str]): A list of URLs to download subtitles from.
 44 |         download_path (Path): Path to a folder where the subtitles will be downloaded to.
 45 |         language_filter (list[str] | None): List of specific languages to download. None for all languages (no filter).
 46 |             Defaults to None.
 47 |         convert_to_srt (bool, optional): Whether to convert the subtitles to SRT format. Defaults to False.
 48 |         overwrite_existing (bool, optional): Whether to overwrite existing subtitles. Defaults to True.
 49 |         zip (bool, optional): Whether to zip multiple subtitles. Defaults to False.
 50 |     """
 51 |     for url in urls:
 52 |         try:
 53 |             logger.info(f"Scraping [blue]{url}[/blue]")
 54 | 
 55 |             scraper = ScraperFactory.get_scraper_instance(url=url)
 56 | 
 57 |             try:
 58 |                 logger.debug(f"Fetching {url}")
 59 |                 scraper_response: ScrapedMediaResponse = await scraper.get_data(url=url)
 60 | 
 61 |             except ScraperError as e:
 62 |                 logger.error(f"Error: {e}")
 63 |                 logger.debug("Debug information:", exc_info=True)
 64 |                 continue
 65 | 
 66 |             media_data = scraper_response.media_data
 67 |             playlist_scraper = ScraperFactory.get_scraper_instance(scraper_id=scraper_response.playlist_scraper)
 68 | 
 69 |             if not media_data:
 70 |                 logger.error(f"Error: No supported media was found for {url}.")
 71 |                 continue
 72 | 
 73 |             for media_item in media_data:
 74 |                 try:
 75 |                     logger.info(f"Found {media_item.media_type}: "
 76 |                                 f"[cyan]{format_media_description(media_data=media_item)}[/cyan]")
 77 |                     await download_media(scraper=playlist_scraper,
 78 |                                         media_item=media_item,
 79 |                                         download_path=download_path,
 80 |                                         language_filter=language_filter,
 81 |                                         convert_to_srt=convert_to_srt,
 82 |                                         overwrite_existing=overwrite_existing,
 83 |                                         zip=zip)
 84 | 
 85 |                 except Exception as e:
 86 |                     if len(media_data) > 1:
 87 |                         logger.warning(f"Error scraping media item "
 88 |                                     f"'{format_media_description(media_data=media_item)}': {e}\n"
 89 |                                     f"Skipping to next media item...")
 90 |                         logger.debug("Debug information:", exc_info=True)
 91 |                         continue
 92 | 
 93 |                     raise
 94 | 
 95 |         except Exception as e:
 96 |             logger.error(f"Error while scraping '{url}': {e}")
 97 |             logger.debug("Debug information:", exc_info=True)
 98 |             continue
 99 | 
100 | 
101 | async def download_media(scraper: Scraper, media_item: MediaData, download_path: Path,
102 |                               language_filter: list[str] | None = None, convert_to_srt: bool = False,
103 |                               overwrite_existing: bool = True, zip: bool = False) -> None:
104 |     """
105 |     Download a media item.
106 | 
107 |     Args:
108 |         scraper (Scraper): A Scraper object to use for downloading subtitles.
109 |         media_item (MediaData): A media data item to download subtitles for.
110 |         download_path (Path): Path to a folder where the subtitles will be downloaded to.
111 |         language_filter (list[str] | None): List of specific languages to download. None for all languages (no filter).
112 |             Defaults to None.
113 |         convert_to_srt (bool, optional): Whether to convert the subtitles to SRT format. Defaults to False.
114 |         overwrite_existing (bool, optional): Whether to overwrite existing subtitles. Defaults to True.
115 |         zip (bool, optional): Whether to zip multiple subtitles. Defaults to False.
116 |     """
117 |     if isinstance(media_item, Series):
118 |         for season in media_item.seasons:
119 |             await download_media(media_item=season, scraper=scraper, download_path=download_path,
120 |                                  language_filter=language_filter, convert_to_srt=convert_to_srt,
121 |                                  overwrite_existing=overwrite_existing, zip=zip)
122 | 
123 |     elif isinstance(media_item, Season):
124 |         for episode in media_item.episodes:
125 |             logger.info(f"{format_media_description(media_data=episode, shortened=True)}:")
126 |             await download_media_item(media_item=episode, scraper=scraper, download_path=download_path,
127 |                                  language_filter=language_filter, convert_to_srt=convert_to_srt,
128 |                                  overwrite_existing=overwrite_existing, zip=zip)
129 | 
130 |     elif isinstance(media_item, (Movie, Episode)):
131 |         await download_media_item(media_item=media_item, scraper=scraper, download_path=download_path,
132 |                                  language_filter=language_filter, convert_to_srt=convert_to_srt,
133 |                                  overwrite_existing=overwrite_existing, zip=zip)
134 | 
135 | 
136 | async def download_media_item(scraper: Scraper, media_item: Movie | Episode, download_path: Path,
137 |                               language_filter: list[str] | None = None, convert_to_srt: bool = False,
138 |                               overwrite_existing: bool = True, zip: bool = False) -> None:
139 |     """
140 |     Download subtitles for a single media item.
141 | 
142 |     Args:
143 |         scraper (Scraper): A Scraper object to use for downloading subtitles.
144 |         media_item (Movie | Episode): A movie or episode data object.
145 |         download_path (Path): Path to a folder where the subtitles will be downloaded to.
146 |         language_filter (list[str] | None): List of specific languages to download. None for all languages (no filter).
147 |             Defaults to None.
148 |         convert_to_srt (bool, optional): Whether to convert the subtitles to SRT format. Defaults to False.
149 |         overwrite_existing (bool, optional): Whether to overwrite existing subtitles. Defaults to True.
150 |         zip (bool, optional): Whether to zip multiple subtitles. Defaults to False.
151 |     """
152 |     ex: Exception | None = None
153 | 
154 |     if media_item.playlist:
155 |         try:
156 |             results = await download_subtitles(
157 |                 scraper=scraper,
158 |                 media_data=media_item,
159 |                 download_path=download_path,
160 |                 language_filter=language_filter,
161 |                 convert_to_srt=convert_to_srt,
162 |                 overwrite_existing=overwrite_existing,
163 |                 zip=zip,
164 |             )
165 | 
166 |             success_count = len(results.successful_subtitles)
167 |             failed_count = len(results.failed_subtitles)
168 | 
169 |             if success_count or failed_count:
170 |                 logger.info(f"{success_count}/{success_count + failed_count} subtitles were successfully downloaded.")
171 | 
172 |             else:
173 |                 logger.info("No matching subtitles were found.")
174 | 
175 |             return  # noqa: TRY300
176 | 
177 |         except PlaylistLoadError as e:
178 |             ex = e
179 | 
180 |     # We get here if there is no playlist, or there is one, but it failed to load
181 |     if isinstance(media_item, Movie) and media_item.preorder_availability_date:
182 |         logger.info(f"[gold1]'{media_item.name}' is currently unavailable on {scraper.name}, "
183 |                     f"and will be available on {media_item.preorder_availability_date.strftime(r'%d/%m/%Y')}.[/gold1]")
184 | 
185 |     else:
186 |         if ex:
187 |             logger.error(f"Error: {ex}")
188 | 
189 |         else:
190 |             logger.error("Error: No valid playlist was found.")
191 | 
192 | 
193 | async def download_subtitles(scraper: Scraper, media_data: Movie | Episode, download_path: Path,
194 |                              language_filter: list[str] | None = None, convert_to_srt: bool = False,
195 |                              overwrite_existing: bool = True, zip: bool = False) -> SubtitlesDownloadResults:
196 |     """
197 |     Download subtitles for the given media data.
198 | 
199 |     Args:
200 |         scraper (Scraper): A Scraper object to use for downloading subtitles.
201 |         media_data (Movie | Episode): A movie or episode data object.
202 |         download_path (Path): Path to a folder where the subtitles will be downloaded to.
203 |         language_filter (list[str] | None): List of specific languages to download. None for all languages (no filter).
204 |             Defaults to None.
205 |         convert_to_srt (bool, optional): Whether to convert the subtitles to SRT format. Defaults to False.
206 |         overwrite_existing (bool, optional): Whether to overwrite existing subtitles. Defaults to True.
207 |         zip (bool, optional): Whether to zip multiple subtitles. Defaults to False.
208 | 
209 |     Returns:
210 |         SubtitlesDownloadResults: A SubtitlesDownloadResults object containing the results of the download.
211 |     """
212 |     temp_dir_name = generate_media_folder_name(media_data=media_data, source=scraper.abbreviation)
213 |     temp_download_path = TempDirGenerator.generate(directory_name=temp_dir_name)
214 | 
215 |     successful_downloads: list[SubtitlesData] = []
216 |     failed_downloads: list[SubtitlesDownloadError] = []
217 |     temp_downloads: list[Path] = []
218 | 
219 |     if not media_data.playlist:
220 |         raise PlaylistLoadError("No playlist was found for provided media data.")
221 | 
222 |     main_playlist = await scraper.load_playlist(url=media_data.playlist)  # type: ignore[func-returns-value]
223 | 
224 |     if not main_playlist:
225 |         raise PlaylistLoadError("Failed to load the main playlist.")
226 | 
227 |     matching_subtitles = scraper.find_matching_subtitles(main_playlist=main_playlist,  # type: ignore[var-annotated]
228 |                                                          language_filter=language_filter)
229 | 
230 |     # If no matching subtitles were found, there's no need to continue
231 |     if not matching_subtitles:
232 |         return SubtitlesDownloadResults(
233 |             media_data=media_data,
234 |             successful_subtitles=successful_downloads,
235 |             failed_subtitles=failed_downloads,
236 |             is_zip=zip,
237 |         )
238 | 
239 |     logger.debug(f"{len(matching_subtitles)} matching subtitles were found.")
240 |     downloaded_subtitles: list[str] = []
241 |     progress = Progress(
242 |         SpinnerColumn(),
243 |         TextColumn("[progress.description]{task.description}"),
244 |         BarColumn(),
245 |         TextColumn("[progress.percentage][yellow]{task.percentage:>3.0f}%[/yellow]"),
246 |         TextColumn("{task.completed}/{task.total}"),
247 |         console=console,
248 |     )
249 |     
250 |     task = progress.add_task("Downloading subtitles...", total=len(matching_subtitles))
251 |     downloaded_list = Text(f"Downloaded subtitles ({len(downloaded_subtitles)}/{len(matching_subtitles)}):")
252 | 
253 |     with conditional_live(Group(downloaded_list, progress)) as live:
254 |         for matching_subtitles_item in matching_subtitles:
255 |             language_info = scraper.format_subtitles_description(
256 |                 subtitles_media=matching_subtitles_item,
257 |             )
258 | 
259 |             if live:
260 |                 progress.update(task, advance=1, description=f"Processing [magenta]{language_info}[/magenta]")
261 | 
262 |             try:
263 |                 subtitles_data = await scraper.download_subtitles(media_data=matching_subtitles_item,
264 |                                                                   subrip_conversion=convert_to_srt)
265 | 
266 |             except Exception as e:
267 |                 if isinstance(e, SubtitlesDownloadError):
268 |                     failed_downloads.append(e)
269 |                     original_error = e.original_exc
270 |                 
271 |                 else:
272 |                     original_error = e
273 | 
274 |                 logger.warning(f"Failed to download '{language_info}' subtitles: {original_error}")
275 |                 logger.debug("Debug information:", exc_info=original_error)
276 |                 continue
277 | 
278 |             try:
279 |                 temp_downloads.append(download_subtitles_to_file(
280 |                     media_data=media_data,
281 |                     subtitles_data=subtitles_data,
282 |                     output_path=temp_download_path,
283 |                     source_abbreviation=scraper.abbreviation,
284 |                     overwrite=overwrite_existing,
285 |                 ))
286 | 
287 |                 downloaded_subtitles.append(f"• {language_info}")
288 |                 if live:
289 |                     downloaded_list.plain = (
290 |                         f"Downloaded subtitles ({len(downloaded_subtitles)}/{len(matching_subtitles)}):\n"
291 |                         f"{format_list(downloaded_subtitles, width=console.width)}"
292 |                     )
293 |                 logger.info(f"{language_info} subtitles were successfully downloaded.",
294 |                             extra={"hide_when_interactive": True})
295 |                 successful_downloads.append(subtitles_data)
296 | 
297 |             except Exception as e:
298 |                 logger.warning(f"Failed to save '{language_info}' subtitles: {e}")
299 |                 logger.debug("Debug information:", exc_info=True)
300 |                 failed_downloads.append(
301 |                     SubtitlesDownloadError(
302 |                         language_code=subtitles_data.language_code,
303 |                         language_name=subtitles_data.language_name,
304 |                         special_type=subtitles_data.special_type,
305 |                         original_exc=e,
306 |                     ),
307 |                 )
308 | 
309 |     if not zip or len(temp_downloads) == 1:
310 |         for file_path in temp_downloads:
311 |             if overwrite_existing:
312 |                 new_path = download_path / file_path.name
313 | 
314 |             else:
315 |                 new_path = generate_non_conflicting_path(file_path=download_path / file_path.name)
316 | 
317 |             shutil.move(src=file_path, dst=new_path)
318 | 
319 |     elif len(temp_downloads) > 0:
320 |         zip_path = Path(shutil.make_archive(
321 |             base_name=str(temp_download_path.parent / temp_download_path.name),
322 |             format="zip",
323 |             root_dir=temp_download_path,
324 |         ))
325 | 
326 |         file_name = generate_media_folder_name(media_data=media_data,
327 |                                                source=scraper.abbreviation) + ".zip"
328 | 
329 |         if overwrite_existing:
330 |             destination_path = download_path / file_name
331 | 
332 |         else:
333 |             destination_path = generate_non_conflicting_path(file_path=download_path / file_name)
334 | 
335 |         shutil.move(src=str(zip_path), dst=destination_path)
336 | 
337 |     return SubtitlesDownloadResults(
338 |         media_data=media_data,
339 |         successful_subtitles=successful_downloads,
340 |         failed_subtitles=failed_downloads,
341 |         is_zip=zip,
342 |     )
343 | 


--------------------------------------------------------------------------------
/isubrip/config.py:
--------------------------------------------------------------------------------
  1 | from __future__ import annotations
  2 | 
  3 | from abc import ABC
  4 | from pathlib import Path
  5 | from typing import TYPE_CHECKING, Literal
  6 | 
  7 | from pydantic import AliasGenerator, BaseModel, ConfigDict, Field, create_model, field_validator
  8 | from pydantic_core import PydanticCustomError
  9 | from pydantic_settings import BaseSettings, PydanticBaseSettingsSource, SettingsConfigDict, TomlConfigSettingsSource
 10 | 
 11 | from isubrip.scrapers.scraper import DefaultScraperConfig, ScraperFactory
 12 | 
 13 | 
 14 | class ConfigCategory(BaseModel, ABC):
 15 |     """A base class for settings categories."""
 16 |     model_config = ConfigDict(
 17 |         extra='allow',
 18 |         alias_generator=AliasGenerator(
 19 |             validation_alias=lambda field_name: field_name.replace('_', '-'),
 20 |         ),
 21 |     )
 22 | 
 23 | 
 24 | class GeneralCategory(ConfigCategory):
 25 |     check_for_updates: bool = Field(default=True)
 26 |     verbose: bool = Field(default=False)
 27 |     log_level: Literal["debug", "info", "warning", "error", "critical"] = Field(default="info")
 28 |     log_rotation_size: int = Field(default=15)
 29 | 
 30 | 
 31 | class DownloadsCategory(ConfigCategory):
 32 |     folder: Path = Field(default=Path.cwd().resolve())
 33 |     languages: list[str] = Field(default=[])
 34 |     overwrite_existing: bool = Field(default=False)
 35 |     zip: bool = Field(default=False)
 36 | 
 37 |     @field_validator('folder')
 38 |     @classmethod
 39 |     def assure_path_exists(cls, value: Path) -> Path:
 40 |         if value.exists():
 41 |             if not value.is_dir():
 42 |                 raise PydanticCustomError(
 43 |                     "invalid_path",
 44 |                     "Path is not a directory.",
 45 |                 )
 46 | 
 47 |         else:
 48 |             raise PydanticCustomError(
 49 |                 "invalid_path",
 50 |                 "Path does not exist.")
 51 | 
 52 |         return value
 53 | 
 54 | 
 55 | class WebVTTSubcategory(ConfigCategory):
 56 |     subrip_alignment_conversion: bool = Field(default=False)
 57 | 
 58 | 
 59 | class SubtitlesCategory(ConfigCategory):
 60 |     fix_rtl: bool = Field(default=False)
 61 |     remove_duplicates: bool = Field(default=True)
 62 |     convert_to_srt: bool = Field(default=False)
 63 |     webvtt: WebVTTSubcategory = WebVTTSubcategory()
 64 | 
 65 | 
 66 | class ScrapersCategory(ConfigCategory):
 67 |     default: DefaultScraperConfig = Field(default_factory=DefaultScraperConfig)
 68 | 
 69 | 
 70 | # Resolve mypy errors as mypy doesn't support dynamic models.
 71 | if TYPE_CHECKING:
 72 |     DynamicScrapersCategory = ScrapersCategory
 73 | 
 74 | else:
 75 |     # A config model that's dynamically created based on the available scrapers and their configurations.
 76 |     DynamicScrapersCategory = create_model(
 77 |         'DynamicScrapersCategory',
 78 |         __base__=ScrapersCategory,
 79 |         **{
 80 |             scraper.id: (scraper.ScraperConfig, Field(default_factory=scraper.ScraperConfig))
 81 |             for scraper in ScraperFactory.get_scraper_classes()
 82 |         },  # type: ignore[call-overload]
 83 |     )
 84 | 
 85 | 
 86 | class Config(BaseSettings):
 87 |     model_config = SettingsConfigDict(
 88 |         extra='forbid',
 89 |     )
 90 | 
 91 |     general: GeneralCategory = Field(default_factory=GeneralCategory)
 92 |     downloads: DownloadsCategory = Field(default_factory=DownloadsCategory)
 93 |     subtitles: SubtitlesCategory = Field(default_factory=SubtitlesCategory)
 94 |     scrapers: DynamicScrapersCategory = Field(default_factory=DynamicScrapersCategory)
 95 | 
 96 |     @classmethod
 97 |     def settings_customise_sources(
 98 |         cls,
 99 |         settings_cls: type[BaseSettings],
100 |         init_settings: PydanticBaseSettingsSource,
101 |         env_settings: PydanticBaseSettingsSource,
102 |         dotenv_settings: PydanticBaseSettingsSource,
103 |         file_secret_settings: PydanticBaseSettingsSource,
104 |     ) -> tuple[PydanticBaseSettingsSource, ...]:
105 |         return (
106 |             init_settings,
107 |             TomlConfigSettingsSource(settings_cls),
108 |             env_settings,
109 |             dotenv_settings,
110 |             file_secret_settings,
111 |         )
112 | 


--------------------------------------------------------------------------------
/isubrip/constants.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | import asyncio
 4 | import datetime as dt
 5 | from pathlib import Path
 6 | from tempfile import gettempdir
 7 | 
 8 | # General
 9 | PACKAGE_NAME = "isubrip"
10 | PACKAGE_VERSION = "2.6.3"
11 | 
12 | # Async
13 | EVENT_LOOP = asyncio.get_event_loop()
14 | 
15 | # Paths
16 | DEFAULT_CONFIG_PATH = Path(__file__).parent / "resources" / "default_config.toml"
17 | DATA_FOLDER_PATH = Path.home() / f".{PACKAGE_NAME}"
18 | SCRAPER_MODULES_SUFFIX = "_scraper"
19 | TEMP_FOLDER_PATH = Path(gettempdir()) / PACKAGE_NAME
20 | 
21 | # Config Paths
22 | USER_CONFIG_FILE_NAME = "config.toml"
23 | USER_CONFIG_FILE_PATH = DATA_FOLDER_PATH / USER_CONFIG_FILE_NAME
24 | 
25 | # Logging Paths
26 | LOG_FILES_PATH = DATA_FOLDER_PATH / "logs"
27 | LOG_FILE_NAME = f"{PACKAGE_NAME}_{dt.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.log"
28 | 
29 | 
30 | # Other
31 | TITLE_REPLACEMENT_STRINGS = {  # Replacements will be done by the order of the keys.
32 |     ": ": ".", ":": ".", " - ": "-", ", ": ".", ". ": ".", " ": ".", "|": ".", "/": ".", "…": ".",
33 |     "<": "", ">": "", "(": "", ")": "", '"': "", "?": "", "*": "",
34 | }
35 | WINDOWS_RESERVED_FILE_NAMES = ("CON", "PRN", "AUX", "NUL", "COM1", "COM2", "COM3", "COM4", "COM5", "COM6", "COM7",
36 |                                "COM8", "COM9", "LPT1", "LPT2", "LPT3", "LPT4", "LPT5", "LPT6", "LPT7", "LPT8", "LPT9")
37 | 


--------------------------------------------------------------------------------
/isubrip/data_structures.py:
--------------------------------------------------------------------------------
  1 | from __future__ import annotations
  2 | 
  3 | from abc import ABC
  4 | import datetime as dt  # noqa: TC003
  5 | from enum import Enum
  6 | from typing import TYPE_CHECKING, Generic, Literal, NamedTuple, Optional, TypeVar, Union
  7 | 
  8 | import m3u8
  9 | from pydantic import BaseModel
 10 | 
 11 | if TYPE_CHECKING:
 12 |     from isubrip.scrapers.scraper import SubtitlesDownloadError
 13 | 
 14 | T = TypeVar("T")
 15 | MainPlaylist = TypeVar("MainPlaylist", bound=m3u8.M3U8)
 16 | PlaylistMediaItem = TypeVar("PlaylistMediaItem", bound=m3u8.Media)
 17 | 
 18 | MediaData = TypeVar("MediaData", bound="MediaBase")
 19 | 
 20 | 
 21 | class SubtitlesDownloadResults(NamedTuple):
 22 |     """
 23 |     A named tuple containing download results.
 24 | 
 25 |     Attributes:
 26 |         media_data (Movie | Episode): An object containing metadata about the media the subtitles were downloaded for.
 27 |         successful_subtitles (list[SubtitlesData]): List of subtitles that were successfully downloaded.
 28 |         failed_subtitles (list[SubtitlesData]): List of subtitles that failed to download.
 29 |         is_zip (bool): Whether the subtitles were saved in a zip file.
 30 |     """
 31 |     media_data: Movie | Episode
 32 |     successful_subtitles: list[SubtitlesData]
 33 |     failed_subtitles: list[SubtitlesDownloadError]
 34 |     is_zip: bool
 35 | 
 36 | 
 37 | class SubtitlesFormat(BaseModel):
 38 |     """
 39 |     An object containing subtitles format data.
 40 | 
 41 |     Attributes:
 42 |         name (str): Name of the format.
 43 |         file_extension (str): File extension of the format.
 44 |     """
 45 |     name: str
 46 |     file_extension: str
 47 | 
 48 | 
 49 | class SubtitlesFormatType(Enum):
 50 |     """
 51 |     An Enum representing subtitles formats.
 52 | 
 53 |     Attributes:
 54 |         SUBRIP (SubtitlesFormat): SubRip format.
 55 |         WEBVTT (SubtitlesFormat): WebVTT format.
 56 |     """
 57 |     SUBRIP = SubtitlesFormat(name="SubRip", file_extension="srt")
 58 |     WEBVTT = SubtitlesFormat(name="WebVTT", file_extension="vtt")
 59 | 
 60 | 
 61 | class SubtitlesType(Enum):
 62 |     """
 63 |     Subtitles special type.
 64 | 
 65 |     Attributes:
 66 |         CC (SubtitlesType): Closed captions.
 67 |         FORCED (SubtitlesType): Forced subtitles.
 68 |     """
 69 |     CC = "CC"
 70 |     FORCED = "Forced"
 71 | 
 72 | 
 73 | class SubtitlesData(BaseModel):
 74 |     """
 75 |     An object containing subtitles data and metadata.
 76 | 
 77 |     Attributes:
 78 |         language_code (str): Language code of the language the subtitles are in.
 79 |         language_name (str | None, optional): Name of the language the subtitles are in.
 80 |         subtitles_format (SubtitlesFormatType): Format of the subtitles.
 81 |         content (bytes): Content of the subtitles in binary format.
 82 |         content_encoding (str): Encoding of subtitles content (ex. "utf-8").
 83 |         special_type (SubtitlesType | None, optional): Type of the subtitles, if they're not regular. Defaults to None.
 84 |     """
 85 |     language_code: str
 86 |     subtitles_format: SubtitlesFormatType
 87 |     content: bytes
 88 |     content_encoding: str
 89 |     language_name: Optional[str] = None
 90 |     special_type: Union[SubtitlesType, None] = None
 91 | 
 92 |     class ConfigDict:
 93 |         str_strip_whitespace = True
 94 | 
 95 | 
 96 | class MediaBase(BaseModel, ABC):
 97 |     """A base class for media objects."""
 98 | 
 99 | 
100 | class Movie(MediaBase):
101 |     """
102 |     An object containing movie metadata.
103 | 
104 |     Attributes:
105 |         id (str | None, optional): ID of the movie on the service it was scraped from. Defaults to None.
106 |         referrer_id (str | None, optional): ID of the movie on the original referring service. Defaults to None.
107 |         name (str): Title of the movie.
108 |         release_date (datetime | int | None, optional): Release date (datetime), or year (int) of the movie.
109 |             Defaults to None.
110 |         duration (timedelta | None, optional): Duration of the movie. Defaults to None.
111 |         preorder_availability_date (datetime | None, optional):
112 |             Date when the movie will be available for pre-order on the service it was scraped from.
113 |             None if not a pre-order. Defaults to None.
114 |         playlist (str | None, optional): Main playlist URL(s).
115 |     """
116 |     media_type: Literal["movie"] = "movie"
117 |     name: str
118 |     release_date: Union[dt.datetime, int]
119 |     id: Optional[str] = None
120 |     referrer_id: Optional[str] = None
121 |     duration: Optional[dt.timedelta] = None
122 |     preorder_availability_date: Optional[dt.datetime] = None
123 |     playlist: Union[str, list[str], None] = None
124 | 
125 | 
126 | class Episode(MediaBase):
127 |     """
128 |     An object containing episode metadata.
129 | 
130 |     Attributes:
131 |         id (str | None, optional): ID of the episode on the service it was scraped from. Defaults to None.
132 |         referrer_id (str | None, optional): ID of the episode on the original referring service. Defaults to None.
133 |         series_name (str): Name of the series the episode is from.
134 |         series_release_date (datetime | int | None, optional): Release date (datetime), or year (int) of the series.
135 |             Defaults to None.
136 |         season_number (int): Season number.
137 |         season_name (str | None, optional): Season name. Defaults to None.
138 |         episode_number (int): Episode number.
139 |         episode_name (str | None, optional): Episode name. Defaults to None.
140 |         episode_release_date (datetime | None): Release date of the episode. Defaults to None.
141 |         episode_duration (timedelta | None, optional): Duration of the episode. Defaults to None.
142 |         playlist (str | None, optional): Main playlist URL(s).
143 |     """
144 |     media_type: Literal["episode"] = "episode"
145 |     series_name: str
146 |     season_number: int
147 |     episode_number: int
148 |     id: Optional[str] = None
149 |     referrer_id: Optional[str] = None
150 |     series_release_date: Union[dt.datetime, int, None] = None
151 |     season_name: Optional[str] = None
152 |     release_date: Optional[dt.datetime] = None
153 |     duration: Optional[dt.timedelta] = None
154 |     episode_name: Optional[str] = None
155 |     episode_release_date: Optional[dt.datetime] = None
156 |     episode_duration: Optional[dt.timedelta] = None
157 |     playlist: Union[str, list[str], None] = None
158 | 
159 | 
160 | class Season(MediaBase):
161 |     """
162 |     An object containing season metadata.
163 | 
164 |     Attributes:
165 |         id (str | None, optional): ID of the season on the service it was scraped from. Defaults to None.
166 |         referrer_id (str | None, optional): ID of the season on the original referring service. Defaults to None.
167 |         series_name (str): Name of the series the season is from.
168 |         series_release_date (datetime | int | None, optional): Release date (datetime), or year (int) of the series.
169 |             Defaults to None.
170 |         season_name (str | None, optional): Season name. Defaults to None.
171 |         season_release_date (datetime | None, optional): Release date of the season, or release year. Defaults to None.
172 |         episodes (list[Episode]): A list of episode objects containing metadata about episodes of the season.
173 |     """
174 |     media_type: Literal["season"] = "season"
175 |     series_name: str
176 |     season_number: int
177 |     id: Optional[str] = None
178 |     referrer_id: Optional[str] = None
179 |     series_release_date: Union[dt.datetime, int, None] = None
180 |     season_name: Optional[str] = None
181 |     season_release_date: Union[dt.datetime, int, None] = None
182 |     episodes: list[Episode] = []
183 | 
184 | 
185 | class Series(MediaBase):
186 |     """
187 |     An object containing series metadata.
188 | 
189 |     Attributes:
190 |         id (str | None, optional): ID of the series on the service it was scraped from. Defaults to None.
191 |         series_name (str): Series name.
192 |         referrer_id (str | None, optional): ID of the series on the original referring service. Defaults to None.
193 |         series_release_date (datetime | int | None, optional): Release date (datetime), or year (int) of the series.
194 |             Defaults to None.
195 |         seasons (list[Season]): A list of season objects containing metadata about seasons of the series.
196 |     """
197 |     media_type: Literal["series"] = "series"
198 |     series_name: str
199 |     seasons: list[Season] = []
200 |     id: Optional[str] = None
201 |     referrer_id: Optional[str] = None
202 |     series_release_date: Union[dt.datetime, int, None] = None
203 | 
204 | 
205 | class ScrapedMediaResponse(BaseModel, Generic[MediaData]):
206 |     """
207 |     An object containing scraped media data and metadata.
208 | 
209 |     Attributes:
210 |         media_data (list[Movie] | list[Episode] | list[Season] | list[Series]):
211 |             An object containing the scraped media data.
212 |         metadata_scraper (str): ID of the scraper that was used to scrape metadata.
213 |         playlist_scraper (str): ID of the scraper that should be used to parse and scrape the playlist.
214 |         original_data (dict): Original raw data from the API that was used to extract media's data.
215 |     """
216 |     media_data: list[MediaData]
217 |     metadata_scraper: str
218 |     playlist_scraper: str
219 |     original_data: dict
220 | 


--------------------------------------------------------------------------------
/isubrip/logger.py:
--------------------------------------------------------------------------------
  1 | from __future__ import annotations
  2 | 
  3 | import logging
  4 | import re
  5 | from functools import lru_cache
  6 | from typing import ClassVar, Dict, Optional, TYPE_CHECKING
  7 | 
  8 | from rich.highlighter import NullHighlighter
  9 | from rich.logging import RichHandler
 10 | 
 11 | from isubrip.cli import console
 12 | from isubrip.constants import (
 13 |     LOG_FILE_NAME,
 14 |     LOG_FILES_PATH,
 15 |     PACKAGE_NAME,
 16 | )
 17 | 
 18 | if TYPE_CHECKING:
 19 |     from rich.console import Console
 20 | 
 21 | BBCOE_REGEX = re.compile(
 22 |     r"(?i)(?P<opening_tag>\[(?P<tag_name>[a-z#@][^[]*?)])(?P<content>.*)(?P<closing_tag>\[/(?P=tag_name)])")
 23 | LOG_FILE_METADATA = "[%(asctime)s | %(levelname)s | %(threadName)s | %(filename)s::%(funcName)s::%(lineno)d] "
 24 | 
 25 | 
 26 | def set_logger(_logger: logging.Logger) -> None:
 27 |     """
 28 |     Set an external logger to be used by the package.
 29 | 
 30 |     Args:
 31 |         _logger (logging.Logger): A logger instance to be used by the package.
 32 |     """
 33 |     global logger
 34 |     logger = _logger
 35 | 
 36 | 
 37 | class CustomStdoutFormatter(RichHandler):
 38 |     """
 39 |     Custom formatter for stdout logging with Rich integration.
 40 |     
 41 |     This formatter adds color to log messages based on their level and
 42 |     supports hiding messages in interactive mode.
 43 |     """
 44 |     LEVEL_COLORS: ClassVar[Dict[int, str]] = {
 45 |         logging.ERROR: "red",
 46 |         logging.WARNING: "dark_orange",
 47 |         logging.DEBUG: "grey54"
 48 |     }
 49 |     
 50 |     def __init__(self, console: Console | None = None, debug_mode: bool = False) -> None:
 51 |         """
 52 |         Initialize the stdout formatter.
 53 |         
 54 |         Args:
 55 |             console (Console | None, optional): Rich console instance to use for output. Defaults to None.
 56 |             debug_mode (bool, optional): Whether to show additional debug information. Defaults to False.
 57 |         """
 58 |         super().__init__(
 59 |             console=console,
 60 |             show_time=debug_mode,
 61 |             show_level=debug_mode,
 62 |             show_path=debug_mode,
 63 |             highlighter=NullHighlighter(),
 64 |             markup=True,
 65 |             log_time_format="%H:%M:%S",
 66 |             rich_tracebacks=debug_mode,
 67 |             tracebacks_extra_lines=0,
 68 |         )
 69 |         self._console = console
 70 | 
 71 |     def emit(self, record: logging.LogRecord) -> None:
 72 |         """
 73 |         Emit a log record, respecting the 'hide_when_interactive' flag.
 74 |         
 75 |         Args:
 76 |             record (LogRecord): The log record to emit.
 77 |         """
 78 |         # Skip emission if record is marked to be hidden in interactive mode
 79 |         if getattr(record, 'hide_when_interactive', False) and self._console and self._console.is_interactive:
 80 |             return
 81 |         super().emit(record)
 82 | 
 83 |     def format(self, record: logging.LogRecord) -> str:
 84 |         """
 85 |         Format the log record with appropriate color based on level.
 86 |         
 87 |         Args:
 88 |             record (LogRecord): The log record to format.
 89 |             
 90 |         Returns:
 91 |             str: Formatted log message with Rich markup.
 92 |         """
 93 |         # Get the message once
 94 |         message = record.getMessage()
 95 |         
 96 |         # Apply color based on log level using the class variable mapping
 97 |         if color := self.LEVEL_COLORS.get(record.levelno):
 98 |             record.msg = f"[{color}]{message}[/{color}]"
 99 |         
100 |         return super().format(record)
101 | 
102 | 
103 | class CustomLogFileFormatter(logging.Formatter):
104 |     """
105 |     Custom formatter for log files that removes Rich markup tags.
106 |     """
107 |     def __init__(self):
108 |         """
109 |         Initialize the formatter with metadata format but without message part.
110 |         We'll append the message manually to avoid issues with special characters.
111 |         """
112 |         super().__init__(
113 |             fmt=LOG_FILE_METADATA,
114 |             datefmt=r"%Y-%m-%d %H:%M:%S",
115 |         )
116 |     
117 |     @staticmethod
118 |     @lru_cache(maxsize=64)
119 |     def _remove_rich_markup(text: str) -> str:
120 |         """
121 |         Remove Rich markup tags from text efficiently with caching.
122 |         
123 |         Args:
124 |             text: Text containing Rich markup tags
125 |             
126 |         Returns:
127 |             Text with Rich markup tags removed
128 |         """
129 |         while match := BBCOE_REGEX.search(text):
130 |             text = text[:match.start()] + match.group('content') + text[match.end():]
131 |         return text
132 |     
133 |     def format(self, record: logging.LogRecord) -> str:
134 |         """
135 |         Format the log record for file output, removing Rich markup.
136 |         This implementation uses the standard formatter for the metadata part
137 |         and then appends the message without formatting to avoid issues with
138 |         special characters within the log message.
139 |         
140 |         Args:
141 |             record: The log record to format
142 |             
143 |         Returns:
144 |             Formatted log message suitable for file output
145 |         """
146 |         message = record.getMessage()
147 |         clean_message = self._remove_rich_markup(message)
148 |         
149 |         # Store the original message
150 |         original_msg = record.msg
151 |         original_args = record.args
152 |         
153 |         # Temporarily set an empty message to format just the metadata
154 |         record.msg = ""
155 |         record.args = None
156 |         
157 |         # Format the metadata part using the standard formatter
158 |         metadata = super().format(record)
159 |         
160 |         # Restore the original message and args
161 |         record.msg = original_msg
162 |         record.args = original_args
163 |         
164 |         # Combine metadata and message without formatting the message
165 |         return metadata + clean_message
166 | 
167 | 
168 | def setup_loggers(stdout_output: bool = True, stdout_console: Optional[Console] = None,
169 |                   stdout_loglevel: int = logging.INFO, logfile_output: bool = True,
170 |                   logfile_loglevel: int = logging.DEBUG) -> None:
171 |     """
172 |     Configure loggers for both stdout and file output.
173 | 
174 |     Args:
175 |         stdout_output (bool, optional): Whether to output logs to STDOUT. Defaults to True.
176 |         stdout_console (Console | None, optional): A Rich console instance to be used for STDOUT logging.
177 |             Relevant only if `stdout_output` is True. Defaults to None.
178 |         stdout_loglevel (int, optional): Log level for STDOUT logger. Relevant only if `stdout_output` is True.
179 |             Defaults to logging.INFO.
180 |         logfile_output (bool, optional): Whether to output logs to a logfile. Defaults to True.
181 |         logfile_loglevel (int, optional): Log level for logfile logger. Relevant only if `logfile_output` is True.
182 |             Defaults to logging.DEBUG.
183 |     """
184 |     logger.handlers.clear()  # Remove and reset existing handlers
185 |     logger.setLevel(logging.DEBUG)
186 | 
187 |     if stdout_output:
188 |         debug_mode = (stdout_loglevel == logging.DEBUG)
189 |         stdout_handler = CustomStdoutFormatter(
190 |             debug_mode=debug_mode,
191 |             console=stdout_console,
192 |         )
193 |         stdout_handler.setLevel(stdout_loglevel)
194 |         logger.addHandler(stdout_handler)
195 | 
196 |     if logfile_output:
197 |         if not LOG_FILES_PATH.is_dir():
198 |             logger.debug("Logs directory could not be found and will be created.")
199 |             LOG_FILES_PATH.mkdir()
200 | 
201 |         logfile_path = LOG_FILES_PATH / LOG_FILE_NAME
202 |         logfile_handler = logging.FileHandler(filename=logfile_path, encoding="utf-8")
203 |         logfile_handler.setLevel(logfile_loglevel)
204 |         logfile_handler.setFormatter(CustomLogFileFormatter())
205 |         logger.debug(f"Log file location: '{logfile_path}'")
206 |         logger.addHandler(logfile_handler)
207 | 
208 | 
209 | logger = logging.getLogger(PACKAGE_NAME)
210 | 
211 | # Temporarily set the logger to INFO level until the config is loaded and the logger is properly set up
212 | logger.setLevel(logging.INFO)
213 | logger.addHandler(CustomStdoutFormatter(console=console))
214 | 


--------------------------------------------------------------------------------
/isubrip/scrapers/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MichaelYochpaz/iSubRip/d7d7d1e1e4eaa93d49564b74d0ea0b25e07777b0/isubrip/scrapers/__init__.py


--------------------------------------------------------------------------------
/isubrip/scrapers/appletv_scraper.py:
--------------------------------------------------------------------------------
  1 | from __future__ import annotations
  2 | 
  3 | import datetime as dt
  4 | from enum import Enum
  5 | import fnmatch
  6 | import re
  7 | from typing import Any
  8 | 
  9 | from httpx import HTTPError
 10 | 
 11 | from isubrip.data_structures import Episode, Movie, ScrapedMediaResponse, Season, Series
 12 | from isubrip.logger import logger
 13 | from isubrip.scrapers.scraper import HLSScraper, ScraperError
 14 | from isubrip.subtitle_formats.webvtt import WebVTTSubtitles
 15 | from isubrip.utils import convert_epoch_to_datetime, parse_url_params, raise_for_status
 16 | 
 17 | 
 18 | class AppleTVScraper(HLSScraper):
 19 |     """An Apple TV scraper."""
 20 |     id = "appletv"
 21 |     name = "Apple TV"
 22 |     abbreviation = "ATV"
 23 |     url_regex = re.compile(r"(?i)(?P<base_url>https?://tv\.apple\.com/(?:(?P<country_code>[a-z]{2})/)?(?P<media_type>movie|episode|season|show)/(?:(?P<media_name>[\w\-%]+)/)?(?P<media_id>umc\.cmc\.[a-z\d]{23,25}))(?:\?(?P<url_params>.*))?")
 24 |     subtitles_class = WebVTTSubtitles
 25 |     is_movie_scraper = True
 26 |     is_series_scraper = True
 27 |     uses_scrapers = ["itunes"]
 28 |     default_storefront = "US"
 29 |     storefronts_mapping = {
 30 |         "AF": "143610", "AO": "143564", "AI": "143538", "AL": "143575", "AD": "143611", "AE": "143481", "AR": "143505",
 31 |         "AM": "143524", "AG": "143540", "AU": "143460", "AT": "143445", "AZ": "143568", "BE": "143446", "BJ": "143576",
 32 |         "BF": "143578", "BD": "143490", "BG": "143526", "BH": "143559", "BS": "143539", "BA": "143612", "BY": "143565",
 33 |         "BZ": "143555", "BM": "143542", "BO": "143556", "BR": "143503", "BB": "143541", "BN": "143560", "BT": "143577",
 34 |         "BW": "143525", "CF": "143623", "CA": "143455", "CH": "143459", "CL": "143483", "CN": "143465", "CI": "143527",
 35 |         "CM": "143574", "CD": "143613", "CG": "143582", "CO": "143501", "CV": "143580", "CR": "143495", "KY": "143544",
 36 |         "CY": "143557", "CZ": "143489", "DE": "143443", "DM": "143545", "DK": "143458", "DO": "143508", "DZ": "143563",
 37 |         "EC": "143509", "EG": "143516", "ES": "143454", "EE": "143518", "ET": "143569", "FI": "143447", "FJ": "143583",
 38 |         "FR": "143442", "FM": "143591", "GA": "143614", "GB": "143444", "GE": "143615", "GH": "143573", "GN": "143616",
 39 |         "GM": "143584", "GW": "143585", "GR": "143448", "GD": "143546", "GT": "143504", "GY": "143553", "HK": "143463",
 40 |         "HN": "143510", "HR": "143494", "HU": "143482", "ID": "143476", "IN": "143467", "IE": "143449", "IQ": "143617",
 41 |         "IS": "143558", "IL": "143491", "IT": "143450", "JM": "143511", "JO": "143528", "JP": "143462", "KZ": "143517",
 42 |         "KE": "143529", "KG": "143586", "KH": "143579", "KN": "143548", "KR": "143466", "KW": "143493", "LA": "143587",
 43 |         "LB": "143497", "LR": "143588", "LY": "143567", "LC": "143549", "LI": "143522", "LK": "143486", "LT": "143520",
 44 |         "LU": "143451", "LV": "143519", "MO": "143515", "MA": "143620", "MC": "143618", "MD": "143523", "MG": "143531",
 45 |         "MV": "143488", "MX": "143468", "MK": "143530", "ML": "143532", "MT": "143521", "MM": "143570", "ME": "143619",
 46 |         "MN": "143592", "MZ": "143593", "MR": "143590", "MS": "143547", "MU": "143533", "MW": "143589", "MY": "143473",
 47 |         "NA": "143594", "NE": "143534", "NG": "143561", "NI": "143512", "NL": "143452", "NO": "143457", "NP": "143484",
 48 |         "NR": "143606", "NZ": "143461", "OM": "143562", "PK": "143477", "PA": "143485", "PE": "143507", "PH": "143474",
 49 |         "PW": "143595", "PG": "143597", "PL": "143478", "PT": "143453", "PY": "143513", "PS": "143596", "QA": "143498",
 50 |         "RO": "143487", "RU": "143469", "RW": "143621", "SA": "143479", "SN": "143535", "SG": "143464", "SB": "143601",
 51 |         "SL": "143600", "SV": "143506", "RS": "143500", "ST": "143598", "SR": "143554", "SK": "143496", "SI": "143499",
 52 |         "SE": "143456", "SZ": "143602", "SC": "143599", "TC": "143552", "TD": "143581", "TH": "143475", "TJ": "143603",
 53 |         "TM": "143604", "TO": "143608", "TT": "143551", "TN": "143536", "TR": "143480", "TW": "143470", "TZ": "143572",
 54 |         "UG": "143537", "UA": "143492", "UY": "143514", "US": "143441", "UZ": "143566", "VC": "143550", "VE": "143502",
 55 |         "VG": "143543", "VN": "143471", "VU": "143609", "WS": "143607", "XK": "143624", "YE": "143571", "ZA": "143472",
 56 |         "ZM": "143622", "ZW": "143605",
 57 |     }
 58 | 
 59 |     _api_base_url = "https://tv.apple.com/api/uts/v3"
 60 |     _api_base_params = {
 61 |         "utscf": "OjAAAAAAAAA~",
 62 |         "caller": "web",
 63 |         "v": "81",
 64 |         "pfm": "web",
 65 |     }
 66 | 
 67 |     class Channel(Enum):
 68 |         """
 69 |         An Enum representing AppleTV channels.
 70 |         Value represents the channel ID as used by the API.
 71 |         """
 72 |         APPLE_TV_PLUS = "tvs.sbd.4000"
 73 |         DISNEY_PLUS = "tvs.sbd.1000216"
 74 |         ITUNES = "tvs.sbd.9001"
 75 |         HULU = "tvs.sbd.10000"
 76 |         MAX = "tvs.sbd.9050"
 77 |         NETFLIX = "tvs.sbd.9000"
 78 |         PRIME_VIDEO = "tvs.sbd.12962"
 79 |         STARZ = "tvs.sbd.1000308"
 80 | 
 81 |     def __init__(self, *args: Any, **kwargs: Any) -> None:
 82 |         super().__init__(*args, **kwargs)
 83 |         self._storefronts_request_params_cache: dict[str, dict[str, str]] = {}
 84 | 
 85 |     def _decide_locale(self, preferred_locales: str | list[str], default_locale: str, locales: list[str]) -> str:
 86 |         """
 87 |         Decide which locale to use.
 88 | 
 89 |         Args:
 90 |             preferred_locales (str | list[str]): The preferred locales to use.
 91 |             default_locale (str): The default locale to use if there is no match.
 92 |             locales (list[str]): The locales to search in.
 93 | 
 94 |         Returns:
 95 |             str: The locale to use.
 96 |         """
 97 |         if isinstance(preferred_locales, str):
 98 |             preferred_locales = [preferred_locales]
 99 | 
100 |         for locale in preferred_locales:
101 |             if locale in locales:
102 |                 return locale.replace("_", "-")
103 | 
104 |         if result := fnmatch.filter(locales, "en_*"):
105 |             return result[0].replace("_", "-")
106 | 
107 |         return default_locale
108 | 
109 |     async def _fetch_api_data(self, storefront_id: str, endpoint: str, additional_params: dict | None = None) -> dict:
110 |         """
111 |         Send a request to AppleTV's API and return the JSON response.
112 | 
113 |         Args:
114 |             endpoint (str): The endpoint to send the request to.
115 |             additional_params (dict[str, str]): Additional parameters to send with the request.
116 | 
117 |         Returns:
118 |             dict: The JSON response.
119 | 
120 |         Raises:
121 |             HttpError: If an HTTP error response is received.
122 |         """
123 |         request_params = await self._fetch_request_params(storefront_id=storefront_id)
124 | 
125 |         if additional_params:
126 |             request_params.update(additional_params)
127 | 
128 |         response = await self._async_session.get(url=f"{self._api_base_url}{endpoint}", params=request_params)
129 | 
130 |         try:
131 |             raise_for_status(response)
132 | 
133 |         except HTTPError as e:
134 |             if response.status_code == 404:
135 |                 raise ScraperError(
136 |                     "Media not found. This could indicate that the provided URL is invalid.",
137 |                 ) from e
138 | 
139 |             raise
140 | 
141 |         response_json: dict = response.json()
142 |         response_data: dict = response_json.get("data", {})
143 | 
144 |         return response_data
145 |     
146 |     async def _fetch_request_params(self, storefront_id: str) -> dict[str, str]:
147 |         """
148 |         Fetch from the API request parameters for the given storefront ID.
149 |         Uses caching with `self._storefronts_request_params_cache` for efficiency.
150 | 
151 |         Args:
152 |             storefront_id (str): The ID of the storefront to fetch the request parameters for.
153 | 
154 |         Returns:
155 |             dict: The request parameters for the given storefront ID. If returned from cache, a copy is returned.
156 |         """
157 |         if storefront_cached_params := self._storefronts_request_params_cache.get(storefront_id):
158 |             logger.debug(f"Using cached request parameters for storefront '{storefront_id}':"
159 |                          f"'{storefront_cached_params}'.")
160 |             return storefront_cached_params.copy()
161 | 
162 |         configuration_data = self._get_configuration_data(storefront_id=storefront_id)
163 |         request_params: dict[str, str] = configuration_data["applicationProps"]["requiredParamsMap"]["Default"]
164 |         default_locale: str = configuration_data["applicationProps"]["storefront"]["defaultLocale"]
165 |         available_locales: list[str] = configuration_data["applicationProps"]["storefront"]["localesSupported"]
166 | 
167 |         logger.debug(f"Available locales for storefront '{storefront_id}': {available_locales}'. "
168 |                      f"Storefront's default locale: '{default_locale}'.")
169 | 
170 |         locale = self._decide_locale(
171 |             preferred_locales=["en_US", "en_GB"],
172 |             default_locale=default_locale,
173 |             locales=available_locales,
174 |         )
175 | 
176 |         request_params["sf"] = storefront_id
177 |         request_params["locale"] = locale
178 | 
179 |         logger.debug(f"Using and caching request parameters for storefront '{storefront_id}': {request_params}")
180 |         self._storefronts_request_params_cache[storefront_id] = request_params.copy()
181 | 
182 |         return request_params
183 | 
184 |     def _get_configuration_data(self, storefront_id: str) -> dict:
185 |         """
186 |         Get configuration data for the given storefront ID.
187 | 
188 |         Args:
189 |             storefront_id (str): The ID of the storefront to get the configuration data for.
190 | 
191 |         Returns:
192 |             dict: Configuration data as returned by the API for the given storefront ID.
193 |         """
194 |         logger.debug(f"Fetching configuration data for storefront '{storefront_id}'...")
195 |         url = f"{self._api_base_url}/configurations"
196 | 
197 |         params = self._api_base_params.copy()
198 |         params["sf"] = storefront_id
199 | 
200 |         response = self._session.get(url=url, params=params)
201 |         raise_for_status(response)
202 |         logger.debug("Configuration data fetched successfully.")
203 | 
204 |         response_data: dict = response.json()["data"]
205 |         return response_data
206 | 
207 |     def _map_playables_by_channel(self, playables: list[dict]) -> dict[str, dict]:
208 |         """
209 |         Map playables by channel name.
210 | 
211 |         Args:
212 |             playables (list[dict]): Playables data to map.
213 | 
214 |         Returns:
215 |             dict: The mapped playables (in a `channel_name (str): [playables]` format).
216 |         """
217 |         mapped_playables: dict = {}
218 | 
219 |         for playable in playables:
220 |             if channel_id := playable.get("channelId"):
221 |                 mapped_playables.setdefault(channel_id, []).append(playable)
222 | 
223 |         return mapped_playables
224 | 
225 |     async def get_movie_data(self, storefront_id: str, movie_id: str) -> ScrapedMediaResponse[Movie]:
226 |         data = await self._fetch_api_data(
227 |             storefront_id=storefront_id,
228 |             endpoint=f"/movies/{movie_id}",
229 |         )
230 | 
231 |         mapped_playables = self._map_playables_by_channel(playables=data["playables"].values())
232 |         logger.debug(f"Available channels for movie '{movie_id}': "
233 |                      f"{' '.join(list(mapped_playables.keys()))}")
234 | 
235 |         if self.Channel.ITUNES.value not in mapped_playables:
236 |             if self.Channel.APPLE_TV_PLUS.value in mapped_playables:
237 |                 raise ScraperError("Scraping AppleTV+ content is not currently supported.")
238 | 
239 |             raise ScraperError("No iTunes playables could be found.")
240 | 
241 |         return_data = []
242 | 
243 |         for playable_data in mapped_playables[self.Channel.ITUNES.value]:
244 |             return_data.append(self._extract_itunes_movie_data(playable_data))
245 | 
246 |         if len(return_data) > 1:
247 |             logger.debug(f"{len(return_data)} iTunes playables were found for movie '{movie_id}'.")
248 | 
249 |         return ScrapedMediaResponse(
250 |             media_data=return_data,
251 |             metadata_scraper=self.id,
252 |             playlist_scraper="itunes",
253 |             original_data=data,
254 |         )
255 | 
256 |     def _extract_itunes_movie_data(self, playable_data: dict) -> Movie:
257 |         """
258 |         Extract movie data from an AppleTV's API iTunes playable data.
259 | 
260 |         Args:
261 |             playable_data (dict): The playable data from the AppleTV API.
262 | 
263 |         Returns:
264 |             Movie: A Movie object.
265 |         """
266 |         itunes_movie_id = playable_data["itunesMediaApiData"]["id"]
267 |         appletv_movie_id = playable_data["canonicalId"]
268 |         movie_title = playable_data["canonicalMetadata"]["movieTitle"]
269 |         movie_release_date = convert_epoch_to_datetime(playable_data["canonicalMetadata"]["releaseDate"] // 1000)
270 | 
271 |         movie_playlists = []
272 |         movie_duration = None
273 | 
274 |         if offers := playable_data["itunesMediaApiData"].get("offers"):
275 |             for offer in offers:
276 |                 if (playlist := offer.get("hlsUrl")) and offer["hlsUrl"] not in movie_playlists:
277 |                     movie_playlists.append(playlist)
278 | 
279 |             if movie_duration_int := offers[0].get("durationInMilliseconds"):
280 |                 movie_duration = dt.timedelta(milliseconds=movie_duration_int)
281 | 
282 |         if movie_expected_release_date := playable_data["itunesMediaApiData"].get("futureRentalAvailabilityDate"):
283 |             movie_expected_release_date = dt.datetime.strptime(movie_expected_release_date, "%Y-%m-%d")
284 | 
285 |         return Movie(
286 |             id=itunes_movie_id,
287 |             referrer_id=appletv_movie_id,
288 |             name=movie_title,
289 |             release_date=movie_release_date,
290 |             duration=movie_duration,
291 |             preorder_availability_date=movie_expected_release_date,
292 |             playlist=movie_playlists if movie_playlists else None,
293 |         )
294 | 
295 |     async def get_episode_data(self, storefront_id: str, episode_id: str) -> ScrapedMediaResponse[Episode]:
296 |         raise NotImplementedError("Series scraping is not currently supported.")
297 | 
298 |     async def get_season_data(self, storefront_id: str, season_id: str, show_id: str) -> ScrapedMediaResponse[Season]:
299 |         raise NotImplementedError("Series scraping is not currently supported.")
300 | 
301 |     async def get_show_data(self, storefront_id: str, show_id: str) -> ScrapedMediaResponse[Series]:
302 |         raise NotImplementedError("Series scraping is not currently supported.")
303 | 
304 |     async def get_data(self, url: str) -> ScrapedMediaResponse:
305 |         regex_match = self.match_url(url=url, raise_error=True)
306 |         url_data = regex_match.groupdict()
307 | 
308 |         media_type = url_data["media_type"]
309 | 
310 |         if storefront_code := url_data.get("country_code"):
311 |             storefront_code = storefront_code.upper()
312 | 
313 |         else:
314 |             storefront_code = self.default_storefront
315 | 
316 |         media_id = url_data["media_id"]
317 | 
318 |         if storefront_code not in self.storefronts_mapping:
319 |             raise ScraperError(f"ID mapping for storefront '{storefront_code}' could not be found.")
320 | 
321 |         storefront_id = self.storefronts_mapping[storefront_code]
322 | 
323 |         if media_type == "movie":
324 |             return await self.get_movie_data(storefront_id=storefront_id, movie_id=media_id)
325 | 
326 |         if media_type == "episode":
327 |             return await self.get_episode_data(storefront_id=storefront_id, episode_id=media_id)
328 | 
329 |         if media_type == "season":
330 |             if (url_params := url_data.get("url_params")) and (show_id := parse_url_params(url_params).get("showId")):
331 |                 return await self.get_season_data(storefront_id=storefront_id, season_id=media_id, show_id=show_id)
332 | 
333 |             raise ScraperError("Invalid AppleTV URL: Missing 'showId' parameter.")
334 | 
335 |         if media_type == "show":
336 |             return await self.get_show_data(storefront_id=storefront_id, show_id=media_id)
337 | 
338 |         raise ScraperError(f"Invalid media type '{media_type}'.")
339 | 


--------------------------------------------------------------------------------
/isubrip/scrapers/itunes_scraper.py:
--------------------------------------------------------------------------------
  1 | from __future__ import annotations
  2 | 
  3 | import asyncio
  4 | import re
  5 | from typing import TYPE_CHECKING, Any
  6 | 
  7 | from isubrip.logger import logger
  8 | from isubrip.scrapers.scraper import HLSScraper, ScraperError, ScraperFactory
  9 | from isubrip.subtitle_formats.webvtt import WebVTTSubtitles
 10 | 
 11 | if TYPE_CHECKING:
 12 |     from m3u8.model import Media
 13 | 
 14 |     from isubrip.data_structures import Movie, ScrapedMediaResponse
 15 | 
 16 | 
 17 | REDIRECT_MAX_RETRIES = 5
 18 | REDIRECT_SLEEP_TIME = 2
 19 | 
 20 | class ItunesScraper(HLSScraper):
 21 |     """An iTunes movie data scraper."""
 22 |     id = "itunes"
 23 |     name = "iTunes"
 24 |     abbreviation = "iT"
 25 |     url_regex = re.compile(r"(?i)(?P<base_url>https?://itunes\.apple\.com/(?:(?P<country_code>[a-z]{2})/)?(?P<media_type>movie|tv-show|tv-season|show)/(?:(?P<media_name>[\w\-%]+)/)?(?P<media_id>id\d{9,10}))(?:\?(?P<url_params>.*))?")
 26 |     subtitles_class = WebVTTSubtitles
 27 |     is_movie_scraper = True
 28 |     uses_scrapers = ["appletv"]
 29 | 
 30 |     _subtitles_filters = {
 31 |         HLSScraper.M3U8Attribute.GROUP_ID.value: ["subtitles_ak", "subtitles_vod-ak-amt.tv.apple.com"],
 32 |         **HLSScraper._subtitles_filters,  # noqa: SLF001
 33 |     }
 34 | 
 35 |     def __init__(self, *args: Any, **kwargs: Any) -> None:
 36 |         super().__init__(*args, **kwargs)
 37 |         self._appletv_scraper = ScraperFactory.get_scraper_instance(
 38 |             scraper_id="appletv",
 39 |             raise_error=True,
 40 |         )
 41 | 
 42 |     async def get_data(self, url: str) -> ScrapedMediaResponse[Movie]:
 43 |         """
 44 |         Scrape iTunes to find info about a movie, and it's M3U8 main_playlist.
 45 | 
 46 |         Args:
 47 |             url (str): An iTunes store movie URL.
 48 | 
 49 |         Raises:
 50 |             InvalidURL: `itunes_url` is not a valid iTunes store movie URL.
 51 |             PageLoadError: HTML page did not load properly.
 52 |             HTTPError: HTTP request failed.
 53 | 
 54 |         Returns:
 55 |             Movie: A Movie (NamedTuple) object with movie's name, and an M3U8 object of the main_playlist
 56 |             if the main_playlist is found. None otherwise.
 57 |         """
 58 |         regex_match = self.match_url(url, raise_error=True)
 59 |         url_data = regex_match.groupdict()
 60 |         country_code: str = url_data["country_code"]
 61 |         media_id: str = url_data["media_id"]
 62 |         appletv_redirect_finding_url = f"https://tv.apple.com/{country_code}/movie/{media_id}"
 63 | 
 64 |         logger.debug("Attempting to fetch redirect location from: " + appletv_redirect_finding_url)
 65 | 
 66 |         retries = 0
 67 |         while True:
 68 |             response = await self._async_session.get(url=appletv_redirect_finding_url, follow_redirects=False)
 69 |             if response.status_code != 301 and retries < REDIRECT_MAX_RETRIES:
 70 |                 retries += 1
 71 |                 logger.debug(f"AppleTV redirect URL not found (Response code: {response.status_code}),"
 72 |                                f" retrying... ({retries}/{REDIRECT_MAX_RETRIES})")
 73 |                 await asyncio.sleep(REDIRECT_SLEEP_TIME)
 74 |                 continue
 75 |             break
 76 | 
 77 |         redirect_location = response.headers.get("Location")
 78 | 
 79 |         if response.status_code != 301 or not redirect_location:
 80 |             raise ScraperError(f"AppleTV redirect URL not found (Response code: {response.status_code}).")
 81 | 
 82 |         # Add 'https:' if redirect_location starts with '//'
 83 |         if redirect_location.startswith('//'):
 84 |             redirect_location = "https:" + redirect_location
 85 | 
 86 |         logger.debug(f"Redirect URL: {redirect_location}")
 87 | 
 88 |         if not self._appletv_scraper.match_url(redirect_location):
 89 |             raise ScraperError("Redirect URL is not a valid AppleTV URL.")
 90 | 
 91 |         return await self._appletv_scraper.get_data(url=redirect_location)
 92 | 
 93 |     @staticmethod
 94 |     def parse_language_name(media_data: Media) -> str | None:
 95 |         name: str | None = media_data.name
 96 | 
 97 |         if name:
 98 |             return name.replace(' (forced)', '').strip()
 99 | 
100 |         return None
101 | 


--------------------------------------------------------------------------------
/isubrip/scrapers/scraper.py:
--------------------------------------------------------------------------------
  1 | from __future__ import annotations
  2 | 
  3 | from abc import ABC, abstractmethod
  4 | import asyncio
  5 | from enum import Enum
  6 | import importlib
  7 | import inspect
  8 | from pathlib import Path
  9 | import re
 10 | import sys
 11 | from typing import TYPE_CHECKING, Any, ClassVar, Literal, Optional, TypeVar, Union, overload
 12 | 
 13 | import httpx
 14 | import m3u8
 15 | from pydantic import AliasGenerator, BaseModel, ConfigDict, Field, create_model
 16 | 
 17 | from isubrip.constants import PACKAGE_NAME, SCRAPER_MODULES_SUFFIX
 18 | from isubrip.data_structures import (
 19 |     MainPlaylist,
 20 |     PlaylistMediaItem,
 21 |     ScrapedMediaResponse,
 22 |     SubtitlesData,
 23 |     SubtitlesFormatType,
 24 |     SubtitlesType,
 25 | )
 26 | from isubrip.logger import logger
 27 | from isubrip.utils import (
 28 |     SingletonMeta,
 29 |     format_subtitles_description,
 30 |     get_model_field,
 31 |     merge_dict_values,
 32 |     return_first_valid,
 33 |     single_string_to_list,
 34 | )
 35 | 
 36 | if TYPE_CHECKING:
 37 |     from types import TracebackType
 38 | 
 39 |     from isubrip.subtitle_formats.subtitles import Subtitles
 40 | 
 41 | 
 42 | ScraperT = TypeVar("ScraperT", bound="Scraper")
 43 | 
 44 | 
 45 | class ScraperConfigBase(BaseModel, ABC):
 46 |     """
 47 |     A Pydantic BaseModel for base class for scraper's configuration classes.
 48 |     Also serves for setting default configuration settings for all scrapers.
 49 | 
 50 |     Attributes:
 51 |         timeout (int | float): Timeout to use when making requests.
 52 |         user_agent (st): User agent to use when making requests.
 53 |         proxy (str | None): Proxy to use when making requests.
 54 |         verify_ssl (bool): Whether to verify SSL certificates.
 55 |     """
 56 |     model_config = ConfigDict(
 57 |         extra='forbid',
 58 |         alias_generator=AliasGenerator(
 59 |             validation_alias=lambda field_name: field_name.replace('_', '-'),
 60 |         ),
 61 |     )
 62 | 
 63 |     timeout: Union[int, float, None] = Field(default=None)
 64 |     user_agent: Union[str, None] = Field(default=None)
 65 |     proxy: Union[str, None] = Field(default=None)
 66 |     verify_ssl: Union[bool, None] = Field(default=None)
 67 | 
 68 | 
 69 | class DefaultScraperConfig(ScraperConfigBase):
 70 |     """
 71 |     A Pydantic BaseModel for scraper's configuration classes.
 72 |     Also serves as a default configuration for all scrapers.
 73 | 
 74 |     Attributes:
 75 |         timeout (int | float): Timeout to use when making requests.
 76 |         user_agent (st): User agent to use when making requests.
 77 |         proxy (str | None): Proxy to use when making requests.
 78 |         verify_ssl (bool): Whether to verify SSL certificates.
 79 |     """
 80 |     timeout: Union[int, float] = Field(default=10)
 81 |     user_agent: str = Field(
 82 |         default="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36",    # noqa: E501
 83 |     )
 84 |     proxy: Optional[str] = Field(default=None)
 85 |     verify_ssl: bool = Field(default=True)
 86 | 
 87 | 
 88 | class ScraperConfigSubcategory(BaseModel, ABC):
 89 |     """A Pydantic BaseModel for a scraper's configuration subcategory (which can be set under 'ScraperConfig')."""
 90 |     model_config = ConfigDict(
 91 |         extra='forbid',
 92 |         alias_generator=AliasGenerator(
 93 |             validation_alias=lambda field_name: field_name.replace('_', '-'),
 94 |         ),
 95 |     )
 96 | 
 97 | class Scraper(ABC, metaclass=SingletonMeta):
 98 |     """
 99 |     A base class for scrapers.
100 | 
101 |     Attributes:
102 |         default_user_agent (str): [Class Attribute]
103 |             Default user agent to use if no other user agent is specified when making requests.
104 |         default_proxy (str | None): [Class Attribute] Default proxy to use when making requests.
105 |         default_verify_ssl (bool): [Class Attribute] Whether to verify SSL certificates by default.
106 |         subtitles_fix_rtl (bool): [Class Attribute] Whether to fix RTL from downloaded subtitles.
107 |             A list of languages to fix RTL on. If None, a default list will be used.
108 |         subtitles_remove_duplicates (bool): [Class Attribute]
109 |             Whether to remove duplicate lines from downloaded subtitles.
110 | 
111 |         id (str): [Class Attribute] ID of the scraper (must be unique).
112 |         name (str): [Class Attribute] Name of the scraper.
113 |         abbreviation (str): [Class Attribute] Abbreviation of the scraper.
114 |         url_regex (re.Pattern | list[re.Pattern]): [Class Attribute] A RegEx pattern to find URLs matching the service.
115 |         subtitles_class (type[Subtitles]): [Class Attribute] Class of the subtitles format returned by the scraper.
116 |         is_movie_scraper (bool): [Class Attribute] Whether the scraper is for movies.
117 |         is_series_scraper (bool): [Class Attribute] Whether the scraper is for series.
118 |         uses_scrapers (list[str]): [Class Attribute] A list of IDs for other scraper classes that this scraper uses.
119 |             This assures that the config data for the other scrapers is passed as well.
120 |         config (ScraperConfig | None): [Class Attribute] A ScraperConfig instance for the scraper,
121 |             containing configurations.
122 |         _session (httpx.Client): A synchronous HTTP client session.
123 |         _async_session (httpx.AsyncClient): An asynchronous HTTP client session.
124 | 
125 |     Notes:
126 |         Each scraper implements its own `ScraperConfig` class (which can be overridden and updated),
127 |          inheriting from `ScraperConfigBase`, which sets configurable options for the scraper.
128 |     """
129 | 
130 |     class ScraperConfig(ScraperConfigBase):
131 |         """A class representing scraper's configuration settings.
132 |            Can be overridden to create a custom configuration with overridden default values,
133 |            and additional settings."""
134 |     
135 |     default_timeout: ClassVar[int | float] = 10
136 |     default_user_agent: ClassVar[str] = httpx._client.USER_AGENT  # noqa: SLF001
137 |     default_proxy: ClassVar[str | None] = None
138 |     default_verify_ssl: ClassVar[bool] = True
139 |     subtitles_fix_rtl: ClassVar[bool] = False
140 |     subtitles_remove_duplicates: ClassVar[bool] = True
141 | 
142 |     id: ClassVar[str]
143 |     name: ClassVar[str]
144 |     abbreviation: ClassVar[str]
145 |     url_regex: ClassVar[re.Pattern | list[re.Pattern]]
146 |     subtitles_class: ClassVar[type[Subtitles]]
147 |     is_movie_scraper: ClassVar[bool] = False
148 |     is_series_scraper: ClassVar[bool] = False
149 |     uses_scrapers: ClassVar[list[str]] = []
150 |     config: ClassVar[ScraperConfig | None] = None
151 | 
152 |     def __init__(self, timeout: int | float | None = None, user_agent: str | None = None,
153 |                  proxy: str | None = None, verify_ssl: bool | None = None):
154 |         """
155 |         Initialize a Scraper object.
156 | 
157 |         Args:
158 |             timeout (int | float | None, optional): A timeout to use when making requests. Defaults to None.
159 |             user_agent (str | None, optional): A user agent to use when making requests. Defaults to None.
160 |             proxy (str | None, optional): A proxy to use when making requests. Defaults to None.
161 |             verify_ssl (bool | None, optional): Whether to verify SSL certificates. Defaults to None.
162 |         """
163 |         self._timeout = return_first_valid(timeout,
164 |                                            get_model_field(model=self.config, field='timeout'),
165 |                                            self.default_timeout,
166 |                                            raise_error=True)
167 |         self._user_agent = return_first_valid(user_agent,
168 |                                               get_model_field(model=self.config, field='user_agent'),
169 |                                               self.default_user_agent,
170 |                                               raise_error=True)
171 |         self._proxy = return_first_valid(proxy,
172 |                                          get_model_field(model=self.config, field='proxy'),
173 |                                          self.default_proxy)
174 |         self._verify_ssl = return_first_valid(verify_ssl,
175 |                                               get_model_field(model=self.config, field='verify_ssl'),
176 |                                               self.default_verify_ssl,
177 |                                               raise_error=True)
178 | 
179 |         if self._timeout != self.default_timeout:
180 |             logger.debug(f"Initializing '{self.name}' scraper with custom timeout: '{self._timeout}'.")
181 | 
182 |         if self._user_agent != self.default_user_agent:
183 |             logger.debug(f"Initializing '{self.name}' scraper with custom user-agent: '{self._user_agent}'.")
184 | 
185 |         if self._proxy != self.default_proxy:
186 |             logger.debug(f"Initializing '{self.name}' scraper with proxy: '{self._proxy}'.")
187 | 
188 |         if self._verify_ssl != self.default_verify_ssl:
189 |             logger.debug(f"Initializing '{self.name}' scraper with SSL verification set to: '{self._verify_ssl}'.")
190 | 
191 |         self._requests_counter = 0
192 |         clients_params: dict[str, Any] = {
193 |             "headers": {"User-Agent": self._user_agent},
194 |             "verify": self._verify_ssl,
195 |             "proxy": self._proxy,
196 |             "timeout": float(self._timeout),
197 |         }
198 |         self._session = httpx.Client(
199 |             **clients_params,
200 |             event_hooks={
201 |                 "request": [self._increment_requests_counter],
202 |             },
203 |         )
204 |         self._async_session = httpx.AsyncClient(
205 |             **clients_params,
206 |             event_hooks={
207 |                 "request": [self._async_increment_requests_counter],
208 |             },
209 |         )
210 | 
211 |         # Update session settings according to configurations
212 |         self._session.headers.update({"User-Agent": self._user_agent})
213 |         self._async_session.headers.update({"User-Agent": self._user_agent})
214 | 
215 |     def _increment_requests_counter(self, request: httpx.Request) -> None:  # noqa: ARG002
216 |         self._requests_counter += 1
217 | 
218 |     async def _async_increment_requests_counter(self, request: httpx.Request) -> None:  # noqa: ARG002
219 |         self._requests_counter += 1
220 | 
221 |     @property
222 |     def requests_count(self) -> int:
223 |         return self._requests_counter
224 | 
225 |     @classmethod
226 |     @overload
227 |     def match_url(cls, url: str, raise_error: Literal[True] = ...) -> re.Match:
228 |         ...
229 | 
230 |     @classmethod
231 |     @overload
232 |     def match_url(cls, url: str, raise_error: Literal[False] = ...) -> re.Match | None:
233 |         ...
234 | 
235 |     @classmethod
236 |     def match_url(cls, url: str, raise_error: bool = False) -> re.Match | None:
237 |         """
238 |         Checks if a URL matches scraper's url regex.
239 | 
240 |         Args:
241 |             url (str): A URL to check against the regex.
242 |             raise_error (bool, optional): Whether to raise an error instead of returning None if the URL doesn't match.
243 | 
244 |         Returns:
245 |             re.Match | None: A Match object if the URL matches the regex, None otherwise (if raise_error is False).
246 | 
247 |         Raises:
248 |             ValueError: If the URL doesn't match the regex and raise_error is True.
249 |         """
250 |         if isinstance(cls.url_regex, re.Pattern) and (match_result := re.fullmatch(pattern=cls.url_regex, string=url)):
251 |             return match_result
252 | 
253 |         if isinstance(cls.url_regex, list):
254 |             for url_regex_item in cls.url_regex:
255 |                 if result := re.fullmatch(pattern=url_regex_item, string=url):
256 |                     return result
257 | 
258 |         if raise_error:
259 |             raise ValueError(f"URL '{url}' doesn't match the URL regex of {cls.name}.")
260 | 
261 |         return None
262 | 
263 |     def __enter__(self) -> Scraper:
264 |         return self
265 | 
266 |     def __exit__(self, exc_type: type[BaseException] | None,
267 |                  exc_val: BaseException | None, exc_tb: TracebackType | None) -> None:
268 |         self.close()
269 | 
270 |     async def async_close(self) -> None:
271 |         await self._async_session.aclose()
272 | 
273 |     def close(self) -> None:
274 |         self._session.close()
275 | 
276 |     @abstractmethod
277 |     async def get_data(self, url: str) -> ScrapedMediaResponse:
278 |         """
279 |         Scrape media information about the media on a URL.
280 | 
281 |         Args:
282 |             url (str): A URL to get media information about.
283 | 
284 |         Returns:
285 |             ScrapedMediaResponse: A ScrapedMediaResponse object containing scraped media information.
286 |         """
287 | 
288 |     @abstractmethod
289 |     async def download_subtitles(self, media_data: PlaylistMediaItem, subrip_conversion: bool = False) -> SubtitlesData:
290 |         """
291 |         Download subtitles from a media object.
292 | 
293 |         Args:
294 |             media_data (PlaylistMediaItem): A media object to download subtitles from.
295 |             subrip_conversion (bool, optional): Whether to convert the subtitles to SubRip format. Defaults to False.
296 | 
297 |         Returns:
298 |             SubtitlesData: A SubtitlesData object containing downloaded subtitles.
299 |         
300 |         Raises:
301 |             SubtitlesDownloadError: If the subtitles failed to download.
302 |         """
303 | 
304 |     @abstractmethod
305 |     def find_matching_media(self, main_playlist: MainPlaylist,
306 |                             filters: dict[str, str | list[str]] | None = None) -> list:
307 |         """
308 |         Find media items that match the given filters in the main playlist (or all media items if no filters are given).
309 | 
310 |         Args:
311 |             main_playlist (MainPlaylist): Main playlist to search for media items in.
312 |             filters (dict[str, str | list[str]] | None, optional): A dictionary of filters to match media items against.
313 |                 Defaults to None.
314 | 
315 |         Returns:
316 |             list: A list of media items that match the given filters.
317 |         """
318 | 
319 |     @abstractmethod
320 |     def find_matching_subtitles(self, main_playlist: MainPlaylist,
321 |                                 language_filter: list[str] | None = None) -> list[PlaylistMediaItem]:
322 |         """
323 |         Find subtitles that match the given language filter in the main playlist.
324 | 
325 |         Args:
326 |             main_playlist (MainPlaylist): Main playlist to search for subtitles in.
327 |             language_filter (list[str] | None, optional): A list of language codes to filter subtitles by.
328 |                 Defaults to None.
329 | 
330 |         Returns:
331 |             list[PlaylistMediaItem]: A list of subtitles media objects that match the given language filter.
332 |         """
333 | 
334 |     @abstractmethod
335 |     async def load_playlist(self, url: str | list[str], headers: dict | None = None) -> MainPlaylist | None:
336 |         """
337 |         Load a playlist from a URL to a representing object.
338 |         Multiple URLs can be given, in which case the first one that loads successfully will be returned.
339 | 
340 |         Args:
341 |             url (str | list[str]): URL of the M3U8 playlist to load. Can also be a list of URLs (for redundancy).
342 |             headers (dict | None, optional): A dictionary of headers to use when making the request.
343 |                 Defaults to None (results in using session's configured headers).
344 | 
345 |         Returns:
346 |             MainPlaylist | None: A playlist object (matching the type), or None if the playlist couldn't be loaded.
347 |         """
348 | 
349 | 
350 |     @staticmethod
351 |     @abstractmethod
352 |     def detect_subtitles_type(subtitles_media: PlaylistMediaItem) -> SubtitlesType | None:
353 |         """
354 |         Detect the subtitles type (Closed Captions, Forced, etc.) from a media object.
355 | 
356 |         Args:
357 |             subtitles_media (PlaylistMediaItem): Subtitles media object to detect the type of.
358 | 
359 |         Returns:
360 |             SubtitlesType | None: The type of the subtitles, None for regular subtitles.
361 |         """
362 | 
363 | 
364 |     @classmethod
365 |     @abstractmethod
366 |     def format_subtitles_description(cls, subtitles_media: PlaylistMediaItem) -> str:
367 |         """
368 |         Format a description of the subtitles media object.
369 |         
370 |         Args:
371 |             subtitles_media (PlaylistMediaItem): Subtitles media object to format the description of.
372 |         
373 |         Returns:
374 |             str: A formatted description of the subtitles media object.
375 | 
376 |         Raises:
377 |             ValueError: If minimal required data is missing from the media object.
378 |         """
379 | 
380 | 
381 | class HLSScraper(Scraper, ABC):
382 |     """A base class for HLS (m3u8) scrapers."""
383 |     class M3U8Attribute(Enum):
384 |         """
385 |         An enum representing all possible M3U8 attributes.
386 |         Names / Keys represent M3U8 Media object attributes (should be converted to lowercase),
387 |         and values represent the name of the key for config usage.
388 |         """
389 |         ASSOC_LANGUAGE = "assoc-language"
390 |         AUTOSELECT = "autoselect"
391 |         CHARACTERISTICS = "characteristics"
392 |         CHANNELS = "channels"
393 |         DEFAULT = "default"
394 |         FORCED = "forced"
395 |         GROUP_ID = "group-id"
396 |         INSTREAM_ID = "instream-id"
397 |         LANGUAGE = "language"
398 |         NAME = "name"
399 |         STABLE_RENDITION_ID = "stable-rendition-id"
400 |         TYPE = "type"
401 | 
402 |     default_playlist_filters: ClassVar[dict[str, str | list[str] | None] | None] = None
403 | 
404 |     _subtitles_filters: dict[str, str | list[str]] = {
405 |         M3U8Attribute.TYPE.value: "SUBTITLES",
406 |     }
407 | 
408 |     # Resolve mypy errors as mypy doesn't support dynamic models.
409 |     if TYPE_CHECKING:
410 |         PlaylistFiltersSubcategory = ScraperConfigSubcategory
411 | 
412 |     else:
413 |         PlaylistFiltersSubcategory = create_model(
414 |             "PlaylistFiltersSubcategory",
415 |             __base__=ScraperConfigSubcategory,
416 |             **{
417 |                 m3u8_attribute.value: (Union[str, list[str], None],
418 |                                        Field(default=None))
419 |                 for m3u8_attribute in M3U8Attribute
420 |             },  # type: ignore[call-overload]
421 |         )
422 | 
423 | 
424 |     class ScraperConfig(Scraper.ScraperConfig):
425 |         playlist_filters: HLSScraper.PlaylistFiltersSubcategory = Field(  # type: ignore[valid-type]
426 |             default_factory=lambda: HLSScraper.PlaylistFiltersSubcategory(),
427 |         )
428 | 
429 | 
430 |     def __init__(self, playlist_filters: dict[str, str | list[str] | None] | None = None,
431 |                  *args: Any, **kwargs: Any) -> None:
432 |         super().__init__(*args, **kwargs)
433 |         self._playlist_filters = return_first_valid(playlist_filters,
434 |                                                     get_model_field(model=self.config,
435 |                                                                     field='playlist_filters',
436 |                                                                     convert_to_dict=True),
437 |                                                     self.default_playlist_filters)
438 | 
439 |         if self._playlist_filters:
440 |             # Remove None values from the filters (mainly caused by config defaults)
441 |             self._playlist_filters = {key: value for key, value in self._playlist_filters.items() if value is not None}
442 | 
443 |             if self._playlist_filters:  # If there are any filters left
444 |                 logger.debug(f"Scraper '{self.name}' initialized with playlist filters: {self._playlist_filters}.")
445 | 
446 |     @staticmethod
447 |     def parse_language_name(media_data: m3u8.Media) -> str | None:
448 |         """
449 |         Parse the language name from an M3U8 Media object.
450 |         Can be overridden in subclasses for normalization.
451 | 
452 |         Args:
453 |             media_data (m3u8.Media): Media object to parse the language name from.
454 | 
455 |         Returns:
456 |             str | None: The language name if found, None otherwise.
457 |         """
458 |         name: str | None = media_data.name
459 |         return name
460 | 
461 |     async def load_playlist(self, url: str | list[str], headers: dict | None = None) -> m3u8.M3U8 | None:
462 |         _headers = headers or self._session.headers
463 |         result: m3u8.M3U8 | None = None
464 | 
465 |         for url_item in single_string_to_list(item=url):
466 |             try:
467 |                 response = await self._async_session.get(url=url_item, headers=_headers, timeout=5)
468 | 
469 |             except Exception as e:
470 |                 logger.debug(f"Failed to load M3U8 playlist '{url_item}': {e}")
471 |                 continue
472 | 
473 |             if not response.text:
474 |                 raise PlaylistLoadError("Received empty response for playlist from server.")
475 | 
476 |             result = m3u8.loads(content=response.text, uri=url_item)
477 |             break
478 | 
479 |         return result
480 | 
481 |     @staticmethod
482 |     def detect_subtitles_type(subtitles_media: m3u8.Media) -> SubtitlesType | None:
483 |         """
484 |         Detect the subtitles type (Closed Captions, Forced, etc.) from an M3U8 Media object.
485 | 
486 |         Args:
487 |             subtitles_media (m3u8.Media): Subtitles Media object to detect the type of.
488 | 
489 |         Returns:
490 |             SubtitlesType | None: The type of the subtitles, None for regular subtitles.
491 |         """
492 |         if subtitles_media.forced == "YES":
493 |             return SubtitlesType.FORCED
494 | 
495 |         if subtitles_media.characteristics is not None and "public.accessibility" in subtitles_media.characteristics:
496 |             return SubtitlesType.CC
497 | 
498 |         return None
499 | 
500 |     async def download_subtitles(self, media_data: m3u8.Media, subrip_conversion: bool = False) -> SubtitlesData:
501 |         try:
502 |             playlist_m3u8 = await self.load_playlist(url=media_data.absolute_uri)
503 | 
504 |             if playlist_m3u8 is None:
505 |                 raise PlaylistLoadError("Could not load subtitles M3U8 playlist.")  # noqa: TRY301
506 | 
507 |             if not media_data.language:
508 |                 raise ValueError("Language code not found in media data.")  # noqa: TRY301
509 | 
510 |             downloaded_segments = await self.download_segments(playlist=playlist_m3u8)
511 |             subtitles = self.subtitles_class(data=downloaded_segments[0], language_code=media_data.language)
512 | 
513 |             if len(downloaded_segments) > 1:
514 |                 for segment_data in downloaded_segments[1:]:
515 |                     segment_subtitles_obj = self.subtitles_class(data=segment_data, language_code=media_data.language)
516 |                     subtitles.append_subtitles(segment_subtitles_obj)
517 | 
518 |             subtitles.polish(
519 |                 fix_rtl=self.subtitles_fix_rtl,
520 |                 remove_duplicates=self.subtitles_remove_duplicates,
521 |             )
522 | 
523 |             if subrip_conversion:
524 |                 subtitles_format = SubtitlesFormatType.SUBRIP
525 |                 content = subtitles.to_srt().dump()
526 | 
527 |             else:
528 |                 subtitles_format = SubtitlesFormatType.WEBVTT
529 |                 content = subtitles.dump()
530 | 
531 |             return SubtitlesData(
532 |                 language_code=media_data.language,
533 |                 language_name=self.parse_language_name(media_data=media_data),
534 |                 subtitles_format=subtitles_format,
535 |                 content=content,
536 |                 content_encoding=subtitles.encoding,
537 |                 special_type=self.detect_subtitles_type(subtitles_media=media_data),
538 |             )
539 |     
540 |         except Exception as e:
541 |             raise SubtitlesDownloadError(
542 |                 language_code=media_data.language,
543 |                 language_name=self.parse_language_name(media_data=media_data),
544 |                 special_type=self.detect_subtitles_type(subtitles_media=media_data),
545 |                 original_exc=e,
546 |             ) from e
547 | 
548 |     async def download_segments(self, playlist: m3u8.M3U8) -> list[bytes]:
549 |         responses = await asyncio.gather(
550 |             *[
551 |                 self._async_session.get(url=segment.absolute_uri)
552 |                 for segment in playlist.segments
553 |             ],
554 |         )
555 | 
556 |         responses_data = []
557 | 
558 |         for result in responses:
559 |             try:
560 |                 result.raise_for_status()
561 |                 responses_data.append(result.content)
562 | 
563 |             except Exception as e:
564 |                 raise DownloadError("One of the subtitles segments failed to download.") from e
565 | 
566 |         return responses_data
567 | 
568 |     def find_matching_media(self, main_playlist: m3u8.M3U8,
569 |                             filters: dict[str, str | list[str]] | None = None) -> list[m3u8.Media]:
570 |         results: list[m3u8.Media] = []
571 |         playlist_filters: dict[str, Union[str, list[str]]] | None
572 | 
573 |         if self._playlist_filters:
574 |             # Merge filtering dictionaries into a single dictionary
575 |             playlist_filters = merge_dict_values(
576 |                 *[dict_item for dict_item in (filters, self._playlist_filters)
577 |                   if dict_item is not None],
578 |             )
579 | 
580 |         else:
581 |             playlist_filters = filters
582 | 
583 |         for media in main_playlist.media:
584 |             if not playlist_filters:
585 |                 results.append(media)
586 |                 continue
587 | 
588 |             is_valid = True
589 | 
590 |             for filter_name, filter_value in playlist_filters.items():
591 |                 # Skip filter if its value is None
592 |                 if filter_value is None:
593 |                     continue
594 | 
595 |                 try:
596 |                     filter_name_enum = HLSScraper.M3U8Attribute(filter_name)
597 |                     attribute_value = getattr(media, filter_name_enum.name.lower(), None)
598 | 
599 |                     if (attribute_value is None) or (
600 |                             isinstance(filter_value, list) and
601 |                             attribute_value.casefold() not in (x.casefold() for x in filter_value)
602 |                     ) or (
603 |                             isinstance(filter_value, str) and filter_value.casefold() != attribute_value.casefold()
604 |                     ):
605 |                         is_valid = False
606 |                         break
607 | 
608 |                 except Exception:
609 |                     is_valid = False
610 | 
611 |             if is_valid:
612 |                 results.append(media)
613 | 
614 |         return results
615 | 
616 |     def find_matching_subtitles(self, main_playlist: m3u8.M3U8,
617 |                                 language_filter: list[str] | None = None) -> list[m3u8.Media]:
618 |         _filters = self._subtitles_filters
619 | 
620 |         if language_filter:
621 |             _filters[self.M3U8Attribute.LANGUAGE.value] = language_filter
622 | 
623 |         return self.find_matching_media(main_playlist=main_playlist, filters=_filters)
624 |     
625 |     @classmethod
626 |     def format_subtitles_description(cls, subtitles_media: m3u8.Media) -> str:
627 |         return format_subtitles_description(
628 |             language_code=subtitles_media.language,
629 |             language_name=cls.parse_language_name(media_data=subtitles_media),
630 |             special_type=cls.detect_subtitles_type(subtitles_media=subtitles_media),
631 |             )
632 | 
633 | 
634 | class ScraperFactory:
635 |     _scraper_classes_cache: list[type[Scraper]] | None = None
636 |     _scraper_instances_cache: dict[type[Scraper], Scraper] = {}
637 |     _currently_initializing: list[type[Scraper]] = []  # Used to prevent infinite recursion
638 | 
639 |     @classmethod
640 |     def get_initialized_scrapers(cls) -> list[Scraper]:
641 |         """
642 |         Get a list of all previously initialized scrapers.
643 | 
644 |         Returns:
645 |             list[Scraper]: A list of initialized scrapers.
646 |         """
647 |         return list(cls._scraper_instances_cache.values())
648 | 
649 |     @classmethod
650 |     def get_scraper_classes(cls) -> list[type[Scraper]]:
651 |         """
652 |         Find all scraper classes in the scrapers directory.
653 | 
654 |         Returns:
655 |             list[Scraper]: A Scraper subclass.
656 |         """
657 |         if cls._scraper_classes_cache is not None:
658 |             return cls._scraper_classes_cache
659 | 
660 |         cls._scraper_classes_cache = []
661 |         scraper_modules_paths = Path(__file__).parent.glob(f"*{SCRAPER_MODULES_SUFFIX}.py")
662 | 
663 |         for scraper_module_path in scraper_modules_paths:
664 |             sys.path.append(str(scraper_module_path))
665 | 
666 |             module = importlib.import_module(f"{PACKAGE_NAME}.scrapers.{scraper_module_path.stem}")
667 | 
668 |             # Find all 'Scraper' subclasses
669 |             for _, obj in inspect.getmembers(module,
670 |                                              predicate=lambda x: inspect.isclass(x) and issubclass(x, Scraper)):
671 |                 # Skip object if it's an abstract or imported from another module
672 |                 if not inspect.isabstract(obj) and obj.__module__ == module.__name__:
673 |                     cls._scraper_classes_cache.append(obj)
674 | 
675 |         return cls._scraper_classes_cache
676 | 
677 |     @classmethod
678 |     def _get_scraper_instance(cls, scraper_class: type[ScraperT], kwargs: dict | None = None) -> ScraperT:
679 |         """
680 |         Initialize and return a scraper instance.
681 | 
682 |         Args:
683 |             scraper_class (type[ScraperT]): A scraper class to initialize.
684 |             kwargs (dict | None, optional): A dictionary containing parameters to pass to the scraper's constructor.
685 |                 Defaults to None.
686 | 
687 |         Returns:
688 |             Scraper: An instance of the given scraper class.
689 |         """
690 |         logger.debug(f"Initializing '{scraper_class.name}' scraper...")
691 |         kwargs = kwargs or {}
692 | 
693 |         if scraper_class not in cls._scraper_instances_cache:
694 |             logger.debug(f"'{scraper_class.name}' scraper not found in cache, creating a new instance...")
695 | 
696 |             if scraper_class in cls._currently_initializing:
697 |                 raise ScraperError(f"'{scraper_class.name}' scraper is already being initialized.\n"
698 |                                    f"Make sure there are no circular dependencies between scrapers.")
699 | 
700 |             cls._currently_initializing.append(scraper_class)
701 | 
702 |             cls._scraper_instances_cache[scraper_class] = scraper_class(**kwargs)
703 |             cls._currently_initializing.remove(scraper_class)
704 | 
705 |         else:
706 |             logger.debug(f"Cached '{scraper_class.name}' scraper instance found and will be used.")
707 | 
708 |         return cls._scraper_instances_cache[scraper_class]  # type: ignore[return-value]
709 | 
710 |     @classmethod
711 |     @overload
712 |     def get_scraper_instance(cls, scraper_class: type[ScraperT], scraper_id: str | None = ...,
713 |                              url: str | None = ..., kwargs: dict | None = ...,
714 |                              raise_error: Literal[True] = ...) -> ScraperT:
715 |         ...
716 | 
717 |     @classmethod
718 |     @overload
719 |     def get_scraper_instance(cls, scraper_class: type[ScraperT], scraper_id: str | None = ...,
720 |                              url: str | None = ..., kwargs: dict | None = ...,
721 |                              raise_error: Literal[False] = ...) -> ScraperT | None:
722 |         ...
723 | 
724 |     @classmethod
725 |     @overload
726 |     def get_scraper_instance(cls, scraper_class: None = ..., scraper_id: str | None = ...,
727 |                              url: str | None = ..., kwargs: dict | None = ...,
728 |                              raise_error: Literal[True] = ...) -> Scraper:
729 |         ...
730 | 
731 |     @classmethod
732 |     @overload
733 |     def get_scraper_instance(cls, scraper_class: None = ..., scraper_id: str | None = ...,
734 |                              url: str | None = ..., kwargs: dict | None = ...,
735 |                              raise_error: Literal[False] = ...) -> Scraper | None:
736 |         ...
737 | 
738 |     @classmethod
739 |     def get_scraper_instance(cls, scraper_class: type[Scraper] | None = None, scraper_id: str | None = None,
740 |                              url: str | None = None, kwargs: dict | None = None,
741 |                              raise_error: bool = True) -> Scraper | None:
742 |         """
743 |         Find, initialize and return a scraper that matches the given URL or ID.
744 | 
745 |         Args:
746 |             scraper_class (type[ScraperT] | None, optional): A scraper class to initialize. Defaults to None.
747 |             scraper_id (str | None, optional): ID of a scraper to initialize. Defaults to None.
748 |             url (str | None, optional): A URL to match a scraper for to initialize. Defaults to None.
749 |             kwargs (dict | None, optional): A dictionary containing parameters to pass to the scraper's constructor.
750 |                 Defaults to None.
751 |             raise_error (bool, optional): Whether to raise an error if no scraper was found. Defaults to False.
752 | 
753 |         Returns:
754 |             ScraperT | Scraper | None: An instance of a scraper that matches the given URL or ID,
755 |                 None otherwise (if raise_error is False).
756 | 
757 |         Raises:
758 |             ValueError: If no scraper was found and 'raise_error' is True.
759 |         """
760 |         if not any((scraper_class, scraper_id, url)):
761 |             raise ValueError("At least one of: 'scraper_class', 'scraper_id', or 'url' must be provided.")
762 | 
763 |         if scraper_class:
764 |             return cls._get_scraper_instance(
765 |                 scraper_class=scraper_class,
766 |                 kwargs=kwargs,
767 |             )
768 | 
769 |         if scraper_id:
770 |             logger.debug(f"Searching for a scraper object with ID '{scraper_id}'...")
771 |             for scraper in cls.get_scraper_classes():
772 |                 if scraper.id == scraper_id:
773 |                     return cls._get_scraper_instance(
774 |                         scraper_class=scraper,
775 |                         kwargs=kwargs,
776 |                     )
777 | 
778 |         elif url:
779 |             logger.debug(f"Searching for a scraper object that matches URL '{url}'...")
780 |             for scraper in cls.get_scraper_classes():
781 |                 if scraper.match_url(url) is not None:
782 |                     return cls._get_scraper_instance(
783 |                         scraper_class=scraper,
784 |                         kwargs=kwargs,
785 |                     )
786 | 
787 |         error_message = "No matching scraper was found."
788 | 
789 |         if raise_error:
790 |             raise ValueError(error_message)
791 | 
792 |         logger.debug(error_message)
793 |         return None
794 | 
795 | 
796 | class ScraperError(Exception):
797 |     pass
798 | 
799 | 
800 | class DownloadError(ScraperError):
801 |     pass
802 | 
803 | 
804 | class PlaylistLoadError(ScraperError):
805 |     pass
806 | 
807 | 
808 | class SubtitlesDownloadError(ScraperError):
809 |     def __init__(self, language_code: str | None, language_name: str | None = None,
810 |                  special_type: SubtitlesType | None = None, original_exc: Exception | None = None,
811 |                  *args: Any, **kwargs: dict[str, Any]):
812 |         """
813 |         Initialize a SubtitlesDownloadError instance.
814 | 
815 |         Args:
816 |             language_code (str | None, optional): Language code of the subtitles that failed to download.
817 |             language_name (str | None, optional): Language name of the subtitles that failed to download.
818 |             special_type (SubtitlesType | None, optional): Type of the subtitles that failed to download.
819 |             original_exc (Exception | None, optional): The original exception that caused the error.
820 |         """
821 |         super().__init__(*args, **kwargs)
822 |         self.language_code = language_code
823 |         self.language_name = language_name
824 |         self.special_type = special_type
825 |         self.original_exc = original_exc
826 | 


--------------------------------------------------------------------------------
/isubrip/subtitle_formats/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MichaelYochpaz/iSubRip/d7d7d1e1e4eaa93d49564b74d0ea0b25e07777b0/isubrip/subtitle_formats/__init__.py


--------------------------------------------------------------------------------
/isubrip/subtitle_formats/subrip.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | from typing import Any
 4 | 
 5 | from isubrip.data_structures import SubtitlesFormatType
 6 | from isubrip.subtitle_formats.subtitles import Subtitles, SubtitlesCaptionBlock
 7 | 
 8 | 
 9 | class SubRipCaptionBlock(SubtitlesCaptionBlock):
10 |     """A subtitles caption block based on the SUBRIP format."""
11 |     def __eq__(self, other: Any) -> bool:
12 |         return isinstance(other, type(self)) and \
13 |                self.start_time == other.start_time and self.end_time == other.end_time and self.payload == other.payload
14 | 
15 |     def __str__(self) -> str:
16 |         result_str = ""
17 |         time_format = "%H:%M:%S,%f"
18 | 
19 |         result_str += f"{self.start_time.strftime(time_format)[:-3]} --> {self.end_time.strftime(time_format)[:-3]}\n"
20 |         result_str += f"{self.payload}"
21 | 
22 |         return result_str
23 | 
24 |     def to_srt(self) -> SubRipCaptionBlock:
25 |         return self
26 | 
27 | 
28 | class SubRipSubtitles(Subtitles[SubRipCaptionBlock]):
29 |     """An object representing a SubRip subtitles file."""
30 |     format = SubtitlesFormatType.SUBRIP
31 | 
32 |     def _dumps(self) -> str:
33 |         subtitles_str = ""
34 | 
35 |         for i, block in enumerate(iterable=self.blocks, start=1):
36 |             subtitles_str += f"{i}\n{str(block)}\n\n"
37 | 
38 |         return subtitles_str.rstrip('\n')
39 | 
40 |     def _loads(self, data: str) -> None:
41 |         raise NotImplementedError("SubRip subtitles loading is not supported.")
42 | 


--------------------------------------------------------------------------------
/isubrip/subtitle_formats/subtitles.py:
--------------------------------------------------------------------------------
  1 | from __future__ import annotations
  2 | 
  3 | from abc import ABC, abstractmethod
  4 | from copy import deepcopy
  5 | from datetime import time
  6 | from typing import TYPE_CHECKING, Any, ClassVar, Generic, TypeVar
  7 | 
  8 | from isubrip.logger import logger
  9 | 
 10 | if TYPE_CHECKING:
 11 |     from isubrip.data_structures import SubtitlesFormatType
 12 |     from isubrip.subtitle_formats.subrip import SubRipCaptionBlock, SubRipSubtitles
 13 | 
 14 | RTL_CONTROL_CHARS = ('\u200e', '\u200f', '\u202a', '\u202b', '\u202c', '\u202d', '\u202e')
 15 | RTL_CHAR = '\u202b'
 16 | RTL_LANGUAGES = ["ar", "he", "he-il"]
 17 | 
 18 | SubtitlesT = TypeVar('SubtitlesT', bound='Subtitles')
 19 | SubtitlesBlockT = TypeVar('SubtitlesBlockT', bound='SubtitlesBlock')
 20 | 
 21 | 
 22 | class SubtitlesBlock(ABC):
 23 |     """
 24 |     Abstract base class for subtitles blocks.
 25 | 
 26 |     Attributes:
 27 |         modified (bool): Whether the block has been modified.
 28 |     """
 29 | 
 30 |     def __init__(self) -> None:
 31 |         self.modified: bool = False
 32 | 
 33 |     @abstractmethod
 34 |     def __copy__(self) -> SubtitlesBlock:
 35 |         """Create a copy of the block."""
 36 | 
 37 |     @abstractmethod
 38 |     def __eq__(self, other: Any) -> bool:
 39 |         """Check if two objects are equal."""
 40 | 
 41 |     @abstractmethod
 42 |     def __str__(self) -> str:
 43 |         """Return a string representation of the block."""
 44 | 
 45 | 
 46 | class SubtitlesCaptionBlock(SubtitlesBlock, ABC):
 47 |     """
 48 |     A base class for subtitles caption blocks.
 49 | 
 50 |     Attributes:
 51 |         start_time (time): Start timestamp of the caption block.
 52 |         end_time (time): End timestamp of the caption block.
 53 |         payload (str): Caption block's payload.
 54 |     """
 55 | 
 56 |     def __init__(self, start_time: time, end_time: time, payload: str):
 57 |         """
 58 |         Initialize a new SubtitlesCaptionBlock object.
 59 | 
 60 |         Args:
 61 |             start_time: Start timestamp of the caption block.
 62 |             end_time: End timestamp of the caption block.
 63 |             payload: Caption block's payload.
 64 |         """
 65 |         super().__init__()
 66 |         self.start_time = start_time
 67 |         self.end_time = end_time
 68 |         self.payload = payload
 69 | 
 70 |     def __copy__(self) -> SubtitlesCaptionBlock:
 71 |         copy = self.__class__(self.start_time, self.end_time, self.payload)
 72 |         copy.modified = self.modified
 73 |         return copy
 74 | 
 75 |     def fix_rtl(self) -> None:
 76 |         """Fix payload's text direction to RTL."""
 77 |         previous_payload = self.payload
 78 | 
 79 |         # Remove previous RTL-related formatting
 80 |         for char in RTL_CONTROL_CHARS:
 81 |             self.payload = self.payload.replace(char, '')
 82 | 
 83 |         # Add RLM char at the start of every line
 84 |         self.payload = RTL_CHAR + self.payload.replace("\n", f"\n{RTL_CHAR}")
 85 | 
 86 |         if self.payload != previous_payload:
 87 |             self.modified = True
 88 | 
 89 |     @abstractmethod
 90 |     def to_srt(self) -> SubRipCaptionBlock:
 91 |         """
 92 |         Convert WebVTT caption block to SRT caption block.
 93 | 
 94 |         Returns:
 95 |             SubRipCaptionBlock: The caption block in SRT format.
 96 |         """
 97 |         ...
 98 | 
 99 | 
100 | class Subtitles(Generic[SubtitlesBlockT], ABC):
101 |     """
102 |     An object representing subtitles, made out of blocks.
103 | 
104 |     Attributes:
105 |         _modified (bool): Whether the subtitles have been modified.
106 |         format (SubtitlesFormatType): [Class Attribute] Format of the subtitles (contains name and file extension).
107 |         language_code (str): Language code of the subtitles.
108 |         blocks (list[SubtitlesBlock]): A list of subtitles blocks that make up the subtitles.
109 |         encoding (str): Encoding of the subtitles.
110 |         raw_data (bytes | None): Raw data of the subtitles.
111 |     """
112 |     format: ClassVar[SubtitlesFormatType]
113 | 
114 |     def __init__(self, data: bytes | None, language_code: str, encoding: str = "utf-8"):
115 |         """
116 |         Initialize a new Subtitles object.
117 | 
118 |         Args:
119 |             data (bytes | None): Raw data of the subtitles.
120 |             language_code (str): Language code of the subtitles.
121 |             encoding (str, optional): Encoding of the subtitles. Defaults to "utf-8".
122 |         """
123 |         self._modified = False
124 |         self.raw_data = None
125 | 
126 |         self.blocks: list[SubtitlesBlockT] = []
127 | 
128 |         self.language_code = language_code
129 |         self.encoding = encoding
130 | 
131 |         if data:
132 |             self.raw_data = data
133 |             self._load(data=data)
134 | 
135 |     def __add__(self: SubtitlesT, obj: SubtitlesBlockT | SubtitlesT) -> SubtitlesT:
136 |         """
137 |         Add a new subtitles block, or append blocks from another subtitles object.
138 | 
139 |         Args:
140 |             obj (SubtitlesBlock | Subtitles): A subtitles block or another subtitles object.
141 | 
142 |         Returns:
143 |             Subtitles: The current subtitles object.
144 |         """
145 |         if isinstance(obj, SubtitlesBlock):
146 |             self.add_blocks(obj)
147 | 
148 |         elif isinstance(obj, self.__class__):
149 |             self.append_subtitles(obj)
150 | 
151 |         else:
152 |             logger.warning(f"Cannot add object of type '{type(obj)}' to '{type(self)}' object. Skipping...")
153 | 
154 |         return self
155 | 
156 |     def __copy__(self: SubtitlesT) -> SubtitlesT:
157 |         """Create a copy of the subtitles object."""
158 |         copy = self.__class__(data=None, language_code=self.language_code, encoding=self.encoding)
159 |         copy.raw_data = self.raw_data
160 |         copy.blocks = [block.__copy__() for block in self.blocks]
161 |         copy._modified = self.modified()  # noqa: SLF001
162 |         return copy
163 | 
164 |     def __eq__(self, other: Any) -> bool:
165 |         return isinstance(other, type(self)) and self.blocks == other.blocks
166 | 
167 |     def __str__(self) -> str:
168 |         return self.dumps()
169 | 
170 |     def _dump(self) -> bytes:
171 |         """
172 |         Dump subtitles object to bytes representing the subtitles.
173 | 
174 |         Returns:
175 |             bytes: The subtitles in a bytes object.
176 |         """
177 |         return self._dumps().encode(encoding=self.encoding)
178 | 
179 |     @abstractmethod
180 |     def _dumps(self) -> str:
181 |         """
182 |         Dump subtitles object to a string representing the subtitles.
183 | 
184 |         Returns:
185 |             str: The subtitles in a string format.
186 |         """
187 |         ...
188 | 
189 |     def _load(self, data: bytes) -> None:
190 |         """
191 |         Load and parse subtitles data from bytes.
192 | 
193 |         Args:
194 |             data (bytes): Subtitles data to load.
195 |         """
196 |         parsed_data = data.decode(encoding=self.encoding)
197 |         self._loads(data=parsed_data)
198 | 
199 |     @abstractmethod
200 |     def _loads(self, data: str) -> None:
201 |         """
202 |         Load and parse subtitles data from a string.
203 | 
204 |         Args:
205 |             data (bytes): Subtitles data to load.
206 |         """
207 |         ...
208 | 
209 |     def dump(self) -> bytes:
210 |         """
211 |         Dump subtitles to a bytes object representing the subtitles.
212 |         Returns the original raw subtitles data if they have not been modified, and raw data is available.
213 | 
214 |         Returns:
215 |             bytes: The subtitles in a bytes object.
216 |         """
217 |         if self.raw_data is not None and not self.modified():
218 |             logger.debug("Returning original raw data as subtitles have not been modified.")
219 |             return self.raw_data
220 | 
221 |         return self._dump()
222 | 
223 |     def dumps(self) -> str:
224 |         """
225 |         Dump subtitles to a string representing the subtitles.
226 |         Returns the original raw subtitles data if they have not been modified, and raw data is available.
227 | 
228 |         Returns:
229 | 
230 |         """
231 |         if self.raw_data is not None and not self.modified():
232 |             logger.debug("Returning original raw data (decoded) as subtitles have not been modified.")
233 |             return self.raw_data.decode(encoding=self.encoding)
234 | 
235 |         return self._dumps()
236 | 
237 |     def add_blocks(self: SubtitlesT,
238 |                    blocks: SubtitlesBlockT | list[SubtitlesBlockT],
239 |                    set_modified: bool = True) -> SubtitlesT:
240 |         """
241 |         Add a new subtitles block to current subtitles.
242 | 
243 |         Args:
244 |             blocks (SubtitlesBlock | list[SubtitlesBlock]):
245 |                 A block object or a list of block objects to append.
246 |             set_modified (bool, optional): Whether to set the subtitles as modified. Defaults to True.
247 | 
248 |         Returns:
249 |             Subtitles: The current subtitles object.
250 |         """
251 |         if isinstance(blocks, list):
252 |             if not blocks:
253 |                 return self
254 | 
255 |             self.blocks.extend(blocks)
256 | 
257 |         else:
258 |             self.blocks.append(blocks)
259 | 
260 |         if set_modified:
261 |             self._modified = True
262 | 
263 |         return self
264 | 
265 |     def append_subtitles(self: SubtitlesT,
266 |                          subtitles: SubtitlesT) -> SubtitlesT:
267 |         """
268 |         Append subtitles to an existing subtitles object.
269 | 
270 |         Args:
271 |             subtitles (Subtitles): Subtitles object to append to current subtitles.
272 | 
273 |         Returns:
274 |             Subtitles: The current subtitles object.
275 |         """
276 |         if subtitles.blocks:
277 |             self.add_blocks(deepcopy(subtitles.blocks))
278 | 
279 |             if subtitles.modified():
280 |                 self._modified = True
281 | 
282 |         return self
283 | 
284 |     def polish(self: SubtitlesT,
285 |                fix_rtl: bool = False,
286 |                remove_duplicates: bool = True,
287 |                ) -> SubtitlesT:
288 |         """
289 |         Apply various fixes to subtitles.
290 | 
291 |         Args:
292 |             fix_rtl (bool, optional): Whether to fix text direction of RTL languages. Defaults to False.
293 |             remove_duplicates (bool, optional): Whether to remove duplicate captions. Defaults to True.
294 | 
295 |         Returns:
296 |             Subtitles: The current subtitles object.
297 |         """
298 |         fix_rtl = (fix_rtl and self.language_code in RTL_LANGUAGES)
299 | 
300 |         if not any((
301 |                 fix_rtl,
302 |                 remove_duplicates,
303 |         )):
304 |             return self
305 | 
306 |         previous_block: SubtitlesBlockT | None = None
307 | 
308 |         for block in self.blocks:
309 |             if fix_rtl:
310 |                 block.fix_rtl()
311 | 
312 |             if remove_duplicates and previous_block is not None and block == previous_block:
313 |                 self.blocks.remove(previous_block)
314 |                 self._modified = True
315 | 
316 |             previous_block = block
317 | 
318 |         return self
319 | 
320 |     def modified(self) -> bool:
321 |         """
322 |         Check if the subtitles have been modified (by checking if any of its blocks have been modified).
323 | 
324 |         Returns:
325 |             bool: True if the subtitles have been modified, False otherwise.
326 |         """
327 |         return self._modified or any(block.modified for block in self.blocks)
328 | 
329 |     def to_srt(self) -> SubRipSubtitles:
330 |         """
331 |         Convert subtitles to SRT format.
332 | 
333 |         Returns:
334 |             SubRipSubtitles: The subtitles in SRT format.
335 |         """
336 |         from isubrip.subtitle_formats.subrip import SubRipSubtitles
337 | 
338 |         subrip_subtitles = SubRipSubtitles(
339 |             data=None,
340 |             language_code=self.language_code,
341 |             encoding=self.encoding,
342 |         )
343 |         subrip_blocks = [block.to_srt() for block in self.blocks if isinstance(block, SubtitlesCaptionBlock)]
344 |         subrip_subtitles.add_blocks(subrip_blocks)
345 | 
346 |         return subrip_subtitles
347 | 
348 | 
349 | def split_timestamp(timestamp: str) -> tuple[time, time]:
350 |     """
351 |     Split a subtitles timestamp into start and end.
352 | 
353 |     Args:
354 |         timestamp (str): A subtitles timestamp. For example: "00:00:00.000 --> 00:00:00.000"
355 | 
356 |     Returns:
357 |         tuple(time, time): A tuple containing start and end times as a datetime object.
358 |     """
359 |     # Support ',' character in timestamp's milliseconds (used in SubRip format).
360 |     timestamp = timestamp.replace(',', '.')
361 | 
362 |     start_time, end_time = timestamp.split(" --> ")
363 |     return time.fromisoformat(start_time), time.fromisoformat(end_time)
364 | 


--------------------------------------------------------------------------------
/isubrip/subtitle_formats/webvtt.py:
--------------------------------------------------------------------------------
  1 | from __future__ import annotations
  2 | 
  3 | from abc import ABCMeta
  4 | from copy import deepcopy
  5 | import re
  6 | from typing import TYPE_CHECKING, Any, ClassVar
  7 | 
  8 | from isubrip.data_structures import SubtitlesFormatType
  9 | from isubrip.subtitle_formats.subrip import SubRipCaptionBlock
 10 | from isubrip.subtitle_formats.subtitles import RTL_CHAR, Subtitles, SubtitlesBlock, SubtitlesCaptionBlock
 11 | from isubrip.utils import split_subtitles_timestamp
 12 | 
 13 | if TYPE_CHECKING:
 14 |     from datetime import time
 15 | 
 16 | # WebVTT Documentation:
 17 | # https://www.w3.org/TR/webvtt1/#cues
 18 | # https://developer.mozilla.org/en-US/docs/Web/API/WebVTT_API#webvtt_cues
 19 | 
 20 | 
 21 | class WebVTTBlock(SubtitlesBlock, metaclass=ABCMeta):
 22 |     """
 23 |     Abstract base class for WEBVTT cue blocks.
 24 |     """
 25 |     is_caption_block: bool = False
 26 | 
 27 | 
 28 | class WebVTTCaptionBlock(SubtitlesCaptionBlock, WebVTTBlock):
 29 |     """An object representing a WebVTT caption block."""
 30 |     subrip_alignment_conversion: ClassVar[bool] = False
 31 | 
 32 |     is_caption_block: bool = True
 33 | 
 34 |     def __init__(self, start_time: time, end_time: time, payload: str, settings: str = "", identifier: str = ""):
 35 |         """
 36 |         Initialize a new object representing a WebVTT caption block.
 37 | 
 38 |         Args:
 39 |             start_time (time): Cue start time.
 40 |             end_time (time): Cue end time.
 41 |             settings (str): Cue settings.
 42 |             payload (str): Cue payload.
 43 |         """
 44 |         super().__init__(start_time=start_time, end_time=end_time, payload=payload)
 45 |         self.identifier = identifier
 46 |         self.settings = settings
 47 | 
 48 |     def __copy__(self) -> WebVTTCaptionBlock:
 49 |         copy = self.__class__(start_time=self.start_time, end_time=self.end_time, payload=self.payload,
 50 |                               settings=self.settings, identifier=self.identifier)
 51 |         copy.modified = self.modified
 52 |         return copy
 53 | 
 54 |     def to_srt(self) -> SubRipCaptionBlock:
 55 |         # Add a {\an8} tag at the start of the payload if it has 'line:0.00%' in the settings
 56 |         if "line:0.00%" in self.settings and self.subrip_alignment_conversion:
 57 |             # If the payload starts with an RTL control char, add the tag after it
 58 |             if self.payload.startswith(RTL_CHAR):
 59 |                 payload = RTL_CHAR + WEBVTT_ALIGN_TOP_TAG + self.payload[len(RTL_CHAR):]
 60 | 
 61 |             else:
 62 |                 payload = WEBVTT_ALIGN_TOP_TAG + self.payload
 63 | 
 64 |         else:
 65 |             payload = self.payload
 66 | 
 67 |         return SubRipCaptionBlock(start_time=self.start_time, end_time=self.end_time, payload=payload)
 68 | 
 69 |     def __eq__(self, other: Any) -> bool:
 70 |         return isinstance(other, type(self)) and \
 71 |             self.start_time == other.start_time and self.end_time == other.end_time and self.payload == other.payload
 72 | 
 73 |     def __str__(self) -> str:
 74 |         result_str = ""
 75 |         time_format = "%H:%M:%S.%f"
 76 | 
 77 |         # Add identifier (if it exists)
 78 |         if self.identifier:
 79 |             result_str += f"{self.identifier}\n"
 80 | 
 81 |         result_str += f"{self.start_time.strftime(time_format)[:-3]} --> {self.end_time.strftime(time_format)[:-3]}"
 82 | 
 83 |         if self.settings:
 84 |             result_str += f" {self.settings}"
 85 | 
 86 |         result_str += f"\n{self.payload}"
 87 | 
 88 |         return result_str
 89 | 
 90 | 
 91 | class WebVTTCommentBlock(WebVTTBlock):
 92 |     """An object representing a WebVTT comment block."""
 93 |     header = "NOTE"
 94 | 
 95 |     def __init__(self, payload: str, inline: bool = False) -> None:
 96 |         """
 97 |         Initialize a new object representing a WebVTT comment block.
 98 | 
 99 |         Args:
100 |             payload (str): Comment payload.
101 |         """
102 |         super().__init__()
103 |         self.payload = payload
104 |         self.inline = inline
105 | 
106 |     def __copy__(self) -> WebVTTCommentBlock:
107 |         copy = self.__class__(payload=self.payload, inline=self.inline)
108 |         copy.modified = self.modified
109 |         return copy
110 | 
111 |     def __eq__(self, other: Any) -> bool:
112 |         return isinstance(other, type(self)) and self.inline == other.inline and self.payload == other.payload
113 | 
114 |     def __str__(self) -> str:
115 |         if self.inline:
116 |             return f"{self.header} {self.payload}"
117 | 
118 |         if self.payload:
119 |             return f"{self.header}\n{self.payload}"
120 | 
121 |         return self.header
122 | 
123 | 
124 | class WebVTTStyleBlock(WebVTTBlock):
125 |     """An object representing a WebVTT style block."""
126 |     header = "STYLE"
127 | 
128 |     def __init__(self, payload: str) -> None:
129 |         """
130 |         Initialize a new object representing a WebVTT style block.
131 | 
132 |         Args:
133 |             payload (str): Style payload.
134 |         """
135 |         super().__init__()
136 |         self.payload = payload
137 | 
138 |     def __copy__(self) -> WebVTTStyleBlock:
139 |         copy = self.__class__(payload=self.payload)
140 |         copy.modified = self.modified
141 |         return copy
142 | 
143 |     def __eq__(self, other: Any) -> bool:
144 |         return isinstance(other, type(self)) and self.payload == other.payload
145 | 
146 |     def __str__(self) -> str:
147 |         return f"{self.header}\n{self.payload}"
148 | 
149 | 
150 | class WebVTTRegionBlock(WebVTTBlock):
151 |     """An object representing a WebVTT region block."""
152 |     header = "REGION"
153 | 
154 |     def __init__(self, payload: str) -> None:
155 |         """
156 |         Initialize a new object representing a WebVTT region block.
157 | 
158 |         Args:
159 |             payload (str): Region payload.
160 |         """
161 |         super().__init__()
162 |         self.payload = payload
163 | 
164 |     def __copy__(self) -> WebVTTRegionBlock:
165 |         copy = self.__class__(payload=self.payload)
166 |         copy.modified = self.modified
167 |         return copy
168 | 
169 |     def __eq__(self, other: Any) -> bool:
170 |         return isinstance(other, type(self)) and self.payload == other.payload
171 | 
172 |     def __str__(self) -> str:
173 |         return f"{self.header} {self.payload}"
174 | 
175 | 
176 | class WebVTTSubtitles(Subtitles[WebVTTBlock]):
177 |     """An object representing a WebVTT subtitles file."""
178 |     format = SubtitlesFormatType.WEBVTT
179 | 
180 |     def _dumps(self) -> str:
181 |         """
182 |         Dump subtitles to a string representing the subtitles in a WebVTT format.
183 | 
184 |         Returns:
185 |             str: The subtitles in a string using a WebVTT format.
186 |         """
187 |         subtitles_str = "WEBVTT\n\n"
188 | 
189 |         for block in self.blocks:
190 |             subtitles_str += str(block) + "\n\n"
191 | 
192 |         return subtitles_str.rstrip('\n')
193 | 
194 |     def _loads(self, data: str) -> None:
195 |         """
196 |         Load and parse WebVTT subtitles data from a string.
197 | 
198 |         Args:
199 |             data (bytes): Subtitles data to load.
200 |         """
201 |         prev_line: str = ""
202 |         lines_iterator = iter(data.splitlines())
203 | 
204 |         for line in lines_iterator:
205 |             # If the line is a timestamp
206 |             if caption_block_regex := re.match(WEBVTT_CAPTION_BLOCK_REGEX, line):
207 |                 # If previous line wasn't empty, add it as an identifier
208 |                 if prev_line:
209 |                     caption_identifier = prev_line
210 | 
211 |                 else:
212 |                     caption_identifier = ""
213 | 
214 |                 caption_timestamps = split_subtitles_timestamp(caption_block_regex.group(1))
215 |                 caption_settings = caption_block_regex.group(2)
216 |                 caption_payload = ""
217 | 
218 |                 for additional_line in lines_iterator:
219 |                     if not additional_line:
220 |                         line = additional_line
221 |                         break
222 | 
223 |                     caption_payload += additional_line + "\n"
224 | 
225 |                 caption_payload = caption_payload.rstrip("\n")
226 |                 self.blocks.append(WebVTTCaptionBlock(
227 |                     identifier=caption_identifier,
228 |                     start_time=caption_timestamps[0],
229 |                     end_time=caption_timestamps[1],
230 |                     settings=caption_settings,
231 |                     payload=caption_payload))
232 | 
233 |             elif comment_block_regex := re.match(WEBVTT_COMMENT_HEADER_REGEX, line):
234 |                 comment_payload = ""
235 |                 inline = False
236 | 
237 |                 if comment_block_regex.group(1) is not None:
238 |                     comment_payload += comment_block_regex.group(1) + "\n"
239 |                     inline = True
240 | 
241 |                 for additional_line in lines_iterator:
242 |                     if not additional_line:
243 |                         line = additional_line
244 |                         break
245 | 
246 |                     comment_payload += additional_line + "\n"
247 | 
248 |                 self.blocks.append(WebVTTCommentBlock(comment_payload.rstrip("\n"), inline=inline))
249 | 
250 |             elif line.rstrip(' \t') == WebVTTRegionBlock.header:
251 |                 region_payload = ""
252 | 
253 |                 for additional_line in lines_iterator:
254 |                     if not additional_line:
255 |                         line = additional_line
256 |                         break
257 | 
258 |                     region_payload += additional_line + "\n"
259 | 
260 |                 self.blocks.append(WebVTTRegionBlock(region_payload.rstrip("\n")))
261 | 
262 |             elif line.rstrip(' \t') == WebVTTStyleBlock.header:
263 |                 style_payload = ""
264 | 
265 |                 for additional_line in lines_iterator:
266 |                     if not additional_line:
267 |                         line = additional_line
268 |                         break
269 | 
270 |                     style_payload += additional_line + "\n"
271 | 
272 |                 self.blocks.append(WebVTTStyleBlock(style_payload.rstrip("\n")))
273 | 
274 |             prev_line = line
275 | 
276 |     def append_subtitles(self: WebVTTSubtitles,
277 |                          subtitles: WebVTTSubtitles) -> WebVTTSubtitles:
278 |         if subtitles.blocks:
279 |             subtitles_copy = deepcopy(subtitles)
280 | 
281 |             # Remove head blocks from the subtitles that will be appended
282 |             subtitles_copy.remove_head_blocks()
283 | 
284 |             self.add_blocks(subtitles_copy.blocks)
285 | 
286 |             if subtitles_copy.modified():
287 |                 self._modified = True
288 | 
289 |         return self
290 | 
291 |     def remove_head_blocks(self) -> None:
292 |         """
293 |         Remove all head blocks (Style / Region) from the subtitles.
294 | 
295 |         NOTE:
296 |             Comment blocks are removed as well if they are before the first caption block (since they're probably
297 |             related to the head blocks).
298 |         """
299 |         for block in self.blocks:
300 |             if isinstance(block, WebVTTCaptionBlock):
301 |                 break
302 | 
303 |             if isinstance(block, (WebVTTCommentBlock, WebVTTStyleBlock, WebVTTRegionBlock)):
304 |                 self.blocks.remove(block)
305 | 
306 | 
307 | # --- Constants ---
308 | WEBVTT_PERCENTAGE_REGEX = r"\d{1,3}(?:\.\d+)?%"
309 | WEBVTT_CAPTION_TIMINGS_REGEX = \
310 |     r"(?:[0-5]\d:)?[0-5]\d:[0-5]\d[\.,]\d{3}[ \t]+-->[ \t]+(?:[0-5]\d:)?[0-5]\d:[0-5]\d[\.,]\d{3}"
311 | 
312 | WEBVTT_CAPTION_SETTING_ALIGNMENT_REGEX = r"align:(?:start|center|middle|end|left|right)"
313 | WEBVTT_CAPTION_SETTING_LINE_REGEX = rf"line:(?:{WEBVTT_PERCENTAGE_REGEX}|-?\d+%)(?:,(?:start|center|middle|end))?"
314 | WEBVTT_CAPTION_SETTING_POSITION_REGEX = rf"position:{WEBVTT_PERCENTAGE_REGEX}(?:,(?:start|center|middle|end))?"
315 | WEBVTT_CAPTION_SETTING_REGION_REGEX = r"region:(?:(?!(?:-->)|\t)\S)+"
316 | WEBVTT_CAPTION_SETTING_SIZE_REGEX = rf"size:{WEBVTT_PERCENTAGE_REGEX}"
317 | WEBVTT_CAPTION_SETTING_VERTICAL_REGEX = r"vertical:(?:lr|rl)"
318 | 
319 | WEBVTT_CAPTION_SETTINGS_REGEX = ("(?:"
320 |                                  f"(?:{WEBVTT_CAPTION_SETTING_ALIGNMENT_REGEX})|"
321 |                                  f"(?:{WEBVTT_CAPTION_SETTING_LINE_REGEX})|"
322 |                                  f"(?:{WEBVTT_CAPTION_SETTING_POSITION_REGEX})|"
323 |                                  f"(?:{WEBVTT_CAPTION_SETTING_REGION_REGEX})|"
324 |                                  f"(?:{WEBVTT_CAPTION_SETTING_SIZE_REGEX})|"
325 |                                  f"(?:{WEBVTT_CAPTION_SETTING_VERTICAL_REGEX})|"
326 |                                  f"(?:[ \t]+)"
327 |                                  ")*")
328 | 
329 | WEBVTT_CAPTION_BLOCK_REGEX = re.compile(rf"^({WEBVTT_CAPTION_TIMINGS_REGEX})[ \t]*({WEBVTT_CAPTION_SETTINGS_REGEX})?")
330 | WEBVTT_COMMENT_HEADER_REGEX = re.compile(rf"^{WebVTTCommentBlock.header}(?:$|[ \t])(.+)?")
331 | 
332 | WEBVTT_ALIGN_TOP_TAG = "{\\an8}"
333 | 


--------------------------------------------------------------------------------
/isubrip/utils.py:
--------------------------------------------------------------------------------
  1 | from __future__ import annotations
  2 | 
  3 | from abc import ABCMeta
  4 | import datetime as dt
  5 | from functools import lru_cache
  6 | import logging
  7 | from pathlib import Path
  8 | import re
  9 | import secrets
 10 | import shutil
 11 | import sys
 12 | from typing import TYPE_CHECKING, Any, Literal, cast, overload
 13 | 
 14 | from wcwidth import wcswidth
 15 | 
 16 | from isubrip.constants import TEMP_FOLDER_PATH, TITLE_REPLACEMENT_STRINGS, WINDOWS_RESERVED_FILE_NAMES
 17 | from isubrip.data_structures import (
 18 |     Episode,
 19 |     MediaBase,
 20 |     Movie,
 21 |     Season,
 22 |     Series,
 23 |     SubtitlesData,
 24 |     SubtitlesFormatType,
 25 |     SubtitlesType,
 26 |     T,
 27 | )
 28 | from isubrip.logger import logger
 29 | 
 30 | if TYPE_CHECKING:
 31 |     from os import PathLike
 32 |     from types import TracebackType
 33 | 
 34 |     import httpx
 35 |     from pydantic import BaseModel, ValidationError
 36 | 
 37 | 
 38 | class SingletonMeta(ABCMeta):
 39 |     """
 40 |     A metaclass that implements the Singleton pattern.
 41 |     When a class using this metaclass is initialized, it will return the same instance every time.
 42 |     """
 43 |     _instances: dict[object, object] = {}
 44 | 
 45 |     def __call__(cls, *args: Any, **kwargs: Any) -> object:
 46 |         if cls._instances.get(cls) is None:
 47 |             cls._instances[cls] = super().__call__(*args, **kwargs)
 48 | 
 49 |         return cls._instances[cls]
 50 | 
 51 | 
 52 | class TempDirGenerator:
 53 |     """A class for generating temporary directories, and disposing them once the object is destroyed."""
 54 |     _generated_temp_directories: list[Path] = []
 55 | 
 56 |     def __exit__(self, exc_type: type[BaseException] | None,
 57 |                  exc_val: BaseException | None, exc_tb: TracebackType | None) -> None:
 58 |         self.cleanup()
 59 | 
 60 |     @classmethod
 61 |     def generate(cls, directory_name: str | None = None) -> Path:
 62 |         """
 63 |         Generate a temporary directory within 'TEMP_FOLDER_PATH'.
 64 | 
 65 |         Args:
 66 |             directory_name (str | None, optional): Name of the directory to generate.
 67 |                 If not specified, a random string will be generated. Defaults to None.
 68 | 
 69 |         Returns:
 70 |             Path: Path to the generated directory.
 71 |         """
 72 |         directory_name = directory_name or secrets.token_hex(5)
 73 |         full_path = TEMP_FOLDER_PATH / directory_name
 74 | 
 75 |         if full_path.is_dir():
 76 |             if full_path in cls._generated_temp_directories:  # Generated by this class
 77 |                 logger.debug(f"Using previously generated temporary directory: '{full_path}'.")
 78 |                 return full_path
 79 | 
 80 |             logger.debug(f"Temporary directory '{full_path}' already exists.\n"
 81 |                          f"Emptying directory from all contents...")
 82 |             shutil.rmtree(full_path)
 83 |             full_path.mkdir(parents=True)
 84 | 
 85 |         else:
 86 |             full_path.mkdir(parents=True)
 87 |             logger.debug(f"Temporary directory has been generated: '{full_path}'")
 88 | 
 89 |         cls._generated_temp_directories.append(full_path)
 90 |         return full_path
 91 | 
 92 |     @classmethod
 93 |     def cleanup(cls) -> None:
 94 |         """Remove all temporary directories generated by this object."""
 95 |         for temp_directory in cls._generated_temp_directories:
 96 |             logger.debug(f"Removing temporary directory: '{temp_directory}'")
 97 | 
 98 |             try:
 99 |                 shutil.rmtree(temp_directory)
100 | 
101 |             except Exception as e:
102 |                 logger.debug(f"Failed to remove temporary directory '{temp_directory}': {e}")
103 | 
104 |         cls._generated_temp_directories = []
105 | 
106 | 
107 | def convert_epoch_to_datetime(epoch_timestamp: int) -> dt.datetime:
108 |     """
109 |     Convert an epoch timestamp to a datetime object.
110 | 
111 |     Args:
112 |         epoch_timestamp (int): Epoch timestamp.
113 | 
114 |     Returns:
115 |         datetime: A datetime object representing the timestamp.
116 |     """
117 |     if epoch_timestamp >= 0:
118 |         return dt.datetime.fromtimestamp(epoch_timestamp)
119 | 
120 |     return dt.datetime(1970, 1, 1) + dt.timedelta(seconds=epoch_timestamp)
121 | 
122 | 
123 | def convert_log_level(log_level: str) -> int:
124 |     """
125 |     Convert a log level string to a logging level.
126 | 
127 |     Args:
128 |         log_level (str): Log level string.
129 | 
130 |     Returns:
131 |         int: Logging level.
132 |     
133 |     Raises:
134 |         ValueError: If the log level is invalid.
135 |     """
136 |     log_level_upper = log_level.upper()
137 |     if log_level_upper not in ('DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'):
138 |         raise ValueError(f"Invalid log level: {log_level}")
139 | 
140 |     return cast(int, getattr(logging, log_level_upper))
141 | 
142 | 
143 | def download_subtitles_to_file(media_data: Movie | Episode, subtitles_data: SubtitlesData, output_path: str | PathLike,
144 |                                source_abbreviation: str | None = None, overwrite: bool = False) -> Path:
145 |     """
146 |     Download subtitles to a file.
147 | 
148 |     Args:
149 |         media_data (Movie | Episode): An object containing media data.
150 |         subtitles_data (SubtitlesData): A SubtitlesData object containing subtitles data.
151 |         output_path (str | PathLike): Path to the output folder.
152 |         source_abbreviation (str | None, optional): Abbreviation of the source the subtitles are downloaded from.
153 |             Defaults to None.
154 |         overwrite (bool, optional): Whether to overwrite files if they already exist. Defaults to True.
155 | 
156 |     Returns:
157 |         Path: Path to the downloaded subtitles file.
158 | 
159 |     Raises:
160 |         ValueError: If the path in `output_path` does not exist.
161 |     """
162 |     output_path = Path(output_path)
163 | 
164 |     if not output_path.is_dir():
165 |         raise ValueError(f"Invalid path: {output_path}")
166 | 
167 |     if isinstance(media_data, Movie):
168 |         file_name = format_release_name(title=media_data.name,
169 |                                         release_date=media_data.release_date,
170 |                                         media_source=source_abbreviation,
171 |                                         language_code=subtitles_data.language_code,
172 |                                         subtitles_type=subtitles_data.special_type,
173 |                                         file_format=subtitles_data.subtitles_format)
174 |     else:  # isinstance(media_data, Episode):
175 |         file_name = format_release_name(title=media_data.series_name,
176 |                                         release_date=media_data.release_date,
177 |                                         season_number=media_data.season_number,
178 |                                         episode_number=media_data.episode_number,
179 |                                         episode_name=media_data.episode_name,
180 |                                         media_source=source_abbreviation,
181 |                                         language_code=subtitles_data.language_code,
182 |                                         subtitles_type=subtitles_data.special_type,
183 |                                         file_format=subtitles_data.subtitles_format)
184 | 
185 |     file_path = output_path / file_name
186 | 
187 |     if file_path.exists() and not overwrite:
188 |         file_path = generate_non_conflicting_path(file_path=file_path)
189 | 
190 |     with file_path.open('wb') as f:
191 |         f.write(subtitles_data.content)
192 | 
193 |     return file_path
194 | 
195 | def format_config_validation_error(exc: ValidationError) -> str:
196 |     """
197 |     Format a Pydantic ValidationError into a human-readable string.
198 | 
199 |     Args:
200 |         exc (ValidationError): The ValidationError instance containing validation errors.
201 | 
202 |     Returns:
203 |         str: A formatted string describing the validation errors, including the location,
204 |              type, value, and error messages for each invalid field.
205 |     """
206 |     validation_errors = exc.errors()
207 |     error_str = ""
208 | 
209 |     consolidated_errors: dict[str, dict[str, Any]] = {}
210 | 
211 |     for validation_error in validation_errors:
212 |         value: Any = validation_error['input']
213 |         value_type: str = type(value).__name__
214 |         location: list[str] = [str(item) for item in validation_error['loc']]
215 |         error_msg: str = validation_error['msg']
216 | 
217 |         # When the expected type is a union, Pydantic returns several errors for each type,
218 |         # with the type being the last item in the location list
219 |         if (
220 |                 isinstance(location[-1], str) and
221 |                 (location[-1].endswith(']') or location[-1] in ('str', 'int', 'float', 'bool'))
222 |         ):
223 |             location.pop()
224 | 
225 |         if len(location) > 1:
226 |             location_str = ".".join(location)
227 | 
228 |         else:
229 |             location_str = location[0]
230 | 
231 |         if location_str in consolidated_errors:
232 |             consolidated_errors[location_str]['errors'].append(error_msg)
233 | 
234 |         else:
235 |             consolidated_errors[location_str] = {}
236 |             consolidated_errors[location_str]['info'] = {
237 |                 "value": value,
238 |                 "type": value_type,
239 |             }
240 |             consolidated_errors[location_str]['errors'] = [error_msg]
241 | 
242 |     for error_loc, error_data in consolidated_errors.items():
243 |         error_type = error_data['info']['type']
244 |         error_value = error_data['info']['value']
245 |         error_str += f"'{error_loc}' (type: '{error_type}', value: '{error_value}'):\n"
246 |         
247 |         for error in error_data['errors']:
248 |             error_str += f"    {error}\n"
249 | 
250 |     return error_str
251 | 
252 | 
253 | def format_list(items: list[str], width: int = 80) -> str:
254 |     """
255 |     Format a list of strings into a grid-like display with dynamic column widths.
256 |     
257 |     The function automatically calculates the optimal number of columns based on the maximum item width 
258 |     and the desired total width. It properly handles Unicode characters by using their display width.
259 | 
260 |     Args:
261 |         items (list[str]): List of strings to format
262 |         width (int, optional): Maximum width of the output in characters. Defaults to 80.
263 | 
264 |     Returns:
265 |         str: A formatted string with items arranged in columns
266 | 
267 |     Example:
268 |         >>> items = ["Item 1", "Long Item 2", "Item 3", "Item 4"]
269 |         >>> print(format_list(items, width=40))
270 |         Item 1      Long Item 2
271 |         Item 3      Item 4
272 |     """
273 |     if not items:
274 |         return ""
275 |     
276 |     # Calculate true display width for each item and add spacing
277 |     item_widths = [(s, wcswidth(s)) for s in items]
278 |     column_width = max(width for _, width in item_widths) + 4  # Add spacing between columns
279 |     columns = max(1, width // column_width)  # At least one column
280 |     
281 |     # Build rows with proper spacing
282 |     rows = []
283 |     for i in range(0, len(item_widths), columns):
284 |         row_items = item_widths[i:i + columns]
285 |         cols = []
286 |         for text, text_width in row_items:
287 |             padding = " " * (column_width - text_width)
288 |             cols.append(f"{text}{padding}")
289 |         rows.append("".join(cols).rstrip())
290 |     
291 |     return "\n".join(rows)
292 | 
293 | 
294 | def format_media_description(media_data: MediaBase, shortened: bool = False) -> str:
295 |     """
296 |     Generate a short description string of a media object.
297 | 
298 |     Args:
299 |         media_data (MediaBase): An object containing media data.
300 |         shortened (bool, optional): Whether to generate a shortened description. Defaults to False.
301 | 
302 |     Returns:
303 |         str: A short description string of the media object.
304 |     """
305 |     if isinstance(media_data, Movie):
306 |         release_year = (
307 |             media_data.release_date.year
308 |             if isinstance(media_data.release_date, dt.datetime)
309 |             else media_data.release_date
310 |         )
311 |         description_str = f"{media_data.name} [{release_year}]"
312 | 
313 |         if media_data.id:
314 |             description_str += f" (ID: {media_data.id})"
315 | 
316 |         return description_str
317 | 
318 |     if isinstance(media_data, Series):
319 |         description_str = f"{media_data.series_name}"
320 | 
321 |         if media_data.series_release_date:
322 |             if isinstance(media_data.series_release_date, dt.datetime):
323 |                 description_str += f" [{media_data.series_release_date.year}]"
324 | 
325 |             else:
326 |                 description_str += f" [{media_data.series_release_date}]"
327 | 
328 |         if media_data.id:
329 |             description_str += f" (ID: {media_data.id})"
330 | 
331 |         return description_str
332 | 
333 |     if isinstance(media_data, Season):
334 |         if shortened:
335 |             description_str = f"Season {media_data.season_number:02d}"
336 | 
337 |         else:
338 |             description_str = f"{media_data.series_name} - Season {media_data.season_number:02d}"
339 | 
340 |         if media_data.season_name:
341 |             description_str += f" - {media_data.season_name}"
342 | 
343 |         if media_data.id:
344 |             description_str += f" (ID: {media_data.id})"
345 | 
346 |         return description_str
347 | 
348 |     if isinstance(media_data, Episode):
349 |         if shortened:
350 |             description_str = f"S{media_data.season_number:02d}E{media_data.episode_number:02d}"
351 | 
352 |         else:
353 |             description_str = (f"{media_data.series_name} - "
354 |                                f"S{media_data.season_number:02d}E{media_data.episode_number:02d}")
355 | 
356 |         if media_data.episode_name:
357 |             description_str += f" - {media_data.episode_name}"
358 | 
359 |         if media_data.id:
360 |             description_str += f" (ID: {media_data.id})"
361 | 
362 |         return description_str
363 | 
364 |     raise ValueError(f"Unsupported media type: '{type(media_data)}'")
365 | 
366 | 
367 | def format_release_name(title: str,
368 |                         release_date: dt.datetime | int | None = None,
369 |                         season_number: int | None = None,
370 |                         episode_number: int | None = None,
371 |                         episode_name: str | None = None,
372 |                         media_source: str | None = None,
373 |                         source_type: str | None = "WEB",
374 |                         additional_info: str | list[str] | None = None,
375 |                         language_code: str | None = None,
376 |                         subtitles_type: SubtitlesType | None = None,
377 |                         file_format: str | SubtitlesFormatType | None = None) -> str:
378 |     """
379 |     Format a release name.
380 | 
381 |     Args:
382 |         title (str): Media title.
383 |         release_date (int | None, optional): Release date (datetime), or year (int) of the media. Defaults to None.
384 |         season_number (int | None, optional): Season number. Defaults to None.
385 |         episode_number (int | None, optional): Episode number. Defaults to None.
386 |         episode_name (str | None, optional): Episode name. Defaults to None.
387 |         media_source (str | None, optional): Media source name (full or abbreviation). Defaults to None.
388 |         source_type(str | None, optional): General source type (WEB, BluRay, etc.). Defaults to None.
389 |         additional_info (list[str] | str | None, optional): Additional info to add to the file name. Defaults to None.
390 |         language_code (str | None, optional): Language code. Defaults to None.
391 |         subtitles_type (SubtitlesType | None, optional): Subtitles type. Defaults to None.
392 |         file_format (SubtitlesFormat | str | None, optional): File format to use.  Defaults to None.
393 | 
394 |     Returns:
395 |         str: Generated file name.
396 |     """
397 |     file_name = standardize_title(title).rstrip('.')
398 | 
399 |     if release_date is not None:
400 |         if isinstance(release_date, dt.datetime):
401 |             release_year = release_date.year
402 | 
403 |         else:
404 |             release_year = release_date
405 | 
406 |         file_name += f".{release_year}"
407 | 
408 |     if season_number is not None and episode_number is not None:
409 |         file_name += f".S{season_number:02}E{episode_number:02}"
410 | 
411 |     if episode_name is not None:
412 |         file_name += f".{standardize_title(episode_name).rstrip('.')}"
413 | 
414 |     if media_source is not None:
415 |         file_name += f".{media_source}"
416 | 
417 |     if source_type is not None:
418 |         file_name += f".{source_type}"
419 | 
420 |     if additional_info is not None:
421 |         if isinstance(additional_info, (list, tuple)):
422 |             additional_info = '.'.join(additional_info)
423 | 
424 |         file_name += f".{additional_info}"
425 | 
426 |     if language_code is not None:
427 |         file_name += f".{language_code}"
428 | 
429 |     if subtitles_type is not None:
430 |         file_name += f".{subtitles_type.value.lower()}"
431 | 
432 |     if file_format is not None:
433 |         if isinstance(file_format, SubtitlesFormatType):
434 |             file_format = file_format.value.file_extension
435 | 
436 |         file_name += f".{file_format}"
437 | 
438 |     return file_name
439 | 
440 | 
441 | @lru_cache
442 | def format_subtitles_description(language_code: str | None = None, language_name: str | None = None,
443 |                                  special_type: SubtitlesType | None = None) -> str:
444 |     """
445 |     Format a subtitles description using its attributes.
446 | 
447 |     Args:
448 |         language_code (str | None, optional): Language code. Defaults to None.
449 |         language_name (str | None, optional): Language name. Defaults to None.
450 |         special_type (SubtitlesType | None, optional): Subtitles type. Defaults to None.
451 | 
452 |     Returns:
453 |         str: Formatted subtitles description.
454 |     
455 |     Raises:
456 |         ValueError: If neither `language_code` nor `language_name` is provided.
457 |     """
458 |     if language_name and language_code:
459 |         language_str = f"{language_name} ({language_code})"
460 | 
461 |     elif result := (language_name or language_code):
462 |         language_str = result
463 | 
464 |     else:
465 |         raise ValueError("Either 'language_code' or 'language_name' must be provided.")
466 |     
467 |     if special_type:
468 |         language_str += f" [{special_type.value}]"
469 | 
470 |     return language_str
471 | 
472 | 
473 | def get_model_field(model: BaseModel | None, field: str, convert_to_dict: bool = False) -> Any:
474 |     """
475 |     Get a field from a Pydantic model.
476 | 
477 |     Args:
478 |         model (BaseModel | None): A Pydantic model.
479 |         field (str): Field name to retrieve.
480 |         convert_to_dict (bool, optional): Whether to convert the field value to a dictionary. Defaults to False.
481 | 
482 |     Returns:
483 |         Any: The field value.
484 |     """
485 |     if model and hasattr(model, field):
486 |         field_value = getattr(model, field)
487 | 
488 |         if convert_to_dict and hasattr(field_value, 'dict'):
489 |             field_value = field_value.dict()
490 | 
491 |         return field_value
492 | 
493 |     return None
494 | 
495 | 
496 | def generate_media_folder_name(media_data: Movie | Episode, source: str | None = None) -> str:
497 |     """
498 |     Generate a folder name for media data.
499 | 
500 |     Args:
501 |         media_data (Movie | Episode): A movie or episode data object.
502 |         source (str | None, optional): Abbreviation of the source to use for file names. Defaults to None.
503 | 
504 |     Returns:
505 |         str: A folder name for the media data.
506 |     """
507 |     if isinstance(media_data, Movie):
508 |         return format_release_name(
509 |             title=media_data.name,
510 |             release_date=media_data.release_date,
511 |             media_source=source,
512 |         )
513 | 
514 |     # elif isinstance(media_data, Episode):
515 |     return format_release_name(
516 |         title=media_data.series_name,
517 |         season_number=media_data.season_number,
518 |         episode_number=media_data.episode_number,
519 |         media_source=source,
520 |     )
521 | 
522 | 
523 | def generate_non_conflicting_path(file_path: Path, has_extension: bool = True) -> Path:
524 |     """
525 |     Generate a non-conflicting path for a file.
526 |     If the file already exists, a number will be added to the end of the file name.
527 | 
528 |     Args:
529 |         file_path (Path): Path to a file.
530 |         has_extension (bool, optional): Whether the name of the file includes file extension. Defaults to True.
531 | 
532 |     Returns:
533 |         Path: A non-conflicting file path.
534 |     """
535 |     if isinstance(file_path, str):
536 |         file_path = Path(file_path)
537 | 
538 |     if not file_path.exists():
539 |         return file_path
540 | 
541 |     i = 1
542 |     while True:
543 |         if has_extension:
544 |             new_file_path = file_path.parent / f"{file_path.stem}-{i}{file_path.suffix}"
545 | 
546 |         else:
547 |             new_file_path = file_path.parent / f"{file_path}-{i}"
548 | 
549 |         if not new_file_path.exists():
550 |             return new_file_path
551 | 
552 |         i += 1
553 | 
554 | 
555 | def generate_temp_media_path(media_data: Movie | Episode, source: str | None = None) -> Path:
556 |     """
557 |     Generate a temporary directory for downloading media data.
558 | 
559 |     Args:
560 |         media_data (Movie | Episode): A movie or episode data object.
561 |         source (str | None, optional): Abbreviation of the source to use for file names. Defaults to None.
562 | 
563 |     Returns:
564 |         Path: A path to the temporary folder.
565 |     """
566 |     temp_folder_name = generate_media_folder_name(media_data=media_data, source=source)
567 |     path = generate_non_conflicting_path(file_path=TEMP_FOLDER_PATH / temp_folder_name, has_extension=False)
568 | 
569 |     return TempDirGenerator.generate(directory_name=path.name)
570 | 
571 | 
572 | def merge_dict_values(*dictionaries: dict) -> dict:
573 |     """
574 |     A function for merging the values of multiple dictionaries using the same keys.
575 |     If a key already exists, the value will be added to a list of values mapped to that key.
576 | 
577 |     Examples:
578 |         merge_dict_values({'a': 1, 'b': 3}, {'a': 2, 'b': 4}) -> {'a': [1, 2], 'b': [3, 4]}
579 |         merge_dict_values({'a': 1, 'b': 2}, {'a': 1, 'b': [2, 3]}) -> {'a': 1, 'b': [2, 3]}
580 | 
581 |     Note:
582 |         This function support only merging of lists or single items (no tuples or other iterables),
583 |         and without any nesting (lists within lists).
584 | 
585 |     Args:
586 |         *dictionaries (dict): Dictionaries to merge.
587 | 
588 |     Returns:
589 |         dict: A merged dictionary.
590 |     """
591 |     _dictionaries: list[dict] = [d for d in dictionaries if d]
592 | 
593 |     if len(_dictionaries) == 0:
594 |         return {}
595 | 
596 |     if len(_dictionaries) == 1:
597 |         return _dictionaries[0]
598 | 
599 |     result: dict = {}
600 | 
601 |     for _dict in _dictionaries:
602 |         for key, value in _dict.items():
603 |             if key in result:
604 |                 if isinstance(result[key], list):
605 |                     if isinstance(value, list):
606 |                         result[key].extend(value)
607 |                     else:
608 |                         result[key].append(value)
609 |                 else:
610 |                     if isinstance(value, list):
611 |                         result[key] = [result[key], *value]
612 |                     else:
613 |                         result[key] = [result[key], value]
614 |             else:
615 |                 result[key] = value
616 | 
617 |     return result
618 | 
619 | 
620 | def raise_for_status(response: httpx.Response) -> None:
621 |     """
622 |     Raise an exception if the response status code is invalid.
623 |     Uses 'response.raise_for_status()' internally, with additional logging.
624 | 
625 |     Args:
626 |         response (httpx.Response): A response object.
627 |     """
628 |     truncation_threshold = 1500
629 | 
630 |     if not response.is_error:
631 |         return
632 | 
633 |     if len(response.text) > truncation_threshold:
634 |         # Truncate the response as in some cases there could be an unexpected long HTML response
635 |         response_text = response.text[:truncation_threshold].rstrip() + " <TRUNCATED...>"
636 | 
637 |     else:
638 |         response_text = response.text
639 | 
640 |     logger.debug(f"Response status code: {response.status_code}")
641 | 
642 |     if response.headers.get('Content-Type'):
643 |         logger.debug(f"Response type: {response.headers['Content-Type']}")
644 | 
645 |     logger.debug(f"Response text: {response_text}")
646 | 
647 |     response.raise_for_status()
648 | 
649 | 
650 | def parse_url_params(url_params: str) -> dict:
651 |     """
652 |     Parse GET parameters from a URL to a dictionary.
653 | 
654 |     Args:
655 |         url_params (str): URL parameters. (e.g. 'param1=value1&param2=value2')
656 | 
657 |     Returns:
658 |         dict: A dictionary containing the URL parameters.
659 |     """
660 |     url_params = url_params.split('?')[-1].rstrip('&')
661 |     params_list = url_params.split('&')
662 | 
663 |     if len(params_list) == 0 or \
664 |             (len(params_list) == 1 and '=' not in params_list[0]):
665 |         return {}
666 | 
667 |     return {key: value for key, value in (param.split('=') for param in params_list)}
668 | 
669 | 
670 | @overload
671 | def return_first_valid(*values: T | None, raise_error: Literal[True] = ...) -> T:
672 |     ...
673 | 
674 | 
675 | @overload
676 | def return_first_valid(*values: T | None, raise_error: Literal[False] = ...) -> T | None:
677 |     ...
678 | 
679 | 
680 | def return_first_valid(*values: T | None, raise_error: bool = False) -> T | None:
681 |     """
682 |     Return the first non-None value from a list of values.
683 | 
684 |     Args:
685 |         *values (T): Values to check.
686 |         raise_error (bool, optional): Whether to raise an error if all values are None. Defaults to False.
687 | 
688 |     Returns:
689 |         T | None: The first non-None value, or None if all values are None and `raise_error` is False.
690 | 
691 |     Raises:
692 |         ValueError: If all values are None and `raise_error` is True.
693 |     """
694 |     for value in values:
695 |         if value is not None:
696 |             return value
697 | 
698 |     if raise_error:
699 |         raise ValueError("No valid value found.")
700 | 
701 |     return None
702 | 
703 | def single_string_to_list(item: str | list[str]) -> list[str]:
704 |     """
705 |     Convert a single string to a list containing the string.
706 |     If None is passed, an empty list will be returned.
707 | 
708 |     Args:
709 |         item (str | list[str]): A string or a list of strings.
710 | 
711 |     Returns:
712 |         list[str]: A list containing the string, or an empty list if None was passed.
713 |     """
714 |     if item is None:
715 |         return []
716 | 
717 |     if isinstance(item, list):
718 |         return item
719 | 
720 |     return [item]
721 | 
722 | 
723 | def split_subtitles_timestamp(timestamp: str) -> tuple[dt.time, dt.time]:
724 |     """
725 |     Split a subtitles timestamp into start and end.
726 | 
727 |     Args:
728 |         timestamp (str): A subtitles timestamp. For example: "00:00:00.000 --> 00:00:00.000"
729 | 
730 |     Returns:
731 |         tuple(time, time): A tuple containing start and end times as a datetime object.
732 |     """
733 |     # Support ',' character in timestamp's milliseconds (used in SubRip format).
734 |     timestamp = timestamp.replace(',', '.')
735 | 
736 |     start_time, end_time = timestamp.split(" --> ")
737 |     return dt.time.fromisoformat(start_time), dt.time.fromisoformat(end_time)
738 | 
739 | 
740 | @lru_cache
741 | def standardize_title(title: str) -> str:
742 |     """
743 |     Format movie title to a standardized title that can be used as a file name.
744 | 
745 |     Args:
746 |         title (str): A movie title.
747 | 
748 |     Returns:
749 |         str: The movie title, in a file-name-friendly format.
750 |     """
751 |     title = title.strip()
752 | 
753 |     for string, replacement_string in TITLE_REPLACEMENT_STRINGS.items():
754 |         title = title.replace(string, replacement_string)
755 | 
756 |     title = re.sub(r"\.+", ".", title)  # Replace multiple dots with a single dot
757 | 
758 |     # If running on Windows, rename Windows reserved names to allow file creation
759 |     if sys.platform == 'win32':
760 |         split_title = title.split('.')
761 | 
762 |         if split_title[0].upper() in WINDOWS_RESERVED_FILE_NAMES:
763 |             if len(split_title) > 1:
764 |                 return split_title[0] + split_title[1] + '.'.join(split_title[2:])
765 | 
766 |             if len(split_title) == 1:
767 |                 return "_" + title
768 | 
769 |     return title
770 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
  1 | [project]
  2 | name = "isubrip"
  3 | version = "2.6.3"
  4 | description = "A Python package for scraping and downloading subtitles from AppleTV / iTunes movie pages."
  5 | authors = [
  6 |     {name = "Michael Yochpaz"}
  7 | ]
  8 | readme = "README.md"
  9 | keywords = [
 10 |     "iTunes",
 11 |     "AppleTV",
 12 |     "movies",
 13 |     "subtitles",
 14 |     "scrape",
 15 |     "scraper",
 16 |     "download",
 17 |     "m3u8"
 18 | ]
 19 | classifiers = [
 20 |     "Development Status :: 5 - Production/Stable",
 21 |     "Intended Audience :: End Users/Desktop",
 22 |     "Operating System :: Microsoft :: Windows",
 23 |     "Operating System :: MacOS",
 24 |     "Operating System :: POSIX :: Linux",
 25 |     "Topic :: Utilities",
 26 |     "License :: OSI Approved :: MIT License",
 27 |     "Programming Language :: Python :: 3.9",
 28 |     "Programming Language :: Python :: 3.10",
 29 |     "Programming Language :: Python :: 3.11",
 30 |     "Programming Language :: Python :: 3.12",
 31 |     "Programming Language :: Python :: 3.13",
 32 | ]
 33 | requires-python = ">= 3.9"
 34 | dependencies = [
 35 |     "httpx[http2]>=0.28.1",
 36 |     "m3u8>=6.0.0",
 37 |     "pydantic>=2.10.6",
 38 |     "pydantic-settings>=2.7.1",
 39 |     "pygments>=2.19.1",  # Used by 'rich'. Specified here as version 2.18 appears to cause issues.
 40 |     "rich>=13.9.4",
 41 |     "tomli>=2.0.2",
 42 |     "wcwidth>=0.2.13",
 43 | ]
 44 | 
 45 | [project.urls]
 46 | Homepage = "https://github.com/MichaelYochpaz/iSubRip"
 47 | Repository = "https://github.com/MichaelYochpaz/iSubRip"
 48 | Issues = "https://github.com/MichaelYochpaz/iSubRip/issues"
 49 | Changelog = "https://github.com/MichaelYochpaz/iSubRip/blob/main/CHANGELOG.md"
 50 | 
 51 | [project.scripts]
 52 | isubrip = "isubrip.__main__:main"
 53 | 
 54 | [build-system]
 55 | requires = ["hatchling"]
 56 | build-backend = "hatchling.build"
 57 | 
 58 | [tool.uv]
 59 | dev-dependencies = [
 60 |     "mypy>=1.14.1",
 61 |     "ruff>=0.9.3",
 62 | ]
 63 | 
 64 | [tool.mypy]
 65 | check_untyped_defs = true
 66 | disallow_untyped_defs = true
 67 | explicit_package_bases = true
 68 | ignore_missing_imports = true
 69 | python_version = "3.9"
 70 | warn_return_any = true
 71 | plugins = [
 72 |     "pydantic.mypy"
 73 | ]
 74 | 
 75 | [tool.ruff]
 76 | line-length = 120
 77 | target-version = "py39"
 78 | 
 79 | [tool.ruff.lint]
 80 | select = [
 81 |     "ARG",
 82 |     "ASYNC",
 83 |     "B",
 84 |     "C4",
 85 |     "COM",
 86 |     "E",
 87 |     "F",
 88 |     "FA",
 89 |     "I",
 90 |     "INP",
 91 |     "ISC",
 92 |     "N",
 93 |     "PIE",
 94 |     "PGH",
 95 |     "PT",
 96 |     "PTH",
 97 |     "Q",
 98 |     "RSE",
 99 |     "RET",
100 |     "RUF",
101 |     "S",
102 |     "SIM",
103 |     "SLF",
104 |     "T20",
105 |     "TCH",
106 |     "TID",
107 |     "TRY",
108 |     "UP",
109 | ]
110 | ignore = [
111 |     "C416",
112 |     "Q000",
113 |     "RUF010",
114 |     "RUF012",
115 |     "SIM108",
116 |     "TD002",
117 |     "TD003",
118 |     "TRY003",
119 | ]
120 | unfixable = ["ARG"]
121 | 
122 | [tool.ruff.lint.flake8-tidy-imports]
123 | ban-relative-imports = "all"
124 | 
125 | [tool.ruff.lint.flake8-quotes]
126 | docstring-quotes = "double"
127 | 
128 | [tool.ruff.lint.isort]
129 | force-sort-within-sections = true
130 | 
131 | [tool.ruff.lint.pyupgrade]
132 | keep-runtime-typing = true
133 | 


--------------------------------------------------------------------------------