├── .gitignore ├── CHANGELOG.md ├── LICENSE ├── README.md ├── README.zh_CN.md ├── config.example.toml ├── podmaker ├── __init__.py ├── __main__.py ├── cli.py ├── config │ ├── __init__.py │ ├── core.py │ └── storage.py ├── fetcher │ ├── __init__.py │ ├── core.py │ └── youtube.py ├── processor │ ├── __init__.py │ ├── core.py │ ├── scheduling.py │ └── task.py ├── rss │ ├── README.md │ ├── __init__.py │ ├── core.py │ ├── enclosure.py │ ├── episode.py │ ├── podcast.py │ └── util │ │ ├── __init__.py │ │ ├── namespace.py │ │ └── parse.py ├── storage │ ├── __init__.py │ ├── core.py │ ├── local.py │ └── s3.py └── util │ ├── __init__.py │ ├── exit.py │ └── retry_util.py ├── poetry.lock ├── pyproject.toml ├── systemd └── podmaker.service └── tests ├── __init__.py ├── data ├── apple.rss.test.xml └── google.rss.test.xml ├── helper.py ├── provider ├── __init__.py ├── test_resource.py └── test_youtube.py ├── storage ├── __init__.py ├── test_local.py └── test_s3.py ├── test_config.py ├── test_rss.py └── util ├── __init__.py └── test_retry.py /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | .vscode/ 3 | config.toml 4 | 5 | # Created by https://www.toptal.com/developers/gitignore/api/python 6 | # Edit at https://www.toptal.com/developers/gitignore?templates=python 7 | 8 | ### Python ### 9 | # Byte-compiled / optimized / DLL files 10 | __pycache__/ 11 | *.py[cod] 12 | *$py.class 13 | 14 | # C extensions 15 | *.so 16 | 17 | # Distribution / packaging 18 | .Python 19 | build/ 20 | develop-eggs/ 21 | dist/ 22 | downloads/ 23 | eggs/ 24 | .eggs/ 25 | lib/ 26 | lib64/ 27 | parts/ 28 | sdist/ 29 | var/ 30 | wheels/ 31 | share/python-wheels/ 32 | *.egg-info/ 33 | .installed.cfg 34 | *.egg 35 | MANIFEST 36 | 37 | # PyInstaller 38 | # Usually these files are written by a python script from a template 39 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
40 | *.manifest 41 | *.spec 42 | 43 | # Installer logs 44 | pip-log.txt 45 | pip-delete-this-directory.txt 46 | 47 | # Unit test / coverage reports 48 | htmlcov/ 49 | .tox/ 50 | .nox/ 51 | .coverage 52 | .coverage.* 53 | .cache 54 | nosetests.xml 55 | coverage.xml 56 | *.cover 57 | *.py,cover 58 | .hypothesis/ 59 | .pytest_cache/ 60 | cover/ 61 | 62 | # Translations 63 | *.mo 64 | *.pot 65 | 66 | # Django stuff: 67 | *.log 68 | local_settings.py 69 | db.sqlite3 70 | db.sqlite3-journal 71 | 72 | # Flask stuff: 73 | instance/ 74 | .webassets-cache 75 | 76 | # Scrapy stuff: 77 | .scrapy 78 | 79 | # Sphinx documentation 80 | docs/_build/ 81 | 82 | # PyBuilder 83 | .pybuilder/ 84 | target/ 85 | 86 | # Jupyter Notebook 87 | .ipynb_checkpoints 88 | 89 | # IPython 90 | profile_default/ 91 | ipython_config.py 92 | 93 | # pyenv 94 | # For a library or package, you might want to ignore these files since the code is 95 | # intended to run in multiple environments; otherwise, check them in: 96 | # .python-version 97 | 98 | # pipenv 99 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 100 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 101 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 102 | # install all needed dependencies. 103 | #Pipfile.lock 104 | 105 | # poetry 106 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 107 | # This is especially recommended for binary packages to ensure reproducibility, and is more 108 | # commonly ignored for libraries. 109 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 110 | #poetry.lock 111 | 112 | # pdm 113 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 
114 | #pdm.lock 115 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 116 | # in version control. 117 | # https://pdm.fming.dev/#use-with-ide 118 | .pdm.toml 119 | 120 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 121 | __pypackages__/ 122 | 123 | # Celery stuff 124 | celerybeat-schedule 125 | celerybeat.pid 126 | 127 | # SageMath parsed files 128 | *.sage.py 129 | 130 | # Environments 131 | .env 132 | .venv 133 | env/ 134 | venv/ 135 | ENV/ 136 | env.bak/ 137 | venv.bak/ 138 | 139 | # Spyder project settings 140 | .spyderproject 141 | .spyproject 142 | 143 | # Rope project settings 144 | .ropeproject 145 | 146 | # mkdocs documentation 147 | /site 148 | 149 | # mypy 150 | .mypy_cache/ 151 | .dmypy.json 152 | dmypy.json 153 | 154 | # Pyre type checker 155 | .pyre/ 156 | 157 | # pytype static type analyzer 158 | .pytype/ 159 | 160 | # Cython debug symbols 161 | cython_debug/ 162 | 163 | # PyCharm 164 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 165 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 166 | # and can be added to the global gitignore or merged into this file. For a more nuclear 167 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
168 | #.idea/ 169 | 170 | ### Python Patch ### 171 | # Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration 172 | poetry.toml 173 | 174 | # ruff 175 | .ruff_cache/ 176 | 177 | # LSP config files 178 | pyrightconfig.json 179 | 180 | # End of https://www.toptal.com/developers/gitignore/api/python 181 | 182 | 183 | # Created by https://www.toptal.com/developers/gitignore/api/linux 184 | # Edit at https://www.toptal.com/developers/gitignore?templates=linux 185 | 186 | ### Linux ### 187 | *~ 188 | 189 | # temporary files which can be created if a process still has a handle open of a deleted file 190 | .fuse_hidden* 191 | 192 | # KDE directory preferences 193 | .directory 194 | 195 | # Linux trash folder which might appear on any partition or disk 196 | .Trash-* 197 | 198 | # .nfs files are created when an open file is removed but is still being accessed 199 | .nfs* 200 | 201 | # End of https://www.toptal.com/developers/gitignore/api/linux 202 | 203 | # Created by https://www.toptal.com/developers/gitignore/api/macos 204 | # Edit at https://www.toptal.com/developers/gitignore?templates=macos 205 | 206 | ### macOS ### 207 | # General 208 | .DS_Store 209 | .AppleDouble 210 | .LSOverride 211 | 212 | # Icon must end with two \r 213 | Icon 214 | 215 | 216 | # Thumbnails 217 | ._* 218 | 219 | # Files that might appear in the root of a volume 220 | .DocumentRevisions-V100 221 | .fseventsd 222 | .Spotlight-V100 223 | .TemporaryItems 224 | .Trashes 225 | .VolumeIcon.icns 226 | .com.apple.timemachine.donotpresent 227 | 228 | # Directories potentially created on remote AFP share 229 | .AppleDB 230 | .AppleDesktop 231 | Network Trash Folder 232 | Temporary Items 233 | .apdisk 234 | 235 | ### macOS Patch ### 236 | # iCloud generated files 237 | *.icloud 238 | 239 | # End of https://www.toptal.com/developers/gitignore/api/macos 240 | 241 | # Created by https://www.toptal.com/developers/gitignore/api/windows 242 | # Edit at 
- **processor.scheduling**: support custom interval
18 | 19 | ### Feat 20 | 21 | - **fetcher,storage**: make yt_dlp and boto3 optional 22 | - **util**: add retry decorator 23 | - **fetcher.core,processor**: add start and stop hook to fetcher 24 | 25 | ### Fix 26 | 27 | - **util**: rename retry module to retry_util 28 | 29 | ## 0.7.4 (2023-08-27) 30 | 31 | ### Fix 32 | 33 | - **rss.podcast**: fix pu_bdate format 34 | - **fetcher.processor**: check exit signal 35 | 36 | ## 0.7.3 (2023-08-26) 37 | 38 | ### Fix 39 | 40 | - **config**: fix tomlkit 41 | 42 | ## 0.7.2 (2023-08-26) 43 | 44 | ### Fix 45 | 46 | - **config.core**: fix union 47 | 48 | ## 0.7.1 (2023-08-26) 49 | 50 | ### Fix 51 | 52 | - **config.storage**: remove absmeta 53 | 54 | ## 0.7.0 (2023-08-25) 55 | 56 | ### Feat 57 | 58 | - **storage**: support local storage 59 | 60 | ### Fix 61 | 62 | - **cli**: support local storage 63 | - **rss.podcast**: fix items merge 64 | - **fetcher.youtube**: add source id to skip log 65 | 66 | ## 0.6.1 (2023-08-25) 67 | 68 | ### Fix 69 | 70 | - **fetcher.youtube**: cache dir 71 | 72 | ## 0.6.0 (2023-08-25) 73 | 74 | ### Feat 75 | 76 | - **fetcher.youtube**: add source id to log 77 | 78 | ## 0.5.0 (2023-08-25) 79 | 80 | ### Feat 81 | 82 | - **storage**: support start and stop storage 83 | - **config**: support filter episodes by regex 84 | - **config**: use storage instead s3 85 | 86 | ## 0.4.0 (2023-08-24) 87 | 88 | ### Feat 89 | 90 | - **fetcher**: support youtube channel 91 | 92 | ### Fix 93 | 94 | - **rss.core**: remove stylesheet 95 | - **fetcher.youtube**: catch download error 96 | - **rss.podcast**: fix image url 97 | 98 | ## 0.3.1 (2023-08-23) 99 | 100 | ### Fix 101 | 102 | - **peocessor.task**: fix mime 103 | - **asset**: fix script url 104 | 105 | ## 0.3.0 (2023-08-23) 106 | 107 | ### BREAKING CHANGE 108 | 109 | - changes for config file 110 | 111 | ### Feat 112 | 113 | - **rss**: add stylesheet 114 | - add exit signal 115 | 116 | ### Fix 117 | 118 | - **rss.core**: fix encoding of rss bytes 119 | - **config**: change 
s3.cdn_prefix to s3.public_endpoint 120 | 121 | ### Refactor 122 | 123 | - **processor**: move execution to Task class, and support task hook 124 | 125 | ## 0.2.2 (2023-08-22) 126 | 127 | ### Fix 128 | 129 | - **fetcher.youtube**: fetch image and link for episode 130 | 131 | ## 0.2.1 (2023-08-21) 132 | 133 | ### Fix 134 | 135 | - **config**: quote id before used to generate storage key 136 | 137 | ## 0.2.0 (2023-08-21) 138 | 139 | ### Feat 140 | 141 | - add cli 142 | - **processor**: add processor 143 | - **rss**: support mergation 144 | - **rss**: use qname to manage namespace 145 | - **rss**: support load rss object from xml string 146 | - **rss.core**: add plain resource and rss deserializer 147 | - **config**: add source config 148 | - **config**: support optional env and required env 149 | - add config 150 | - **rss**: add rss generator and serializer 151 | - complete youtube parser and s3 storage 152 | 153 | ### Fix 154 | 155 | - **rss**: compatible with apple's requirements 156 | - **processor.scheduling**: add next run time to add_job 157 | - **config**: fix decorator 158 | - **config**: fix tomlkit 159 | - **processor.scheduling**: fix shutdown 160 | - **fetcher.youtube**: fix logger 161 | - **config**: rename source.name to source.id 162 | - **processor.schedulling**: fix shutdown 163 | - **processor.core**: fix rss key 164 | - **rss**: fix text 165 | - **config**: add app config 166 | - **processor.core**: fix original file 167 | - change cli argument 168 | - **fetcher.youtube**: add lock 169 | - **fetcher**: rename parser to fetcher 170 | - **parser.youtube,-storage.s3**: remove redundant config dependency 171 | - **rss**: reduce public class 172 | - **env**: use dataclass as env object 173 | - **rss.podcast**: fix category pattern 174 | - supplement podcast field 175 | 176 | ### Refactor 177 | 178 | - **cli**: remove cli logic to cli module 179 | - add log 180 | - **rss**: hide unnecessary property 181 | - **parser.youtube**: use lru_cache 182 | - 
**config**: rename env to config and use pydantic manage config 183 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | This is free and unencumbered software released into the public domain. 2 | 3 | Anyone is free to copy, modify, publish, use, compile, sell, or 4 | distribute this software, either in source code form or as a compiled 5 | binary, for any purpose, commercial or non-commercial, and by any 6 | means. 7 | 8 | In jurisdictions that recognize copyright laws, the author or authors 9 | of this software dedicate any and all copyright interest in the 10 | software to the public domain. We make this dedication for the benefit 11 | of the public at large and to the detriment of our heirs and 12 | successors. We intend this dedication to be an overt act of 13 | relinquishment in perpetuity of all present and future rights to this 14 | software under copyright law. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | OTHER DEALINGS IN THE SOFTWARE. 23 | 24 | For more information, please refer to -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Podmaker 2 | 3 | *Read this document in other languages: [English](README.md), [简体中文](README.zh_CN.md)* 4 | 5 | Convert online media into podcast feeds. 
6 | 7 | ![PyPI - Version](https://img.shields.io/pypi/v/podmaker) 8 | ![PyPI - Status](https://img.shields.io/pypi/status/podmaker) 9 | ![PyPI - Python Version](https://img.shields.io/pypi/pyversions/podmaker) 10 | ![PyPI - Implementation](https://img.shields.io/pypi/implementation/podmaker) 11 | ![PyPI - License](https://img.shields.io/pypi/l/podmaker) 12 | 13 | 14 | ## Features 15 | 16 | - Extract audio from online videos. 17 | - No need to deploy web services. 18 | - Generate podcast feeds. 19 | - Deploy with watch mode to keep feeds up-to-date. 20 | 21 | ## Dependencies 22 | 23 | This tool uses **ffmpeg** to extract audio from videos. Ensure it's installed within `$PATH` before using this tool. 24 | 25 | Additionally, you should install extra dependencies according to your requirements: 26 | 27 | - `podmaker[all]`: Install all extra dependencies. 28 | - `podmaker[s3]`: Install dependencies for S3 storage. 29 | - `podmaker[youtube]`: Install dependencies for YouTube. 30 | 31 | Install multiple extra dependencies simultaneously using `podmaker[extra1,extra2,...]`. 32 | 33 | ## Configuration 34 | 35 | Before diving into this tool, craft a configuration file, a TOML file to be precise. 36 | By default, the file resides at `${WORK_DIR}/config.toml`. Customize the path using the `-c` or `--config` option. 37 | An example configuration file can be found at [config.example.toml](https://github.com/YogiLiu/podmaker/blob/main/config.example.toml). 
38 | 39 | ## Usage 40 | 41 | ### Systemd 42 | 43 | Deploy this tool in the background with systemd (requires root privileges): 44 | 45 | ```bash 46 | # create virtual environment 47 | apt install python3 python3-venv 48 | mkdir -p /opt/podmaker && cd /opt/podmaker 49 | python3 -m venv venv 50 | 51 | # install podmaker 52 | ./venv/bin/pip install "podmaker[all]" 53 | 54 | # create and edit config file 55 | curl -o config.toml https://raw.githubusercontent.com/YogiLiu/podmaker/main/config.example.toml 56 | vim config.toml 57 | 58 | # create systemd service 59 | curl -o /etc/systemd/system/podmaker.service https://raw.githubusercontent.com/YogiLiu/podmaker/main/systemd/podmaker.service 60 | systemctl daemon-reload 61 | 62 | # enable and start service 63 | systemctl enable podmaker 64 | systemctl start podmaker 65 | ``` 66 | 67 | ### Manual 68 | 69 | ### Using pip 70 | 71 | For the optimal experience, we recommend installing this tool within a virtual environment. 72 | 73 | ```bash 74 | pip install "podmaker[all]" 75 | ``` 76 | 77 | ### Using `pipx` 78 | 79 | ```bash 80 | pipx install "podmaker[all]" 81 | ``` 82 | 83 | ### Run 84 | 85 | ```bash 86 | podmaker -c path/to/config.toml 87 | ``` 88 | 89 | or 90 | 91 | ```bash 92 | python -m podmaker -c path/to/config.toml 93 | ``` 94 | 95 | ## Roadmap 96 | 97 | ### Platforms 98 | 99 | - [x] YouTube 100 | - [x] Playlist 101 | - [x] Channel 102 | - [ ] BiliBili 103 | 104 | ### Resource Hosting 105 | 106 | - [x] S3 107 | - [x] Local 108 | 109 | ## Contributing 110 | 111 | Your contributions are invaluable. Feel free to submit pull requests. 112 | Before committing, ensure your changes pass unit tests and `autohooks`. 113 | 114 | To activate `autohooks`, use the following command: 115 | 116 | ```bash 117 | poetry run autohooks activate --mode poetry 118 | ``` 119 | 120 | This process will automatically lint, format, and sort code imports. 121 | 122 | When introducing new features, remember to provide corresponding tests. 
另外, 你可以根据你的需求安装额外的依赖:
https://raw.githubusercontent.com/YogiLiu/podmaker/main/systemd/podmaker.service 60 | systemctl daemon-reload 61 | 62 | # 启动服务,并设置开机自启 63 | systemctl enable podmaker 64 | systemctl start podmaker 65 | ``` 66 | 67 | ### 手动运行 68 | 69 | ### 使用 pip 安装 70 | 71 | 为了获得最佳体验,我们建议你在虚拟环境中安装本工具。 72 | 73 | ```bash 74 | pip install "podmaker[all]" 75 | ``` 76 | 77 | ### 使用 `pipx` 安装 78 | 79 | ```bash 80 | pipx install "podmaker[all]" 81 | ``` 82 | 83 | ### 运行 84 | 85 | ```bash 86 | podmaker -c path/to/config.toml 87 | ``` 88 | 89 | 或者 90 | 91 | ```bash 92 | python -m podmaker -c path/to/config.toml 93 | ``` 94 | 95 | ## 项目规划 96 | 97 | ### 平台支持 98 | 99 | - [x] YouTube 100 | - [x] 播放列表 101 | - [x] 频道 102 | - [ ] 哔哩哔哩(鸽) 103 | 104 | ### 资源托管 105 | 106 | - [x] S3 107 | - [x] 本地文件 108 | 109 | ## 贡献指南 110 | 111 | 你的贡献弥足珍贵,请不要吝啬提出你的 Pull Request。 112 | 在提交代码之前,请确保你的代码通过单元测试和 `autohooks`。 113 | 114 | 你可以使用下述命令激活 `autohooks`: 115 | 116 | ```bash 117 | poetry run autohooks activate --mode poetry 118 | ``` 119 | 120 | 这个程序会自动进行代码风格检查、格式化和 import 排序。 121 | 122 | 如果你添加了新的功能,请确保提供了相应的测试。 123 | 124 | ## 许可证 125 | 126 | 查看许可证详情,请参阅 [LICENSE](https://github.com/YogiLiu/podmaker/blob/main/LICENSE)。 -------------------------------------------------------------------------------- /config.example.toml: -------------------------------------------------------------------------------- 1 | [app] 2 | # running mode, "oneshot" or "watch" 3 | # - oneshot: generate the feed and exit 4 | # - watch: generate the feed and watch for changes 5 | mode = "oneshot" 6 | 7 | # level of logging, "DEBUG", "INFO", "WARNING", "ERROR" 8 | loglevel = "INFO" 9 | 10 | # optional, the admin of the feed 11 | [owner] 12 | name = "podmaker" 13 | email = "admin@podmaker.dev" 14 | 15 | # notice: the sources is an array, it must specify using `[[]]` 16 | [[sources]] 17 | # used to generate the feed url, must be unique, prefer to use numbers, letters, space and underscores 18 | id = "source_1" 19 | # optional, the display name of 
# the directory to store the generated feed, you must change it
import argparse
import logging
import sys
from pathlib import Path

from podmaker.config import ConfigError, PMConfig
from podmaker.processor import get_processor
from podmaker.storage import get_storage
from podmaker.util import exit_signal

logger = logging.getLogger(__name__)


def run() -> None:
    """CLI entry point: parse arguments, load config, and run the processor.

    Exits with status 1 when the config file is missing or invalid.
    """
    parser = argparse.ArgumentParser(prog='podmaker', description='Podcast generator.')
    parser.add_argument('-c', '--conf', help='Path to config file (default: config.toml).', type=Path,
                        default=Path('config.toml'))
    args = parser.parse_args()
    config: PMConfig
    try:
        config = PMConfig.from_file(args.conf)
    except ConfigError as e:
        # basicConfig has not run yet, but logging's last-resort handler
        # still emits ERROR-level records to stderr.
        logger.error(e)
        sys.exit(1)
    logging.basicConfig(
        level=config.app.loglevel,
        format='%(asctime)s %(levelname)s %(name)s %(message)s',
    )
    storage = get_storage(config.storage)
    storage.start()
    logger.info(f'running in {config.app.mode} mode')
    processor = get_processor(config, storage)
    exit_signal.listen()
    try:
        processor.run()
    finally:
        # Bug fix: the original called storage.stop() only on exception,
        # leaking storage resources when processor.run() returned normally.
        # try/finally covers both paths and re-raises automatically.
        storage.stop()
__future__ import annotations 2 | 3 | import re 4 | import sys 5 | from pathlib import PurePath 6 | from typing import Literal, Optional, Union 7 | from urllib.parse import quote 8 | 9 | from pydantic import BaseModel, EmailStr, Field, HttpUrl, ValidationError 10 | 11 | from podmaker.config.storage import LocalConfig, S3Config 12 | 13 | if sys.version_info >= (3, 11): 14 | import tomllib as toml 15 | else: 16 | import tomlkit as toml 17 | 18 | 19 | class OwnerConfig(BaseModel): 20 | name: Optional[str] = Field(None, min_length=1, frozen=True) 21 | email: EmailStr = Field(frozen=True) 22 | 23 | 24 | # noinspection PyNestedDecorators 25 | class AppConfig(BaseModel): 26 | mode: Literal['oneshot', 'watch'] = Field('oneshot', frozen=True) 27 | loglevel: Literal['DEBUG', 'INFO', 'WARNING', 'ERROR'] = Field('INFO', frozen=True) 28 | 29 | 30 | class SourceConfig(BaseModel): 31 | id: str = Field(min_length=1, frozen=True) 32 | name: Optional[str] = Field(None, min_length=1, frozen=True) 33 | regex: Optional[re.Pattern[str]] = Field(None, frozen=True) 34 | url: HttpUrl = Field(frozen=True) 35 | interval: int = Field(1 * 60 * 60, ge=1, frozen=True) 36 | 37 | def get_storage_key(self, key: str) -> str: 38 | return f'{quote(self.id)}/{key}' 39 | 40 | 41 | class ConfigError(Exception): 42 | pass 43 | 44 | 45 | class PMConfig(BaseModel): 46 | owner: Optional[OwnerConfig] = Field(None, frozen=True) 47 | storage: Union[S3Config, LocalConfig] = Field(frozen=True) 48 | sources: tuple[SourceConfig, ...] 
= Field(frozen=True) 49 | app: AppConfig = Field(default_factory=AppConfig, frozen=True) 50 | 51 | @classmethod 52 | def from_file(cls, path: PurePath) -> PMConfig: 53 | try: 54 | with open(path, 'rb') as f: 55 | doc = toml.load(f) 56 | # https://github.com/sdispater/tomlkit/issues/275 57 | if getattr(doc, 'unwrap', None): 58 | data = doc.unwrap() 59 | else: 60 | data = doc 61 | except FileNotFoundError as e: 62 | raise ConfigError(f'config file not found: {path}') from e 63 | try: 64 | return cls(**data) 65 | except ValidationError as e: 66 | raise ConfigError(f'can not initial config: {e}') 67 | -------------------------------------------------------------------------------- /podmaker/config/storage.py: -------------------------------------------------------------------------------- 1 | from pathlib import PurePath 2 | from typing import Literal 3 | 4 | from pydantic import BaseModel, Field, HttpUrl 5 | 6 | SupportedStorage = Literal['s3', 'local'] 7 | 8 | 9 | class StorageConfig(BaseModel): 10 | dest: SupportedStorage = Field(min_length=1, frozen=True) 11 | 12 | 13 | class S3Config(StorageConfig): 14 | dest: Literal['s3'] = Field(frozen=True) 15 | access_key: str = Field(min_length=1, frozen=True) 16 | access_secret: str = Field(min_length=1, frozen=True) 17 | bucket: str = Field(min_length=1, frozen=True) 18 | endpoint: HttpUrl = Field(frozen=True) 19 | public_endpoint: HttpUrl = Field(frozen=True) 20 | 21 | 22 | class LocalConfig(StorageConfig): 23 | dest: Literal['local'] = Field(frozen=True) 24 | base_dir: PurePath = Field(min_length=1, frozen=True) 25 | public_endpoint: HttpUrl = Field(frozen=True) 26 | -------------------------------------------------------------------------------- /podmaker/fetcher/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = ['Fetcher'] 2 | 3 | from podmaker.fetcher.core import Fetcher 4 | -------------------------------------------------------------------------------- 
from abc import ABC, abstractmethod

from podmaker.config import SourceConfig
from podmaker.rss import Podcast


class Fetcher(ABC):
    """Contract for fetchers that turn a configured source into a Podcast feed."""

    @abstractmethod
    def fetch(self, source: SourceConfig) -> Podcast:
        """Build and return the Podcast for *source*; subclasses must implement."""
        raise NotImplementedError

    def start(self) -> None:
        """Lifecycle hook run before the first fetch; no-op by default."""

    def stop(self) -> None:
        """Lifecycle hook run on shutdown; no-op by default."""
class YouTube(Fetcher):
    """Fetcher that converts a YouTube playlist or channel page into a Podcast."""

    def __init__(self, storage: Storage, owner_config: OwnerConfig | None):
        self.storage = storage
        self.owner_config = owner_config
        self.ydl_opts = {
            'logger': logging.getLogger('yt_dlp'),
            'cachedir': tempfile.gettempdir(),
        }

    def fetch_info(self, url: str) -> dict[str, Any]:
        """Extract flat metadata for *url* without downloading or processing entries."""
        with yt_dlp.YoutubeDL(self.ydl_opts) as ydl:
            result: dict[str, Any] = ydl.extract_info(str(url), download=False, process=False)
        return result

    def fetch(self, source: SourceConfig) -> Podcast:
        """Fetch the source page and build a feed; rejects non-listing URLs."""
        info = self.fetch_info(str(source.url))
        if not isgenerator(info.get('entries', None)):
            # Single videos (or anything without a lazy entry generator)
            # cannot be turned into a feed.
            raise ValueError(f'unsupported url: {source.url}')
        return self.fetch_entries(info, source)

    def fetch_entries(self, info: dict[str, Any], source: SourceConfig) -> Podcast:
        """Assemble a Podcast whose items are resolved lazily from *info*'s entries."""
        logger.info(f'[{source.id}] parse entries: {source.url}')
        owner = None
        if self.owner_config is not None:
            owner = Owner(name=self.owner_config.name, email=self.owner_config.email)
        return Podcast(
            items=Entry(info.get('entries', []), self.ydl_opts, self.storage, source),
            link=urlparse(info['webpage_url']),
            title=source.name or info['title'],
            image=EntryThumbnail(info['thumbnails']),
            description=info['description'],
            owner=owner,
            author=info['uploader'],
            categories=info.get('tags', []),
        )
self.entries: 84 | exit_signal.check() 85 | is_empty = False 86 | try: 87 | video_info = ydl.extract_info(entry['url'], download=False) 88 | except yt_dlp.DownloadError as e: 89 | logger.error(f'[{self.source.id}] failed to fetch item({entry["url"]}) due to {e}') 90 | continue 91 | if self.source.regex and not self.source.regex.search(video_info['title']): 92 | logger.info(f'[{self.source.id}] skip item {video_info["id"]} due to regex') 93 | continue 94 | upload_at = datetime.strptime(video_info['upload_date'], '%Y%m%d').replace(tzinfo=timezone.utc) 95 | logger.info(f'[{self.source.id}] fetch item: {video_info["id"]}') 96 | yield Episode( 97 | enclosure=Audio(video_info, self.ydl_opts, self.storage, self.source), 98 | title=video_info['title'], 99 | description=video_info['description'], 100 | guid=video_info['id'], 101 | duration=timedelta(seconds=video_info['duration']), 102 | pub_date=upload_at, 103 | link=urlparse(video_info['webpage_url']), 104 | image=PlainResource(urlparse(video_info['thumbnail'])), 105 | ) 106 | if is_empty: 107 | return None 108 | 109 | 110 | class EntryThumbnail(Resource[ParseResult]): 111 | def __init__(self, thumbnails: list[dict[str, Any]]): 112 | self.thumbnails = thumbnails 113 | 114 | def get(self) -> ParseResult | None: 115 | if len(self.thumbnails) == 0: 116 | return None 117 | thumbnail = max(self.thumbnails, key=lambda t: t.get('width', 0)) 118 | result: ParseResult = urlparse(thumbnail['url']) 119 | return result 120 | 121 | 122 | class Audio(Resource[Enclosure]): 123 | def __init__(self, info: dict[str, Any], ydl_opts: dict[str, Any], storage: Storage, source: SourceConfig): 124 | self.info = info 125 | self.ydl_opts: dict[str, Any] = { 126 | 'format': 'ba', 127 | 'postprocessors': [{ 128 | 'key': 'FFmpegExtractAudio', 129 | 'preferredcodec': 'mp3', 130 | }], 131 | } 132 | self.ydl_opts.update(ydl_opts) 133 | self.storage = storage 134 | self.source = source 135 | 136 | def upload(self, key: str) -> tuple[ParseResult, int]: 
137 | logger.debug(f'[{self.source.id}] upload audio: {key}') 138 | with TemporaryDirectory(prefix='podmaker_youtube_') as cache_dir: 139 | opts = {'paths': {'home': cache_dir}} 140 | opts.update(self.ydl_opts) 141 | with yt_dlp.YoutubeDL(opts) as ydl: 142 | logger.info(f'[{self.source.id}] fetch audio: {self.info["id"]}') 143 | downloaded_info = ydl.extract_info(self.info['webpage_url']) 144 | audio_path = downloaded_info['requested_downloads'][0]['filepath'] 145 | length = os.path.getsize(audio_path) 146 | with open(audio_path, 'rb') as f: 147 | logger.info(f'[{self.source.id}] upload audio: {key}') 148 | url = self.storage.put(f, key=key, content_type='audio/mp3') 149 | return url, length 150 | 151 | @lru_cache(maxsize=1) 152 | def get(self) -> Enclosure | None: 153 | logger.debug(f'[{self.source.id}] fetch audio: {self.info["id"]}') 154 | key = self.source.get_storage_key(f'youtube/{self.info["id"]}.mp3') 155 | info = self.storage.check(key) 156 | if info: 157 | logger.info(f'[{self.source.id}] audio already exists: {key}') 158 | url = info.uri 159 | length = info.size 160 | else: 161 | url, length = self.upload(key) 162 | return Enclosure(url=url, length=length, type='audio/mp3') 163 | -------------------------------------------------------------------------------- /podmaker/processor/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = ['Processor', 'ScheduleProcessor', 'get_processor'] 2 | 3 | from podmaker.config import PMConfig 4 | from podmaker.processor.core import Processor 5 | from podmaker.processor.scheduling import ScheduleProcessor 6 | from podmaker.storage import Storage 7 | 8 | 9 | def get_processor(config: PMConfig, storage: Storage) -> Processor: 10 | if config.app.mode == 'watch': 11 | return ScheduleProcessor(config=config, storage=storage) 12 | else: 13 | return Processor(config=config, storage=storage) 14 | -------------------------------------------------------------------------------- 
/podmaker/processor/core.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import logging 4 | from concurrent.futures import ThreadPoolExecutor 5 | from contextlib import contextmanager 6 | from typing import Any, Iterator 7 | 8 | from podmaker.config import PMConfig, SourceConfig 9 | from podmaker.fetcher import Fetcher 10 | from podmaker.processor.task import Task 11 | from podmaker.storage import Storage 12 | from podmaker.util import exit_signal 13 | 14 | logger = logging.getLogger(__name__) 15 | 16 | 17 | class Processor: 18 | def __init__(self, config: PMConfig, storage: Storage): 19 | self._config = config 20 | self._storage = storage 21 | exit_signal.register(self._exit_handler) 22 | self._fetcher_instances: dict[str, Fetcher] = {} 23 | 24 | @contextmanager 25 | def _context(self) -> Iterator[None]: 26 | for fetcher in self._fetcher_instances.values(): 27 | fetcher.start() 28 | try: 29 | yield 30 | finally: 31 | for fetcher in self._fetcher_instances.values(): 32 | fetcher.stop() 33 | 34 | def _get_fetcher(self, source: SourceConfig) -> Fetcher: 35 | if source.url.host not in self._fetcher_instances: 36 | if source.url.host == 'www.youtube.com': 37 | from podmaker.fetcher.youtube import YouTube 38 | self._fetcher_instances[source.url.host] = YouTube(self._storage, self._config.owner) 39 | else: 40 | raise ValueError(f'unsupported host: {source.url.host}') 41 | return self._fetcher_instances[source.url.host] 42 | 43 | @property 44 | def _tasks(self) -> Iterator[Task]: 45 | for source in self._config.sources: 46 | fetcher = self._get_fetcher(source) 47 | yield Task(fetcher, source, self._storage, self._config.owner) 48 | 49 | def _exit_handler(self, *_: Any) -> None: 50 | logger.warning('received exit signal') 51 | self.exit_handler() 52 | 53 | def exit_handler(self, *_: Any) -> None: 54 | pass 55 | 56 | def run(self) -> None: 57 | with self._context(): 58 | with 
ThreadPoolExecutor(max_workers=5) as executor: 59 | for task in self._tasks: 60 | logger.info(f'submit task: {task.id}') 61 | executor.submit(task.execute) 62 | logger.info('processor exited') 63 | -------------------------------------------------------------------------------- /podmaker/processor/scheduling.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from datetime import datetime 3 | from typing import Any 4 | 5 | from apscheduler.jobstores.base import JobLookupError 6 | from apscheduler.schedulers.blocking import BlockingScheduler 7 | from apscheduler.triggers.interval import IntervalTrigger 8 | 9 | from podmaker.config import PMConfig 10 | from podmaker.processor.core import Processor 11 | from podmaker.storage import Storage 12 | 13 | logger = logging.getLogger(__name__) 14 | 15 | 16 | class ScheduleProcessor(Processor): 17 | def __init__(self, config: PMConfig, storage: Storage): 18 | super().__init__(config, storage) 19 | self._scheduler = BlockingScheduler() 20 | 21 | def exit_handler(self, *_: Any) -> None: 22 | self._scheduler.shutdown(wait=False) 23 | 24 | def _before_hook(self, task_id: str) -> None: 25 | try: 26 | self._scheduler.pause_job(task_id) 27 | except JobLookupError: 28 | logger.warning(f'task({task_id}) not found, maybe it was removed') 29 | 30 | def _after_hook(self, task_id: str) -> None: 31 | try: 32 | self._scheduler.resume_job(task_id) 33 | except JobLookupError: 34 | logger.warning(f'task({task_id}) not found, maybe it was removed') 35 | 36 | def run(self) -> None: 37 | with self._context(): 38 | for task in self._tasks: 39 | logger.info(f'schedule task: {task.id}, it well be run after 1 minute and every 1 hour') 40 | task.before = self._before_hook 41 | task.after = self._after_hook 42 | self._scheduler.add_job( 43 | func=task.execute, 44 | trigger=IntervalTrigger(seconds=task.interval), 45 | next_run_time=datetime.now(), 46 | id=task.id, 47 | name=f'Job-{task.id}', 48 | ) 
49 | self._scheduler.start() 50 | logger.info('processor exited') 51 | 52 | -------------------------------------------------------------------------------- /podmaker/processor/task.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import logging 4 | from io import BytesIO 5 | from typing import Any, Callable 6 | from uuid import uuid4 7 | 8 | from podmaker.config import OwnerConfig, SourceConfig 9 | from podmaker.fetcher import Fetcher 10 | from podmaker.rss import Podcast 11 | from podmaker.storage import EMPTY_FILE, Storage 12 | from podmaker.util import ExitSignalError 13 | 14 | logger = logging.getLogger(__name__) 15 | 16 | Hook = Callable[[str], None] 17 | 18 | 19 | def _do_nothing(*_: Any) -> None: 20 | pass 21 | 22 | 23 | class Task: 24 | def __init__(self, fetcher: Fetcher, source: SourceConfig, storage: Storage, owner: OwnerConfig | None): 25 | self._id = uuid4().hex 26 | logger.info(f'create task {self._id} for {source.id}') 27 | self._source = source 28 | self._storage = storage 29 | self._owner = owner 30 | self._fetcher = fetcher 31 | self.before: Hook = _do_nothing 32 | self.after: Hook = _do_nothing 33 | 34 | @property 35 | def id(self) -> str: 36 | return self._id 37 | 38 | @property 39 | def interval(self) -> int: 40 | return self._source.interval 41 | 42 | def _fetch_original(self, key: str) -> Podcast | None: 43 | with self._storage.get(key) as xml_file: 44 | if xml_file == EMPTY_FILE: 45 | logger.info(f'no original file: {key}') 46 | return None 47 | xml = xml_file.read() 48 | return Podcast.from_rss(xml.decode('utf-8')) 49 | 50 | def _execute(self) -> None: 51 | logger.info(f'execute task: {self.id}') 52 | try: 53 | key = self._source.get_storage_key('feed.rss') 54 | original_pod = self._fetch_original(key) 55 | source_pod = self._fetcher.fetch(self._source) 56 | if original_pod: 57 | has_changed = original_pod.merge(source_pod) 58 | else: 59 | has_changed = True 
60 | original_pod = source_pod 61 | if has_changed: 62 | logger.info(f'update: {self._source.id}') 63 | buf = BytesIO(original_pod.bytes) 64 | self._storage.put(buf, key, content_type='text/xml; charset=utf-8') 65 | else: 66 | logger.info(f'no change: {self._source.id}') 67 | except ExitSignalError as e: 68 | logger.warning(f'task ({self.id}) cancelled due to {e}') 69 | except BaseException as e: 70 | logger.error(f'task execute failed: {e} task: {self.id}') 71 | 72 | def execute(self) -> None: 73 | logger.debug(f'task running: {self._source.id}') 74 | self.before(self.id) 75 | self._execute() 76 | logger.debug(f'task finished: {self.id}') 77 | self.after(self.id) 78 | -------------------------------------------------------------------------------- /podmaker/rss/README.md: -------------------------------------------------------------------------------- 1 | Read more about the RSS feed in 2 | the [RSS feed guidelines for Google Podcasts](https://support.google.com/podcast-publishers/answer/9889544?sjid=3442458601435072975-NA) and 3 | [Podcast RSS feed requirements for Apple Podcasts](https://podcasters.apple.com/support/823-podcast-requirements). 
4 | -------------------------------------------------------------------------------- /podmaker/rss/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = [ 2 | 'Resource', 3 | 'Enclosure', 4 | 'Episode', 5 | 'Podcast', 6 | 'Owner', 7 | ] 8 | 9 | from podmaker.rss.core import Resource 10 | from podmaker.rss.enclosure import Enclosure 11 | from podmaker.rss.episode import Episode 12 | from podmaker.rss.podcast import Owner, Podcast 13 | -------------------------------------------------------------------------------- /podmaker/rss/core.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import sys 4 | from abc import ABCMeta, abstractmethod 5 | from typing import Any, Generic, TypeVar 6 | from xml.etree.ElementTree import Element, fromstring, tostring 7 | 8 | from podmaker.rss.util.namespace import NamespaceGenerator 9 | from podmaker.rss.util.parse import XMLParser 10 | from podmaker.util import exit_signal 11 | 12 | if sys.version_info >= (3, 11): 13 | from typing import Self 14 | else: 15 | from typing_extensions import Self 16 | 17 | ResourceType = TypeVar('ResourceType') 18 | 19 | 20 | class Resource(Generic[ResourceType], metaclass=ABCMeta): 21 | @abstractmethod 22 | def get(self) -> ResourceType | None: 23 | raise NotImplementedError 24 | 25 | def ensure(self) -> ResourceType: 26 | resource = self.get() 27 | if resource is None: 28 | raise ValueError('Resource not found') 29 | return resource 30 | 31 | def __getattribute__(self, name: Any) -> Any: 32 | if name == 'get': 33 | exit_signal.check() 34 | return super().__getattribute__(name) 35 | 36 | 37 | class PlainResource(Resource[ResourceType]): 38 | """ 39 | A resource that is not fetched from a remote location. 40 | It is useful for store resources that are already available in memory. 
41 | """ 42 | 43 | def __init__(self, resource: ResourceType): 44 | self.resource = resource 45 | 46 | def get(self) -> ResourceType: 47 | return self.resource 48 | 49 | 50 | # noinspection HttpUrlsUsage 51 | itunes = NamespaceGenerator('itunes', 'http://www.itunes.com/dtds/podcast-1.0.dtd') 52 | # noinspection HttpUrlsUsage 53 | content = NamespaceGenerator('content', 'http://purl.org/rss/1.0/modules/content/') 54 | 55 | 56 | class RSSComponent(XMLParser, metaclass=ABCMeta): 57 | namespace = dict(**itunes.namespace, **content.namespace) 58 | 59 | @property 60 | @abstractmethod 61 | def xml(self) -> Element: 62 | raise NotImplementedError 63 | 64 | @classmethod 65 | @abstractmethod 66 | def from_xml(cls, el: Element) -> Self: 67 | raise NotImplementedError 68 | 69 | @abstractmethod 70 | def merge(self, other: Self) -> bool: 71 | """ 72 | Merge the other component into this one. 73 | :return: Whether changes were made. 74 | """ 75 | raise NotImplementedError 76 | 77 | @staticmethod 78 | def _el_creator(tag: str, text: str | None = None, attrib: dict[str, str] | None = None) -> Element: 79 | el = Element(tag, attrib or {}) 80 | if text is not None: 81 | el.text = text 82 | return el 83 | 84 | def _common_merge(self, other: Self, field: str | tuple[str, ...]) -> bool: 85 | if isinstance(field, tuple): 86 | return any(self._common_merge(other, f) for f in field) 87 | a = getattr(self, field) 88 | b = getattr(other, field) 89 | if a != b: 90 | setattr(self, field, b) 91 | return True 92 | return False 93 | 94 | 95 | # https://www.w3.org/TR/xml/#sec-pi 96 | _pis = '' 97 | _pis_bytes = _pis.encode('utf-8') 98 | 99 | 100 | class RSSSerializer(RSSComponent, metaclass=ABCMeta): 101 | @property 102 | def str(self) -> str: 103 | s = tostring(self.xml, encoding='unicode') 104 | return _pis + s 105 | 106 | @property 107 | def bytes(self) -> bytes: 108 | s = tostring(self.xml, encoding='utf-8') # type: bytes 109 | return _pis_bytes + s 110 | 111 | 112 | class 
RSSDeserializer(RSSComponent, metaclass=ABCMeta): 113 | @classmethod 114 | def from_rss(cls, rss: str | bytes) -> Self: 115 | if isinstance(rss, bytes): 116 | rss = rss.decode('utf-8') 117 | el: Element = fromstring(rss) 118 | return cls.from_xml(el) 119 | -------------------------------------------------------------------------------- /podmaker/rss/enclosure.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from dataclasses import dataclass 3 | from urllib.parse import ParseResult, urlparse 4 | from xml.etree.ElementTree import Element 5 | 6 | from podmaker.rss.core import RSSComponent 7 | 8 | if sys.version_info >= (3, 11): 9 | from typing import Self 10 | else: 11 | from typing_extensions import Self 12 | 13 | 14 | @dataclass 15 | class Enclosure(RSSComponent): 16 | # URL of the episode audio file. 17 | url: ParseResult 18 | # Size of the episode audio file in bytes. 19 | length: int 20 | # The standard MIME type of the episode. 21 | type: str 22 | 23 | @property 24 | def xml(self) -> Element: 25 | return self._el_creator( 26 | 'enclosure', 27 | attrib={'url': self.url.geturl(), 'length': str(self.length), 'type': self.type} 28 | ) 29 | 30 | @classmethod 31 | def from_xml(cls, el: Element) -> Self: 32 | url = urlparse(cls._parse_required_attrib(el, '.', 'url')) 33 | length_str = cls._parse_required_attrib(el, '.', 'length') 34 | try: 35 | length = int(length_str) 36 | except ValueError: 37 | raise ValueError(f'length must be int: {length_str}') 38 | content_type = cls._parse_required_attrib(el, '.', 'type') 39 | return cls( 40 | url, 41 | length, 42 | content_type 43 | ) 44 | 45 | def merge(self, other: Self) -> bool: 46 | return self._common_merge(other, ('url', 'length', 'type')) 47 | -------------------------------------------------------------------------------- /podmaker/rss/episode.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 

import logging
import math
import sys
from dataclasses import dataclass
from datetime import datetime, timedelta, timezone
from email.utils import format_datetime, parsedate_to_datetime
from typing import Any
from urllib.parse import ParseResult, urlparse
from xml.etree.ElementTree import Element

from podmaker.rss import Enclosure, Resource
from podmaker.rss.core import PlainResource, RSSComponent, itunes

if sys.version_info >= (3, 11):
    from typing import Self
else:
    from typing_extensions import Self

logger = logging.getLogger(__name__)


@dataclass
class Episode(RSSComponent):
    """A single <item> of the feed, serializable to and from RSS XML."""

    # Fully-qualified URL of the episode audio file, including the format extension (for example, .wav, .mp3).
    enclosure: Resource[Enclosure]
    # Title of the podcast episode.
    title: str
    # A plaintext description of the podcast.
    description: str | None = None
    # Indicates whether this episode contains explicit language or adult content.
    explicit: bool | None = False
    # A permanently-assigned, case-sensitive Globally Unique Identifier for a podcast episode.
    guid: str | None = None
    # Duration of the episode.
    duration: timedelta | None = None
    # Publication date of the episode, in RFC 822 (section 5.1) format.
    # https://www.rfc-editor.org/rfc/rfc822#section-5.1
    pub_date: datetime | None = None
    # An episode link URL.
    link: ParseResult | None = None
    # The episode artwork.
    image: Resource[ParseResult] | None = None

    @property
    def xml(self) -> Element:
        """Render the <item> element; optional fields are emitted only when set."""
        el = Element('item')
        el.append(self._enclosure_el)
        el.append(self._title_el)
        el.append(self._itunes_title_el)
        if self.description:
            el.append(self._description_el)
            el.append(self._summary_e)
        if self.explicit is not None:
            el.append(self._explicit_el)
        if self.guid:
            el.append(self._guid_el)
        if self.duration:
            el.append(self._duration_el)
        if self.pub_date:
            el.append(self._pub_date_el)
        if self.link:
            el.append(self._link_el)
        if self.image:
            el.append(self._image_el)
        return el

    @classmethod
    def from_xml(cls, el: Element) -> Self:
        """Parse an <item> element; itunes:* variants win over the plain tags."""
        enclosure = cls._parse_enclosure(el)
        itunes_title = cls._parse_optional_text(el, f'.{itunes("title")}')
        if itunes_title is None:
            title = cls._parse_required_text(el, '.title')
        else:
            title = itunes_title
        description = cls._parse_optional_text(el, '.description')
        if description is None:
            # fall back to itunes:summary when <description> is absent
            description = cls._parse_optional_text(el, f'.{itunes("summary")}')
        explicit_str = cls._parse_optional_text(el, f'.{itunes("explicit")}')
        explicit = explicit_str == 'yes' if explicit_str is not None else None
        guid = cls._parse_optional_text(el, '.guid')
        duration = cls._parse_duration(el)
        pub_date = cls._parse_pub_date(el)
        link_str = cls._parse_optional_text(el, '.link')
        if link_str is not None:
            link = urlparse(link_str)
        else:
            link = None
        image_url = cls._parse_optional_attrib(el, f'.{itunes("image")}', 'href')
        if image_url is not None:
            image = PlainResource(urlparse(image_url))
        else:
            image = None
        return cls(enclosure, title, description, explicit, guid, duration, pub_date, link, image)

    def merge(self, other: Self) -> bool:
        """Merge *other* into self; returns True when anything changed.

        NOTE(review): link and image are not merged here, unlike
        Podcast.merge which does merge its image -- confirm whether that
        asymmetry is intentional.
        """
        has_changed = False
        enclosure = self.enclosure.ensure()
        if enclosure.merge(other.enclosure.ensure()):
            has_changed = True
            self.enclosure = PlainResource(enclosure)
        return any([
            has_changed,
            self._common_merge(
                other,
                ('title', 'description', 'explicit', 'guid', 'duration', 'pub_date')
            )
        ])

    @property
    def unique_id(self) -> str:
        """Identity used for de-duplication: guid, else the enclosure URL."""
        if self.guid is None:
            return self.enclosure.ensure().url.geturl()
        return self.guid

    def __eq__(self, other: Any) -> bool:
        if not isinstance(other, Episode):
            return False
        return self.unique_id == other.unique_id

    def __hash__(self) -> int:
        return hash(self.unique_id)

    @classmethod
    def _parse_pub_date(cls, el: Element) -> datetime | None:
        """Parse pubDate: RFC 822 first, then ISO 8601 as a fallback; None on failure."""
        pub_date_str = cls._parse_optional_text(el, '.pubDate')
        if pub_date_str is None:
            return None
        try:
            dt = parsedate_to_datetime(pub_date_str)
        except (TypeError, ValueError):
            try:
                # fromisoformat rejects a trailing 'Z'; normalize it to +00:00
                if pub_date_str.endswith('Z'):
                    pub_date_str = pub_date_str[:-1] + '+00:00'
                dt = datetime.fromisoformat(pub_date_str)
            except ValueError:
                logger.warning(f'invalid pubDate: {pub_date_str}')
                return None
        if dt.tzinfo is None:
            # naive timestamps are assumed to be UTC
            return dt.replace(tzinfo=timezone.utc)
        return dt

    @classmethod
    def _parse_enclosure(cls, el: Element) -> PlainResource[Enclosure]:
        enclosure_el = cls._parse_required_el(el, '.enclosure')
        return PlainResource(Enclosure.from_xml(enclosure_el))

    @classmethod
    def _parse_duration(cls, el: Element) -> timedelta | None:
        """Parse itunes:duration, either plain seconds or colon-separated parts."""
        duration_str = cls._parse_optional_text(el, f'.{itunes("duration")}')
        if duration_str is None:
            return None
        try:
            if ':' in duration_str:
                # e.g. HH:MM:SS or MM:SS -> fold into total seconds
                secs = 0
                for c in duration_str.split(':'):
                    secs = secs * 60 + int(c)
            else:
                secs = int(duration_str)
            return timedelta(seconds=secs)
        except ValueError:
            logger.warning(f'invalid duration: {duration_str}')
            return None

    @property
    def _enclosure_el(self) -> Element:
        return self.enclosure.ensure().xml

    @property
    def _title_el(self) -> Element:
        return self._el_creator('title', self.title)

    @property
    def _itunes_title_el(self) -> Element:
        return itunes.el('title', text=self.title)

    @property
    def _description_el(self) -> Element:
        if self.description is None:
            raise ValueError('description is required')
        return self._el_creator('description', self.description)

    @property
    def _summary_e(self) -> Element:
        if self.description is None:
            raise ValueError('description is required')
        return itunes.el('summary', text=self.description)

    @property
    def _explicit_el(self) -> Element:
        return itunes.el('explicit', text='yes' if self.explicit else 'no')

    @property
    def _guid_el(self) -> Element:
        if self.guid is None:
            raise ValueError('empty guid field')
        # URLs are treated as permalinks, everything else as an opaque id
        is_perma_link = 'false'
        if self.guid.startswith('http'):
            is_perma_link = 'true'
        return self._el_creator('guid', self.guid, {'isPermaLink': is_perma_link})

    @property
    def _duration_el(self) -> Element:
        if self.duration is None:
            raise ValueError('empty duration field')
        # round up to whole seconds for the feed
        dur = math.ceil(self.duration.total_seconds())
        return itunes.el('duration', text=str(dur))

    @property
    def _pub_date_el(self) -> Element:
        if self.pub_date is None:
            raise ValueError('empty pub_date field')
        return self._el_creator('pubDate', format_datetime(self.pub_date))

    @property
    def _link_el(self) -> Element:
        if self.link is None:
            raise ValueError('empty link field')
        return self._el_creator('link', self.link.geturl())

    @property
    def _image_el(self) -> Element:
        if self.image is None:
            raise ValueError('empty image field')
        return itunes.el('image', attrib={'href': self.image.ensure().geturl()})


# /podmaker/rss/podcast.py
from __future__ import annotations

import re
import sys
from collections.abc import Iterable
from dataclasses import dataclass, field
from typing import Any
from urllib.parse import ParseResult, urlparse
from xml.etree.ElementTree import Element

from podmaker.rss import Episode, Resource
from podmaker.rss.core import PlainResource, RSSDeserializer, RSSSerializer, itunes

if sys.version_info >= (3, 11):
    from typing import Self
else:
    from typing_extensions import Self

# categories may contain word characters, spaces and '&' only
_category_pattern = re.compile(r'^[\w &]+$')


@dataclass
class Owner:
    """The feed owner (itunes:owner); compared by email and name."""

    email: str
    name: str | None = None

    def __eq__(self, other: Any) -> bool:
        if not isinstance(other, Owner):
            return False
        return self.email == other.email and self.name == other.name


@dataclass
class Podcast(RSSSerializer, RSSDeserializer):
    """The whole feed (<rss>/<channel>), serializable to and from RSS XML."""

    # Defines an episodes. At least one element in the items.
    items: Resource[Iterable[Episode]]
    # Fully-qualified URL of the homepage of the podcast.
    link: ParseResult
    # Name of the podcast.
    title: str
    # An image to associate with the podcast.
    image: Resource[ParseResult]
    # A plaintext description of the podcast.
    description: str
    # Text name(s) of the author(s) of this podcast.
    # This need not be the same as the owner value.
    author: str
    # Manager's email for the podcast.
    owner: Owner | None = None
    # The general topic of the podcast.
51 | categories: list[str] = field(default_factory=list) 52 | # Indicates whether the podcast is explicit language or adult content. 53 | explicit: bool = False 54 | # The two-letter language code of the podcast as defined by ISO 639-1. 55 | # https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes 56 | language: str = 'en' 57 | 58 | @property 59 | def xml(self) -> Element: 60 | el = self._el_creator('rss', attrib={'version': '2.0'}) 61 | channel = self._el_creator('channel') 62 | el.append(channel) 63 | channel.append(self._generator_el) 64 | channel.append(self._link_el) 65 | channel.append(self._title_el) 66 | channel.append(self._itunes_image_el) 67 | channel.append(self._image_el) 68 | channel.append(self._description_el) 69 | channel.append(self._summary_el) 70 | if self.owner: 71 | channel.append(self._owner_el) 72 | channel.append(self._author_el) 73 | for category in self._category_el: 74 | channel.append(category) 75 | channel.append(self._explicit_el) 76 | channel.append(self._language_el) 77 | for item in self._items_el: 78 | channel.append(item) 79 | return el 80 | 81 | @classmethod 82 | def from_xml(cls, el: Element) -> Self: 83 | items = cls._parse_items(el) 84 | link = urlparse(cls._parse_required_text(el, '.channel/link')) 85 | title = cls._parse_required_text(el, '.channel/title') 86 | image = cls._parse_image(el) 87 | description = cls._parse_required_text(el, '.channel/description') 88 | owner = cls._parse_owner(el) 89 | author = cls._parse_required_text(el, f'.channel/{itunes("author")}') 90 | categories = cls._parse_categories(el) 91 | explicit = cls._parse_optional_text(el, f'.channel/{itunes("explicit")}') == 'yes' 92 | language = cls._parse_optional_text(el, '.channel/language') or 'en' 93 | return cls( 94 | items, 95 | link, 96 | title, 97 | image, 98 | description, 99 | author, 100 | owner, 101 | categories, 102 | explicit, 103 | language 104 | ) 105 | 106 | def merge(self, other: Self) -> bool: 107 | has_changed = self._common_merge( 108 | 
other, 109 | ('link', 'title', 'description', 'owner', 'author', 'explicit', 'language') 110 | ) 111 | image_url = self.image.get() 112 | if image_url != other.image.get(): 113 | self.image = other.image 114 | has_changed = True 115 | if set(self.categories) != set(other.categories): 116 | self.categories = other.categories 117 | has_changed = True 118 | if self._merge_items(other.items): 119 | has_changed = True 120 | return has_changed 121 | 122 | def _merge_items(self, others: Resource[Iterable[Episode]]) -> bool: 123 | new_items = [] 124 | has_changed = False 125 | old_ids = {i.unique_id: i for i in self.items.ensure()} 126 | for item in others.ensure(): 127 | if item.unique_id not in old_ids: 128 | new_items.append(item) 129 | else: 130 | old_item = old_ids[item.unique_id] 131 | has_changed = old_item.merge(item) or has_changed 132 | if not new_items and not has_changed: 133 | return False 134 | sorted_items = sorted( 135 | list(self.items.ensure()) + new_items, 136 | key=lambda i: i.pub_date or 0, 137 | reverse=True 138 | ) 139 | self.items = PlainResource(sorted_items) 140 | return True 141 | 142 | @classmethod 143 | def _parse_owner(cls, el: Element) -> Owner | None: 144 | owner_el = cls._parse_optional_el(el, f'.channel/{itunes("owner")}') 145 | if owner_el is None: 146 | return None 147 | owner_name = cls._parse_optional_text(owner_el, f'.{itunes("name")}') 148 | owner_email = cls._parse_required_text(owner_el, f'.{itunes("email")}') 149 | return Owner(owner_email, owner_name) 150 | 151 | @classmethod 152 | def _parse_items(cls, el: Element) -> Resource[Iterable[Episode]]: 153 | item_els = cls._parse_els(el, '.channel/item') 154 | if not item_els: 155 | raise ValueError('items is required') 156 | items = [] 157 | for item_el in item_els: 158 | items.append(Episode.from_xml(item_el)) 159 | if not items: 160 | raise ValueError('items is required') 161 | return PlainResource(items) 162 | 163 | @classmethod 164 | def _parse_categories(cls, el: Element) -> 
list[str]: 165 | categories = [] 166 | for category_el in cls._parse_els(el, f'.channel/{itunes("category")}'): 167 | if category_el.text: 168 | categories.append(category_el.text.strip()) 169 | elif category_el.get('text'): 170 | categories.append(category_el.get('text')) # type: ignore[arg-type] 171 | return categories 172 | 173 | @classmethod 174 | def _parse_image(cls, el: Element) -> Resource[ParseResult]: 175 | href = cls._parse_optional_attrib(el, f'.channel/{itunes("image")}', 'href') 176 | if href: 177 | return PlainResource(urlparse(href)) 178 | image_url = cls._parse_required_text(el, '.channel/image/url') 179 | return PlainResource(urlparse(image_url)) 180 | 181 | @property 182 | def _generator_el(self) -> Element: 183 | el = self._el_creator('generator') 184 | el.append(self._el_creator('name', 'podmaker')) 185 | el.append(self._el_creator('link', 'https://github.com/YogiLiu/podmaker')) 186 | return el 187 | 188 | @property 189 | def _items_el(self) -> Iterable[Element]: 190 | is_empty = True 191 | for item in self.items.ensure(): 192 | is_empty = False 193 | yield item.xml 194 | if is_empty: 195 | raise ValueError('items is required') 196 | 197 | @property 198 | def _link_el(self) -> Element: 199 | return self._el_creator('link', self.link.geturl()) 200 | 201 | @property 202 | def _title_el(self) -> Element: 203 | return self._el_creator('title', self.title) 204 | 205 | @property 206 | def _itunes_image_el(self) -> Element: 207 | return itunes.el('image', attrib={'href': self.image.ensure().geturl()}) 208 | 209 | @property 210 | def _image_el(self) -> Element: 211 | el = self._el_creator('image') 212 | el.append(self._el_creator('link', self.link.geturl())) 213 | el.append(self._el_creator('title', self.title)) 214 | el.append(self._el_creator('url', self.image.ensure().geturl())) 215 | return el 216 | 217 | @property 218 | def _description_el(self) -> Element: 219 | return self._el_creator('description', self.description) 220 | 221 | @property 222 | 
def _summary_el(self) -> Element: 223 | return itunes.el('summary', text=self.description) 224 | 225 | @property 226 | def _owner_el(self) -> Element: 227 | if self.owner is None: 228 | raise ValueError('empty owner field') 229 | el = itunes.el('owner') 230 | if self.owner.name: 231 | el.append(itunes.el('name', text=self.owner.name)) 232 | el.append(itunes.el('email', text=self.owner.email)) 233 | return el 234 | 235 | @property 236 | def _author_el(self) -> Element: 237 | return itunes.el('author', text=self.author) 238 | 239 | @property 240 | def _category_el(self) -> Iterable[Element]: 241 | for category in self.categories: 242 | parsed_category = self._parse_category(category) 243 | if parsed_category is not None: 244 | yield itunes.el('category', attrib={'text': parsed_category}) 245 | 246 | @staticmethod 247 | def _parse_category(category: str) -> str | None: 248 | if not _category_pattern.match(category): 249 | return None 250 | return category.capitalize() 251 | 252 | @property 253 | def _explicit_el(self) -> Element: 254 | return itunes.el('explicit', text='yes' if self.explicit else 'no') 255 | 256 | @property 257 | def _language_el(self) -> Element: 258 | if self.language is None: 259 | raise ValueError('empty language field') 260 | return self._el_creator('language', self.language) 261 | -------------------------------------------------------------------------------- /podmaker/rss/util/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YogiLiu/podmaker/93153aedfe643f97e912a2ca8cb77df311070a2b/podmaker/rss/util/__init__.py -------------------------------------------------------------------------------- /podmaker/rss/util/namespace.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from xml.etree.ElementTree import Element, QName, register_namespace 4 | 5 | 6 | class NamespaceGenerator: 7 | def 
__init__(self, prefix: str, uri: str): 8 | self.prefix = prefix 9 | self.url = uri 10 | register_namespace(prefix, uri) 11 | 12 | @property 13 | def namespace(self) -> dict[str, str]: 14 | return {self.prefix: self.url} 15 | 16 | def __call__(self, tag: str) -> QName: 17 | return QName(self.url, tag) 18 | 19 | def el(self, tag: str, *, text: str| None = None, attrib: dict[str, str] | None = None) -> Element: 20 | el = Element(self(tag).text, attrib or {}) 21 | if text is not None: 22 | el.text = text 23 | return el 24 | -------------------------------------------------------------------------------- /podmaker/rss/util/parse.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from abc import ABC 4 | from xml.etree.ElementTree import Element 5 | 6 | 7 | class XMLParser(ABC): 8 | namespace: dict[str, str] = {} 9 | 10 | @classmethod 11 | def _parse_optional_text(cls, el: Element, xpath: str) -> str | None: 12 | text = el.findtext(xpath, namespaces=cls.namespace) 13 | if text is None: 14 | return None 15 | return text.strip() 16 | 17 | @classmethod 18 | def _parse_required_text(cls, el: Element, xpath: str) -> str: 19 | text = cls._parse_optional_text(el, xpath) 20 | if text is None: 21 | raise ValueError(f'{xpath} is required') 22 | return text 23 | 24 | @classmethod 25 | def _parse_optional_el(cls, el: Element, xpath: str) -> Element | None: 26 | return el.find(xpath, namespaces=cls.namespace) 27 | 28 | @classmethod 29 | def _parse_required_el(cls, el: Element, xpath: str) -> Element: 30 | target = cls._parse_optional_el(el, xpath) 31 | if target is None: 32 | raise ValueError(f'{xpath} is required') 33 | return target 34 | 35 | @classmethod 36 | def _parse_els(cls, el: Element, xpath: str) -> list[Element]: 37 | return el.findall(xpath, namespaces=cls.namespace) 38 | 39 | @classmethod 40 | def _parse_optional_attrib(cls, el: Element, xpath: str, attrib: str) -> str | None: 41 | target = 
cls._parse_optional_el(el, xpath) 42 | if target is None: 43 | return None 44 | attrib_value = target.get(attrib, None) 45 | if attrib_value is None: 46 | return None 47 | return attrib_value.strip() 48 | 49 | @classmethod 50 | def _parse_required_attrib(cls, el: Element, xpath: str, attrib: str) -> str: 51 | text = cls._parse_optional_attrib(el, xpath, attrib) 52 | if text is None: 53 | raise ValueError(f'attrib {attrib} of {xpath} is required') 54 | return text 55 | -------------------------------------------------------------------------------- /podmaker/storage/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = ['Storage', 'ObjectInfo', 'EMPTY_FILE', 'get_storage'] 2 | 3 | from podmaker.config import LocalConfig, S3Config, StorageConfig 4 | from podmaker.storage.core import EMPTY_FILE, ObjectInfo, Storage 5 | 6 | 7 | def get_storage(config: StorageConfig) -> Storage: 8 | if isinstance(config, S3Config): 9 | from podmaker.storage.s3 import S3 10 | return S3(config) 11 | elif isinstance(config, LocalConfig): 12 | from podmaker.storage.local import Local 13 | return Local(config) 14 | else: 15 | raise ValueError(f'unknown storage destination: {config.dest}') 16 | -------------------------------------------------------------------------------- /podmaker/storage/core.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from abc import ABC, abstractmethod 4 | from contextlib import contextmanager 5 | from dataclasses import dataclass 6 | from io import BytesIO 7 | from typing import IO, AnyStr, Iterator 8 | from urllib.parse import ParseResult 9 | 10 | 11 | @dataclass 12 | class ObjectInfo: 13 | # Fully-qualified URL of the object. 14 | uri: ParseResult 15 | # Size of the object in bytes. 16 | size: int 17 | # The standard MIME type of the object. 
18 | type: str 19 | 20 | 21 | EMPTY_FILE = BytesIO(b'') 22 | 23 | 24 | class Storage(ABC): 25 | @abstractmethod 26 | def put(self, data: IO[AnyStr], key: str, *, content_type: str = '') -> ParseResult: 27 | """ 28 | :return: data uri 29 | """ 30 | raise NotImplementedError 31 | 32 | @abstractmethod 33 | def check(self, key: str) -> ObjectInfo | None: 34 | raise NotImplementedError 35 | 36 | @abstractmethod 37 | @contextmanager 38 | def get(self, key: str) -> Iterator[IO[bytes]]: 39 | """ 40 | :return: file-like object, return `EMPTY_FILE` if not found 41 | """ 42 | raise NotImplementedError 43 | 44 | def start(self) -> None: 45 | pass 46 | 47 | def stop(self) -> None: 48 | pass 49 | -------------------------------------------------------------------------------- /podmaker/storage/local.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | __all__ = ['Local'] 4 | 5 | import logging 6 | import sqlite3 7 | import threading 8 | from contextlib import contextmanager 9 | from pathlib import Path 10 | from typing import IO, AnyStr, Iterator 11 | from urllib.parse import ParseResult, urljoin, urlparse 12 | 13 | from podmaker.config import LocalConfig 14 | from podmaker.storage import ObjectInfo, Storage 15 | from podmaker.storage.core import EMPTY_FILE 16 | 17 | logger = logging.getLogger(__name__) 18 | lock = threading.Lock() 19 | 20 | 21 | class Local(Storage): 22 | _db: sqlite3.Connection 23 | _file_buffering = 10 * 1024 * 1024 # 10MB 24 | 25 | def __init__(self, config: LocalConfig): 26 | self.public_endpoint = str(config.public_endpoint) 27 | self.base_dir = Path(config.base_dir) 28 | self.data_dir = self.base_dir / 'data' 29 | 30 | def start(self) -> None: 31 | if not self.base_dir.exists(): 32 | self.base_dir.mkdir(parents=True, exist_ok=True) 33 | self.base_dir.chmod(0o750) 34 | logger.info(f'created base directory {self.base_dir} (mod: {self.base_dir.stat().st_mode:o})') 35 | if not 
self.data_dir.exists(): 36 | self.data_dir.mkdir(parents=True, exist_ok=True) 37 | self.base_dir.chmod(0o750) 38 | logger.info(f'created data directory {self.data_dir} (mod: {self.base_dir.stat().st_mode:o})') 39 | with lock: 40 | self._db = sqlite3.connect(self.base_dir / 'db.sqlite3') 41 | self._db.execute(''' 42 | CREATE TABLE IF NOT EXISTS files ( 43 | key TEXT PRIMARY KEY, 44 | type TEXT NOT NULL DEFAULT '', 45 | size INTEGER NOT NULL CHECK (size >= 0) 46 | ) 47 | ''') 48 | 49 | def stop(self) -> None: 50 | with lock: 51 | self._db.close() 52 | 53 | def put(self, data: IO[AnyStr], key: str, *, content_type: str = '') -> ParseResult: 54 | if key.startswith('/'): 55 | key = key[1:] 56 | path = self.data_dir / key 57 | size = 0 58 | with open(path, 'wb') as f: 59 | while True: 60 | chunk = data.read(self._file_buffering) 61 | if isinstance(chunk, str): 62 | chunk_bytes = chunk.encode('utf-8') 63 | else: 64 | chunk_bytes = chunk 65 | if not chunk_bytes: 66 | break 67 | size += len(chunk_bytes) 68 | f.write(chunk_bytes) 69 | path.chmod(0o640) 70 | data.seek(0) 71 | info = self.check(key) 72 | with lock: 73 | if info is None: 74 | self._db.execute( 75 | 'INSERT INTO files (key, type, size) VALUES (?, ?, ?)', 76 | (key, content_type, size), 77 | ) 78 | else: 79 | self._db.execute( 80 | 'UPDATE files SET type = ?, size = ? 
WHERE key = ?', 81 | (content_type, size, key), 82 | ) 83 | url = urljoin(self.public_endpoint, key) 84 | return urlparse(url) 85 | 86 | def check(self, key: str) -> ObjectInfo | None: 87 | if key.startswith('/'): 88 | key = key[1:] 89 | with lock: 90 | cursor = self._db.execute( 91 | 'SELECT type, size FROM files WHERE key = ?', 92 | (key,), 93 | ) 94 | row = cursor.fetchone() 95 | if row is None: 96 | return None 97 | content_type, size = row 98 | url = urljoin(self.public_endpoint, key) 99 | return ObjectInfo(type=content_type, uri=urlparse(url), size=size) 100 | 101 | @contextmanager 102 | def get(self, key: str) -> Iterator[IO[bytes]]: 103 | if key.startswith('/'): 104 | key = key[1:] 105 | path = self.data_dir / key 106 | if not path.exists(): 107 | yield EMPTY_FILE 108 | else: 109 | with open(path, 'rb') as f: 110 | yield f 111 | -------------------------------------------------------------------------------- /podmaker/storage/s3.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | __all__ = ['S3'] 4 | 5 | import base64 6 | import hashlib 7 | import logging 8 | import sys 9 | from contextlib import contextmanager 10 | from tempfile import SpooledTemporaryFile 11 | from typing import IO, AnyStr, Iterator 12 | from urllib.parse import ParseResult, urljoin, urlparse 13 | 14 | from podmaker.config import S3Config 15 | from podmaker.storage import ObjectInfo, Storage 16 | from podmaker.storage.core import EMPTY_FILE 17 | 18 | logger = logging.getLogger(__name__) 19 | 20 | try: 21 | import boto3 22 | from botocore.exceptions import ClientError 23 | except ImportError: 24 | logger.error('boto3 is not installed, S3 storage is not available') 25 | sys.exit(1) 26 | 27 | 28 | class S3(Storage): 29 | _md5_chunk_size = 10 * 1024 * 1024 # 10MB 30 | _file_buffering = 10 * 1024 * 1024 # 10MB 31 | 32 | def __init__(self, config: S3Config): 33 | self.s3 = boto3.resource( 34 | 's3', 
endpoint_url=str(config.endpoint), aws_access_key_id=config.access_key, 35 | aws_secret_access_key=config.access_secret) 36 | self.bucket = self.s3.Bucket(config.bucket) 37 | self.public_endpoint = str(config.public_endpoint) 38 | 39 | def _calculate_md5(self, data: IO[AnyStr]) -> str: 40 | logger.debug('calculate md5') 41 | md5 = hashlib.md5() 42 | while True: 43 | chunk = data.read(self._md5_chunk_size) 44 | if not chunk: 45 | break 46 | if isinstance(chunk, str): 47 | md5.update(chunk.encode()) 48 | elif isinstance(chunk, bytes): 49 | md5.update(chunk) 50 | else: 51 | raise TypeError(f'chunk must be str or bytes, not {type(chunk)}') 52 | data.seek(0) 53 | return base64.b64encode(md5.digest()).decode() 54 | 55 | def put(self, data: IO[AnyStr], key: str, *, content_type: str = '') -> ParseResult: 56 | if key.startswith('/'): 57 | key = key[1:] 58 | md5 = self._calculate_md5(data) 59 | logger.info(f'upload: {key} (md5: {md5})') 60 | self.bucket.put_object(Key=key, ContentMD5=md5, Body=data, ContentType=content_type) 61 | logger.info(f'uploaded: {key}') 62 | data.seek(0) 63 | return self.get_uri(key) 64 | 65 | def check(self, key: str) -> ObjectInfo | None: 66 | logger.debug(f'check: {key}') 67 | if key.startswith('/'): 68 | key = key[1:] 69 | try: 70 | info = self.bucket.Object(key=key) 71 | return ObjectInfo( 72 | uri=self.get_uri(key), 73 | size=info.content_length, 74 | type=info.content_type 75 | ) 76 | except ClientError: 77 | return None 78 | 79 | def get_uri(self, key: str) -> ParseResult: 80 | url = urljoin(self.public_endpoint, key) 81 | return urlparse(url) 82 | 83 | @contextmanager 84 | def get(self, key: str) -> Iterator[IO[bytes]]: 85 | logger.info(f'get: {key}') 86 | if key.startswith('/'): 87 | key = key[1:] 88 | with SpooledTemporaryFile(buffering=self._file_buffering) as f: 89 | try: 90 | obj = self.bucket.Object(key=key).get() 91 | while True: 92 | chunk = obj['Body'].read(self._file_buffering) 93 | if not chunk: 94 | break 95 | f.write(chunk) 96 
import signal
import threading
from typing import Any, Callable

# Signals that should trigger a graceful shutdown.
_exit_signals = (
    signal.SIGINT,
    signal.SIGHUP,
    signal.SIGTERM,
)

_lock = threading.Lock()


class ExitSignalError(Exception):
    """Raised by ExitSignal.check() once a shutdown signal has arrived."""


class ExitSignalRegisterError(Exception):
    """Raised when register() is called after listen()."""


class ExitSignal:
    """Cooperative shutdown flag.

    OS signals (or receive()) set the flag, workers poll it via check(),
    and callbacks registered before listen() run when a signal arrives.
    """

    def __init__(self) -> None:
        self._is_received = False
        self._has_listened = False
        self._exit_handlers: list[Callable[[], None]] = []

    def receive(self) -> None:
        """Mark the shutdown flag (thread-safe)."""
        with _lock:
            self._is_received = True

    def check(self) -> None:
        """Raise ExitSignalError if a shutdown signal was received."""
        with _lock:
            received = self._is_received
        if received:
            raise ExitSignalError('exit signal received')

    def register(self, handler: Callable[[], None]) -> None:
        """Add a shutdown callback; must be called before listen()."""
        with _lock:
            if self._has_listened:
                raise ExitSignalRegisterError('already listened')
            self._exit_handlers.append(handler)

    def _handler(self, *_: Any) -> None:
        # Signal callback: flip the flag first, then fan out to handlers.
        self.receive()
        for handler in self._exit_handlers:
            handler()

    def listen(self) -> None:
        """Install _handler for every signal in _exit_signals."""
        with _lock:
            self._has_listened = True
            for sig in _exit_signals:
                signal.signal(sig, self._handler)


exit_signal = ExitSignal()
-------------------------------------------------------------------------------- /podmaker/util/retry_util.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import logging 4 | import sys 5 | import time 6 | from datetime import timedelta 7 | from typing import Callable, Tuple, Type, TypeVar 8 | 9 | if sys.version_info < (3, 10): 10 | from typing_extensions import ParamSpec 11 | else: 12 | from typing import ParamSpec 13 | 14 | 15 | P = ParamSpec('P') 16 | T = TypeVar('T') 17 | _logger = logging.getLogger(__name__) 18 | 19 | 20 | def retry( 21 | cnt: int, 22 | *, 23 | wait: timedelta = timedelta(seconds=0), 24 | catch: Type[Exception] | Tuple[Type[Exception], ...] = Exception, 25 | logger: logging.Logger = _logger, 26 | ) -> Callable[[Callable[P, T]], Callable[P, T]]: 27 | """ 28 | A decorator to retry the function when exception raised. 29 | The function will be called at least once and at most cnt + 1 times. 
30 | 31 | :param cnt: retry count 32 | :param wait: wait time between retries 33 | :param catch: the exception to retry 34 | :param logger: logger to log retry info 35 | """ 36 | if cnt <= 0: 37 | raise ValueError('cnt must be positive') 38 | wait_seconds = wait.total_seconds() 39 | 40 | def deco(func: Callable[P, T]) -> Callable[P, T]: 41 | def wrapper(*args: P.args, **kwargs: P.kwargs) -> T: 42 | for _ in range(cnt): 43 | try: 44 | return func(*args, **kwargs) 45 | except catch: 46 | logger.warning('retrying...') 47 | if wait_seconds > 0: 48 | logger.warning(f'wait {wait_seconds}s before retry') 49 | time.sleep(wait_seconds) 50 | return func(*args, **kwargs) 51 | return wrapper 52 | return deco 53 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "podmaker" 3 | version = "0.9.0" 4 | description = "Convert online media into podcast feeds." 
5 | license = "Unlicense" 6 | authors = ["YogiLiu "] 7 | maintainers = ["YogiLiu "] 8 | readme = "README.md" 9 | homepage = "https://github.com/YogiLiu/podmaker" 10 | repository = "https://github.com/YogiLiu/podmaker" 11 | documentation = "https://github.com/YogiLiu/podmaker/blob/main/README.md" 12 | keywords = ["rss", "youtube", "podcast"] 13 | classifiers = [ 14 | "Development Status :: 4 - Beta", 15 | "Environment :: Console" 16 | ] 17 | 18 | [tool.poetry.dependencies] 19 | python = "^3.9" 20 | tomlkit = "^0.12.1" 21 | pydantic = {extras = ["email"], version = "^2.2.0"} 22 | apscheduler = "^3.10.4" 23 | boto3 = { version = "^1.28.27", optional = true } 24 | yt-dlp = { version = "^2023.7.6", optional = true } 25 | 26 | [tool.poetry.extras] 27 | s3 = ["boto3"] 28 | youtube = ["yt-dlp"] 29 | all = ["boto3", "yt-dlp"] 30 | 31 | [tool.poetry.group.dev.dependencies] 32 | boto3-stubs = { extras = ["essential"], version = "^1.28.27" } 33 | autohooks = "^23.7.0" 34 | autohooks-plugin-ruff = "^23.6.1" 35 | autohooks-plugin-mypy = "^23.3.0" 36 | typing-extensions = "^4.7.1" 37 | 38 | [tool.poetry.scripts] 39 | podmaker = 'podmaker.cli:run' 40 | 41 | [tool.autohooks] 42 | mode = "poetry" 43 | pre-commit = ["autohooks.plugins.mypy", "autohooks.plugins.ruff"] 44 | 45 | [tool.ruff] 46 | select = ["C90", "F", "I", "PL"] 47 | target-version = "py39" 48 | line-length = 120 49 | 50 | [tool.mypy] 51 | python_version = "3.9" 52 | strict = true 53 | plugins = ["pydantic.mypy"] 54 | 55 | [[tool.mypy.overrides]] 56 | module = ["yt_dlp", "apscheduler.*"] 57 | ignore_missing_imports = true 58 | 59 | 60 | [tool.commitizen] 61 | name = "cz_conventional_commits" 62 | tag_format = "$version" 63 | version_scheme = "pep440" 64 | version_provider = "poetry" 65 | update_changelog_on_bump = true 66 | major_version_zero = true 67 | 68 | [build-system] 69 | requires = ["poetry-core"] 70 | build-backend = "poetry.core.masonry.api" 71 | 72 | [tool.poetry.urls] 73 | "Bug Tracker" = 
"https://github.com/YogiLiu/podmaker/issues" 74 | -------------------------------------------------------------------------------- /systemd/podmaker.service: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=Convert online media into podcast feeds. 3 | Documentation=https://github.com/YogiLiu/podmaker/blob/main/README.md 4 | After=network.target network-online.target 5 | Wants=network-online.target 6 | 7 | [Service] 8 | User=nobody 9 | Type=simple 10 | ExecStart=/opt/podmaker/venv/bin/podmaker -c /opt/podmaker/config.toml 11 | 12 | [Install] 13 | WantedBy=multi-user.target -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YogiLiu/podmaker/93153aedfe643f97e912a2ca8cb77df311070a2b/tests/__init__.py -------------------------------------------------------------------------------- /tests/data/apple.rss.test.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Hiking Treks 5 | https://www.apple.com/itunes/podcasts/ 6 | en-us 7 | © 2020 John Appleseed 8 | The Sunset Explorers 9 | 10 | Love to get outdoors and discover nature's treasures? Hiking Treks is the 11 | show for you. We review hikes and excursions, review outdoor gear and interview 12 | a variety of naturalists and adventurers. Look for new episodes each week. 13 | 14 | serial 15 | 18 | 19 | 20 | 21 | false 22 | 23 | trailer 24 | Hiking Treks Trailer 25 | 26 | Apple Podcasts.]]> 29 | 30 | 35 | D03EEC9B-B1B4-475B-92C8-54F853FA2A22 36 | Tue, 8 Jan 2019 01:15:00 GMT 37 | 1079 38 | false 39 | 40 | 41 | full 42 | 4 43 | 2 44 | S02 EP04 Mt. 
Hood, Oregon 45 | 46 | Tips for trekking around the tallest mountain in Oregon 47 | 48 | 53 | 22BCFEBF-44FB-4A19-8229-7AC678629F57 54 | Tue, 07 May 2019 12:00:00 GMT 55 | 1024 56 | false 57 | 58 | 59 | full 60 | 3 61 | 2 62 | S02 EP03 Bouldering Around Boulder 63 | 64 | We explore fun walks to climbing areas about the beautiful Colorado city of Boulder. 65 | 66 | 69 | href="http://example.com/podcasts/everything/ 70 | 75 | BE486CAA-B3D5-4FB0-8298-EFEBE71C5982 76 | Tue, 30 Apr 2019 13:00:00 EST 77 | 3627 78 | false 79 | 80 | 81 | full 82 | 2 83 | 2 84 | S02 EP02 Caribou Mountain, Maine 85 | 86 | Put your fitness to the test with this invigorating hill climb. 87 | 88 | 91 | 96 | 142FAFE9-B1DF-4F6D-BAA8-79BDBAF653A9 97 | Tue, 23 May 2019 02:00:00 -0700 98 | 2434 99 | false 100 | 101 | 102 | full 103 | 1 104 | 2 105 | S02 EP01 Stawamus Chief 106 | 107 | We tackle Stawamus Chief outside of Vancouver, BC and you should too! 108 | 109 | 114 | 5F1DBAEB-3327-49FB-ACB3-DB0158A1D0A3 115 | 2019-02-16T07:00:00.000Z 116 | 13:24 117 | false 118 | 119 | 120 | full 121 | 4 122 | 1 123 | S01 EP04 Kuliouou Ridge Trail 124 | 125 | Oahu, Hawaii, has some picturesque hikes and this is one of the best! 126 | 127 | 132 | B5FCEB80-317C-4CD0-A84B-807065B43FB9 133 | Tue, 27 Nov 2018 01:15:00 +0000 134 | 929 135 | false 136 | 137 | 138 | full 139 | 3 140 | 1 141 | S01 EP03 Blood Mountain Loop 142 | 143 | Hiking the Appalachian Trail and Freeman Trail in Georgia 144 | 145 | 150 | F0C5D763-ED85-4449-9C09-81FEBDF6F126 151 | Tue, 23 Oct 2018 01:15:00 +0000 152 | 1440 153 | false 154 | 155 | 156 | full 157 | 2 158 | 1 159 | S01 EP02 Garden of the Gods Wilderness 160 | 161 | Wilderness Area Garden of the Gods in Illinois is a delightful spot for 162 | an extended hike. 
from datetime import timedelta
from urllib.request import urlopen


def network_available(url: str, timeout: timedelta = timedelta(seconds=10)) -> bool:
    """Return True when ``url`` answers within ``timeout``.

    :param url: address to probe
    :param timeout: maximum time to wait for a response

    Any OSError counts as "not available" — URLError is an OSError
    subclass, and raw timeouts/connection resets surface as OSError
    subclasses that the previous URLError-only catch let escape.
    """
    try:
        # close the response instead of leaking the socket
        with urlopen(url, timeout=timeout.total_seconds()):
            return True
    except OSError:
        return False
import multiprocessing
import unittest

from podmaker.rss import Resource
from podmaker.util import ExitSignalError, exit_signal

# Pipe used to ship the child process's outcome back to the test.
parent, child = multiprocessing.Pipe()


def exit_signal_tester() -> None:
    """Run in a subprocess: flag the exit signal, call a Resource, and send
    whatever it raises (or None on success) through the pipe."""
    class Tester(Resource[None]):
        def get(self) -> None:
            return None

    tester = Tester()
    exit_signal.receive()
    try:
        tester.get()
    except BaseException as exc:
        child.send(exc)
    else:
        child.send(None)


class TestResource(unittest.TestCase):
    def test_exit_signal(self) -> None:
        # A separate process keeps the received-flag from leaking into
        # other tests in this run.
        proc = multiprocessing.Process(target=exit_signal_tester)
        proc.start()
        proc.join()
        self.assertIsInstance(parent.recv(), ExitSignalError)
type='audio/mp3' 34 | ) 35 | 36 | def get(self, key: str) -> Any: 37 | pass 38 | 39 | 40 | @unittest.skipUnless(network_available('https://www.youtube.com'), 'network is not available') 41 | class TestYoutube(unittest.TestCase): 42 | cases = [ 43 | { 44 | 'source': SourceConfig( 45 | id='youtube', 46 | url='https://www.youtube.com/playlist?list=PLOU2XLYxmsILHvpAkROp2dXz-jQi4S4_y', 47 | regex=r'Introduction to ARCore Augmented Faces, \w+' 48 | ), 49 | 'attr': ( 50 | 'Introduction to ARCore Augmented Faces', 51 | 'Learn how to use ARCore’s Augmented Faces APIs to create face effects with Unity, Android, and iOS.', 52 | 'Google for Developers', 53 | ), 54 | 'items': [ 55 | ('8ih7eHwPoxM', 'Introduction to ARCore Augmented Faces, Unity', date.fromisoformat('2019-09-12')), 56 | ('-4EvaCQpVEQ', 'Introduction to ARCore Augmented Faces, Android', date.fromisoformat('2019-09-12')), 57 | ('QAqOTaCCD9M', 'Introduction to ARCore Augmented Faces, iOS', date.fromisoformat('2019-09-12')), 58 | ] 59 | }, 60 | { 61 | 'source': SourceConfig( 62 | id='youtube', 63 | url='https://www.youtube.com/@PyCon2015/videos' 64 | ), 65 | 'attr': ( 66 | 'PyCon 2015 - Videos', 67 | '', 68 | 'PyCon 2015', 69 | ), 70 | 'items': [ 71 | ('G-uKNd5TSBw', 'Keynote - Guido van Rossum - PyCon 2015', date.fromisoformat('2015-04-16')), 72 | ('lNqtyi3sM-k', 'Keynote - Gabriella Coleman - PyCon 2015', date.fromisoformat('2015-04-16')), 73 | ('2wDvzy6Hgxg', 'Type Hints - Guido van Rossum - PyCon 2015', date.fromisoformat('2015-04-12')), 74 | ] 75 | }, 76 | ] 77 | 78 | def setUp(self) -> None: 79 | storage = MockStorage() 80 | self.youtube = YouTube( 81 | storage, 82 | OwnerConfig(name='Podmaker', email='test@podmaker.dev') 83 | ) 84 | 85 | def test_fetch(self) -> None: 86 | for case in self.cases: 87 | source = case['source'] 88 | attr = case['attr'] 89 | podcast = self.youtube.fetch(source) # type: ignore[arg-type] 90 | self.assertEqual(urlparse(str(source.url)), podcast.link) # type: ignore[attr-defined] 91 | 
self.assertEqual(attr[0], podcast.title) # type: ignore[index] 92 | self.assertIsNotNone(podcast.image.ensure()) 93 | self.assertEqual(attr[1], podcast.description) # type: ignore[index] 94 | self.assertEqual('Podmaker', podcast.owner.name) # type: ignore[union-attr] 95 | self.assertEqual('test@podmaker.dev', podcast.owner.email) # type: ignore[union-attr] 96 | self.assertEqual(attr[2], podcast.author) # type: ignore[index] 97 | self.assertEqual([], podcast.categories) 98 | self.assertFalse(podcast.explicit) 99 | self.assertEqual('en', podcast.language) 100 | items = case['items'] 101 | for (idx, episode) in enumerate(podcast.items.ensure()): 102 | if idx >= len(items): # type: ignore[arg-type] 103 | break 104 | current = items[idx] # type: ignore[index] 105 | self.assertEqual(current[0], episode.guid) 106 | self.assertEqual(current[1], episode.title) 107 | self.assertIsNotNone(episode.pub_date) 108 | if episode.pub_date is not None: 109 | self.assertEqual(current[2], episode.pub_date.date()) 110 | self.assertIsNotNone(episode.link) 111 | self.assertIsNotNone(episode.image.ensure()) # type: ignore[union-attr] 112 | self.assertEqual(urlparse('https://example.com'), episode.enclosure.ensure().url) 113 | -------------------------------------------------------------------------------- /tests/storage/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YogiLiu/podmaker/93153aedfe643f97e912a2ca8cb77df311070a2b/tests/storage/__init__.py -------------------------------------------------------------------------------- /tests/storage/test_local.py: -------------------------------------------------------------------------------- 1 | import random 2 | import unittest 3 | from io import BytesIO 4 | from pathlib import Path 5 | 6 | from podmaker.config import LocalConfig 7 | from podmaker.storage.local import Local 8 | 9 | file_size = 10 10 | 11 | 12 | class TestS3(unittest.TestCase): 13 | base_dir = 
Path('/tmp/podmaker') 14 | data_dir = base_dir / 'data' 15 | 16 | def setUp(self) -> None: 17 | self.storage = Local( 18 | LocalConfig(dest='local', base_dir='/tmp/podmaker', public_endpoint='http://localhost:9000') 19 | ) 20 | self.storage.start() 21 | self.file = BytesIO() 22 | self.file.write(random.randbytes(file_size)) 23 | self.file.seek(0) 24 | 25 | def tearDown(self) -> None: 26 | self.storage.stop() 27 | 28 | # noinspection DuplicatedCode 29 | def test_s3(self) -> None: 30 | for _ in range(2): 31 | result = self.storage.put(self.file, key='/test.bin', content_type='application/octet-stream') 32 | self.assertEqual('http://localhost:9000/test.bin', result.geturl()) 33 | self.assertTrue((self.data_dir / 'test.bin').exists()) 34 | info = self.storage.check(key='/test.bin') 35 | self.assertIsNotNone(info) 36 | if info is not None: 37 | self.assertEqual('http://localhost:9000/test.bin', info.uri.geturl()) 38 | self.assertEqual(self.file.getbuffer().nbytes, info.size) 39 | self.assertEqual('application/octet-stream', info.type) 40 | with self.storage.get(key='/test.bin') as f: 41 | self.assertEqual(self.file.read(), f.read()) 42 | self.file.seek(0) 43 | 44 | def test_check_empty(self) -> None: 45 | r = self.storage.check(key='/empty.bin') 46 | self.assertIsNone(r) 47 | -------------------------------------------------------------------------------- /tests/storage/test_s3.py: -------------------------------------------------------------------------------- 1 | import random 2 | import unittest 3 | from dataclasses import dataclass 4 | from io import BytesIO 5 | from typing import Any, Type 6 | from unittest.mock import patch 7 | from urllib.parse import ParseResult, urlparse 8 | 9 | import boto3 10 | from botocore.exceptions import ClientError 11 | 12 | from podmaker.config import S3Config 13 | from podmaker.storage.s3 import S3 14 | 15 | file_size = 10 16 | 17 | 18 | @dataclass 19 | class MockedObject: 20 | content_length: int 21 | content_type: str 22 | 23 | 24 | 
# noinspection PyPep8Naming
class MockedBucket:
    """Stand-in for a boto3 S3 Bucket; mirrors only the call shapes S3 uses."""

    @staticmethod
    def put_object(*, Key: str, **__: Any) -> ParseResult:
        # Echo back the public URL the real bucket would expose for this key.
        return urlparse('http://localhost:9000/' + Key)

    @staticmethod
    def Object(*, key: str) -> 'MockedObject':
        # The "empty.bin" key simulates an object that was never uploaded.
        if key == 'empty.bin':
            raise ClientError(error_response={}, operation_name='GetObject')
        return MockedObject(content_type='application/octet-stream', content_length=file_size)


# noinspection PyPep8Naming
class MockedServiceResource:
    """Stand-in for the boto3 service resource object."""

    @staticmethod
    def Bucket(*_: Any, **__: Any) -> MockedBucket:
        return MockedBucket()


def mock_resource(*_: Any, **__: Any) -> Type[MockedServiceResource]:
    """Drop-in replacement for ``boto3.resource`` used via ``patch.object``."""
    return MockedServiceResource


class TestS3(unittest.TestCase):
    # boto3.resource is patched only while __init__ captures it in setUp.
    @patch.object(boto3, 'resource', mock_resource)
    def setUp(self) -> None:
        config = S3Config(
            dest='s3',
            access_key='123',
            access_secret='456',
            bucket='podmaker',
            endpoint='http://localhost:9000',
            public_endpoint='http://localhost:9000'
        )
        self.s3 = S3(config)
        payload = random.randbytes(file_size)
        self.file = BytesIO()
        self.file.write(payload)
        self.file.seek(0)

    def test_s3(self) -> None:
        """Upload and stat a key; the second pass exercises the overwrite path."""
        for _attempt in range(2):
            result = self.s3.put(self.file, key='/test.bin', content_type='application/octet-stream')
            self.assertEqual('http://localhost:9000/test.bin', result.geturl())
            info = self.s3.check(key='/test.bin')
            self.assertIsNotNone(info)
            if info is not None:
                self.assertEqual('http://localhost:9000/test.bin', info.uri.geturl())
                self.assertEqual(self.file.getbuffer().nbytes, info.size)
                self.assertEqual('application/octet-stream', info.type)

    def test_check_empty(self) -> None:
        """A key that was never uploaded must report as absent (None)."""
        self.assertIsNone(self.s3.check(key='/empty.bin'))

# ---------------------------------------------------------------------------
# next file: tests/test_config.py
# ---------------------------------------------------------------------------
-------------------------------------------------------------------------------- 1 | import sys 2 | import unittest 3 | from pathlib import Path 4 | 5 | from podmaker.config import PMConfig 6 | 7 | if sys.version_info >= (3, 11): 8 | import tomllib as toml 9 | else: 10 | import tomlkit as toml 11 | 12 | 13 | class TestConfig(unittest.TestCase): 14 | def setUp(self) -> None: 15 | self.path = Path(__file__).parent.parent / 'config.example.toml' 16 | 17 | def test_from_file(self) -> None: 18 | config = PMConfig.from_file(self.path) 19 | self.assertEqual(toml.loads(self.path.read_text()), config.model_dump(mode='json')) 20 | -------------------------------------------------------------------------------- /tests/test_rss.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import math 4 | import unittest 5 | from datetime import datetime, timezone 6 | from email.utils import parsedate_to_datetime 7 | from pathlib import Path 8 | from typing import Any, Callable 9 | from urllib.parse import urlparse 10 | from xml.etree.ElementTree import Element, fromstring 11 | 12 | from podmaker.rss import Episode, Podcast 13 | from podmaker.rss.core import PlainResource, Resource, itunes 14 | 15 | 16 | def convert_to_seconds(duration: str) -> int: 17 | if ':' in duration: 18 | secs = 0 19 | for c in duration.split(':'): 20 | secs = secs * 60 + int(c) 21 | else: 22 | secs = int(duration) 23 | return secs 24 | 25 | 26 | def find_strip_text(el: Element, path: str, namespaces: dict[str, str] | None = None) -> str | None: 27 | text = el.findtext(path, namespaces=namespaces) 28 | if text: 29 | return text.strip() 30 | return None 31 | 32 | 33 | class TestRSS(unittest.TestCase): 34 | def setUp(self) -> None: 35 | self.rss_docs = [ 36 | Path('data/apple.rss.test.xml').read_text(), 37 | Path('data/google.rss.test.xml').read_text(), 38 | ] 39 | self.elements = [ 40 | fromstring(r) 41 | for r in self.rss_docs 42 | ] 
43 | 44 | def test_from_rss(self) -> None: # noqa: PLR0912, C901, PLR0915 45 | for i, element in enumerate(self.elements): 46 | doc = self.rss_docs[i] 47 | podcast = Podcast.from_rss(doc) 48 | self.assertEqual(find_strip_text(element, '.channel/link'), podcast.link.geturl()) 49 | self.assertEqual(find_strip_text(element, '.channel/title'), podcast.title) 50 | self.assertEqual( 51 | element.find( 52 | f'.channel/{itunes("image")}', namespaces=itunes.namespace 53 | ).get('href'), # type: ignore[union-attr] 54 | podcast.image.ensure().geturl() 55 | ) 56 | self.assertEqual(find_strip_text(element, '.channel/description'), podcast.description) 57 | owner_el = element.find(f'.channel/{itunes("owner")}', namespaces=itunes.namespace) 58 | if owner_el is not None: 59 | owner_name = find_strip_text(owner_el, f'.{itunes("name")}') 60 | if owner_name: 61 | self.assertEqual(owner_name, podcast.owner.name) # type: ignore[union-attr] 62 | else: 63 | self.assertIsNone(podcast.owner.name) # type: ignore[union-attr] 64 | self.assertEqual( 65 | find_strip_text(owner_el, f'.{itunes("email")}'), 66 | podcast.owner.email # type: ignore[union-attr] 67 | ) 68 | self.assertEqual( 69 | find_strip_text(element, f'.channel/{itunes("author")}', namespaces=itunes.namespace), 70 | podcast.author 71 | ) 72 | c_els = element.findall(f'.channel/{itunes("category")}', namespaces=itunes.namespace) 73 | self.assertEqual( 74 | [c_el.text.strip() for c_el in c_els], # type: ignore[union-attr] 75 | podcast.categories 76 | ) 77 | explicit = find_strip_text(element, f'.channel/{itunes("explicit")}', namespaces=itunes.namespace) 78 | if explicit == 'yes': 79 | self.assertTrue(podcast.explicit) 80 | else: 81 | self.assertFalse(podcast.explicit) 82 | language = find_strip_text(element, '.channel/language') 83 | if language: 84 | self.assertEqual(language, podcast.language) 85 | else: 86 | self.assertIsNone(podcast.explicit) 87 | item_els = element.findall('.channel/item') 88 | for j, item in 
enumerate(podcast.items.ensure()): 89 | el = item_els[j] 90 | enclosure_el = el.find('.enclosure') 91 | self.assertEqual( 92 | enclosure_el.get('url'), # type: ignore[union-attr] 93 | item.enclosure.ensure().url.geturl() 94 | ) 95 | self.assertEqual( 96 | enclosure_el.get('type'), # type: ignore[union-attr] 97 | item.enclosure.ensure().type 98 | ) 99 | self.assertEqual( 100 | enclosure_el.get('length'), # type: ignore[union-attr] 101 | str(item.enclosure.ensure().length) 102 | ) 103 | if find_strip_text(el, '.title'): 104 | self.assertEqual(find_strip_text(el, '.title'), item.title) 105 | else: 106 | self.assertEqual(find_strip_text(el, f'.{itunes("title")}', namespaces=itunes.namespace), 107 | item.title) 108 | desc = find_strip_text(el, '.description') 109 | if desc: 110 | self.assertEqual(desc, item.description) 111 | summary = find_strip_text(el, f'.{itunes("summary")}') 112 | if summary: 113 | self.assertEqual(summary, item.description) 114 | explicit = find_strip_text(el, f'.{itunes("explicit")}', namespaces=itunes.namespace) 115 | if explicit == 'yes': 116 | self.assertTrue(item.explicit) 117 | elif explicit == 'no': 118 | self.assertFalse(item.explicit) 119 | else: 120 | self.assertFalse(item.explicit) 121 | self.assertEqual(el.find('.guid').text, item.guid) # type: ignore[union-attr] 122 | duration = find_strip_text(el, f'.{itunes("duration")}', namespaces=itunes.namespace) 123 | if duration: 124 | if ':' in duration: 125 | secs = 0 126 | for c in duration.split(':'): 127 | secs = secs * 60 + int(c) 128 | else: 129 | secs = int(duration) 130 | self.assertEqual(secs, math.ceil(item.duration.total_seconds())) # type: ignore[union-attr] 131 | else: 132 | self.assertIsNone(item.duration) 133 | pub_date = find_strip_text(el, 'pubDate') 134 | if pub_date: 135 | try: 136 | dt = parsedate_to_datetime(pub_date) 137 | except (TypeError, ValueError): 138 | if pub_date.endswith('Z'): 139 | pub_date = pub_date[:-1] + '+00:00' 140 | dt = datetime.fromisoformat(pub_date) 
141 | self.assertEqual(dt.date(), item.pub_date.date()) # type: ignore[union-attr] 142 | self.assertEqual(dt.time(), item.pub_date.time()) # type: ignore[union-attr] 143 | else: 144 | self.assertIsNone(item.pub_date) 145 | 146 | def test_xml(self) -> None: # noqa: PLR0912, C901 147 | cases: list[str | list[str] | dict[str, Any]] = [ 148 | '.', 149 | '.channel', 150 | '.channel/title', 151 | f'.channel/{itunes("owner")}/{itunes("email")}', 152 | f'.channel/{itunes("author")}', 153 | '.channel/description', 154 | [ 155 | '.channel/description', 156 | f'.channel/{itunes("summary")}' 157 | ], 158 | f'.channel/{itunes("image")}', 159 | [ 160 | '.channel/title', 161 | '.channel/image/title' 162 | ], 163 | [ 164 | '.channel/link', 165 | '.channel/image/link' 166 | ], 167 | { 168 | 'a': f'.channel/{itunes("image")}', 169 | 'b': '.channel/image/url', 170 | 'action': lambda el: el.text if el.tag == 'url' else el.get('href') 171 | }, 172 | '.channel/language', 173 | '.channel/link', 174 | '.channel/item/[1]/title', 175 | '.channel/item/[1]/description', 176 | '.channel/item/[1]/pubDate', 177 | '.channel/item/[1]/enclosure', 178 | f'.channel/item/[1]/{itunes("duration")}', 179 | '.channel/item/[1]/guid', 180 | '.channel/item/[1]/link', 181 | { 182 | 'a': f'.channel/item/[1]/{itunes("image")}', 183 | 'b': f'.channel/item/[1]/{itunes("image")}', 184 | 'action': lambda el: el.text if el.tag == 'url' else el.get('href') 185 | }, 186 | '.channel/item/[2]/title', 187 | '.channel/item/[2]/description', 188 | '.channel/item/[2]/pubDate', 189 | '.channel/item/[2]/enclosure', 190 | f'.channel/item/[2]/{itunes("duration")}', 191 | '.channel/item/[2]/guid', 192 | '.channel/item/[2]/link', 193 | { 194 | 'a': f'.channel/item/[2]/{itunes("image")}', 195 | 'b': f'.channel/item/[3]/{itunes("image")}', 196 | 'action': lambda el: el.text if el.tag == 'url' else el.get('href') 197 | }, 198 | ] 199 | for idx, element in enumerate(self.elements): 200 | doc = self.rss_docs[idx] 201 | podcast = 
Podcast.from_rss(doc) 202 | xml = podcast.xml 203 | for case in cases: 204 | if isinstance(case, dict): 205 | a = element.find(case['a']) 206 | if a is None: 207 | continue 208 | b = xml.find(case['b']) 209 | action: Callable[[Element], Any] = case['action'] 210 | self.assertEqual(action(a), action(b), case) # type: ignore[arg-type] 211 | else: 212 | if isinstance(case, list): 213 | a = element.find(case[0]) 214 | b = xml.find(case[1]) 215 | else: 216 | a = element.find(case) 217 | b = xml.find(case) 218 | if a is None: 219 | continue 220 | if a.text: 221 | a.text = a.text.strip() 222 | if b.text: # type: ignore[union-attr] 223 | b.text = b.text.strip() # type: ignore[union-attr] 224 | a_t = a.text or a.attrib.pop('text', '') 225 | b_t = b.text or b.attrib.pop('text', '') # type: ignore[union-attr] 226 | if 'pubDate' in case: 227 | self.assertEqual( 228 | parsedate_to_datetime(a_t), parsedate_to_datetime(b_t), case) # type: ignore[arg-type] 229 | elif 'duration' in case: 230 | self.assertEqual(convert_to_seconds(a_t), convert_to_seconds(b_t), # type: ignore[arg-type] 231 | case) 232 | else: 233 | self.assertEqual(a_t, b_t, case) 234 | b_attr = b.attrib.copy() # type: ignore[union-attr] 235 | if 'isPermaLink' not in a.attrib: 236 | b_attr.pop('isPermaLink', None) 237 | self.assertEqual(a.attrib, b_attr, case) 238 | 239 | def test_merge(self) -> None: 240 | for doc in self.rss_docs: 241 | ap = Podcast.from_rss(doc) 242 | bp = Podcast.from_rss(doc) 243 | self.assertFalse(ap.merge(bp)) 244 | items = list(bp.items.ensure()) 245 | items.insert( 246 | 0, 247 | Episode( 248 | enclosure=items[0].enclosure, 249 | title='foo', 250 | description='bar', 251 | guid='baz', 252 | duration=items[0].duration, 253 | explicit=False, 254 | pub_date=datetime.now(timezone.utc), 255 | ) 256 | ) 257 | cases = [ 258 | ('items', PlainResource(items)), 259 | ('link', urlparse('https://example.com')), 260 | ('title', 'foo'), 261 | ('image', 
PlainResource(urlparse('https://example.com/image.png'))), 262 | ('description', 'bar'), 263 | ('author', 'baz'), 264 | ('categories', ['foo', 'bar']), 265 | ('explicit', True), 266 | ('language', 'ja'), 267 | ] 268 | for field, value in cases: 269 | setattr(bp, field, value) 270 | self.assertTrue(ap.merge(bp), f'{field} is not merged') 271 | if isinstance(value, Resource): 272 | ar = getattr(ap, field).get() 273 | br = getattr(bp, field).get() 274 | if isinstance(ar, list): 275 | ar = set(ar) 276 | br = set(br) 277 | self.assertEqual(ar, br, f'{field} is not merged: {value}') 278 | else: 279 | self.assertEqual(getattr(ap, field), value, f'{field} is not merged: {value}') 280 | -------------------------------------------------------------------------------- /tests/util/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YogiLiu/podmaker/93153aedfe643f97e912a2ca8cb77df311070a2b/tests/util/__init__.py -------------------------------------------------------------------------------- /tests/util/test_retry.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from unittest import mock 3 | 4 | from podmaker.util import retry 5 | 6 | 7 | class TestRetry(unittest.TestCase): 8 | def test_no_exception(self) -> None: 9 | spy = mock.Mock(return_value=1) 10 | func = retry(3)(spy) 11 | self.assertEqual(1, func()) 12 | self.assertEqual(1, spy.call_count) 13 | 14 | def test_retry_success(self) -> None: 15 | spy = mock.Mock(side_effect=[Exception, 1]) 16 | func = retry(3)(spy) 17 | self.assertEqual(1, func()) 18 | self.assertEqual(2, spy.call_count) 19 | 20 | def test_retry_failed(self) -> None: 21 | spy = mock.Mock(side_effect=Exception) 22 | func = retry(3)(spy) 23 | self.assertRaises(Exception, func) 24 | self.assertEqual(4, spy.call_count) 25 | 26 | def test_specify_exception(self) -> None: 27 | spy = mock.Mock(side_effect=ValueError) 28 | func = 
retry(3, catch=TypeError)(spy) 29 | self.assertRaises(ValueError, func) 30 | self.assertEqual(1, spy.call_count) 31 | 32 | spy = mock.Mock(side_effect=ValueError) 33 | func = retry(3, catch=ValueError)(spy) 34 | self.assertRaises(ValueError, func) 35 | self.assertEqual(4, spy.call_count) 36 | --------------------------------------------------------------------------------