├── .gitignore
├── CHANGELOG.md
├── LICENSE
├── README.md
├── README.zh_CN.md
├── config.example.toml
├── podmaker
├── __init__.py
├── __main__.py
├── cli.py
├── config
│ ├── __init__.py
│ ├── core.py
│ └── storage.py
├── fetcher
│ ├── __init__.py
│ ├── core.py
│ └── youtube.py
├── processor
│ ├── __init__.py
│ ├── core.py
│ ├── scheduling.py
│ └── task.py
├── rss
│ ├── README.md
│ ├── __init__.py
│ ├── core.py
│ ├── enclosure.py
│ ├── episode.py
│ ├── podcast.py
│ └── util
│ │ ├── __init__.py
│ │ ├── namespace.py
│ │ └── parse.py
├── storage
│ ├── __init__.py
│ ├── core.py
│ ├── local.py
│ └── s3.py
└── util
│ ├── __init__.py
│ ├── exit.py
│ └── retry_util.py
├── poetry.lock
├── pyproject.toml
├── systemd
└── podmaker.service
└── tests
├── __init__.py
├── data
├── apple.rss.test.xml
└── google.rss.test.xml
├── helper.py
├── provider
├── __init__.py
├── test_resource.py
└── test_youtube.py
├── storage
├── __init__.py
├── test_local.py
└── test_s3.py
├── test_config.py
├── test_rss.py
└── util
├── __init__.py
└── test_retry.py
/.gitignore:
--------------------------------------------------------------------------------
1 | .idea/
2 | .vscode/
3 | config.toml
4 |
5 | # Created by https://www.toptal.com/developers/gitignore/api/python
6 | # Edit at https://www.toptal.com/developers/gitignore?templates=python
7 |
8 | ### Python ###
9 | # Byte-compiled / optimized / DLL files
10 | __pycache__/
11 | *.py[cod]
12 | *$py.class
13 |
14 | # C extensions
15 | *.so
16 |
17 | # Distribution / packaging
18 | .Python
19 | build/
20 | develop-eggs/
21 | dist/
22 | downloads/
23 | eggs/
24 | .eggs/
25 | lib/
26 | lib64/
27 | parts/
28 | sdist/
29 | var/
30 | wheels/
31 | share/python-wheels/
32 | *.egg-info/
33 | .installed.cfg
34 | *.egg
35 | MANIFEST
36 |
37 | # PyInstaller
38 | # Usually these files are written by a python script from a template
39 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
40 | *.manifest
41 | *.spec
42 |
43 | # Installer logs
44 | pip-log.txt
45 | pip-delete-this-directory.txt
46 |
47 | # Unit test / coverage reports
48 | htmlcov/
49 | .tox/
50 | .nox/
51 | .coverage
52 | .coverage.*
53 | .cache
54 | nosetests.xml
55 | coverage.xml
56 | *.cover
57 | *.py,cover
58 | .hypothesis/
59 | .pytest_cache/
60 | cover/
61 |
62 | # Translations
63 | *.mo
64 | *.pot
65 |
66 | # Django stuff:
67 | *.log
68 | local_settings.py
69 | db.sqlite3
70 | db.sqlite3-journal
71 |
72 | # Flask stuff:
73 | instance/
74 | .webassets-cache
75 |
76 | # Scrapy stuff:
77 | .scrapy
78 |
79 | # Sphinx documentation
80 | docs/_build/
81 |
82 | # PyBuilder
83 | .pybuilder/
84 | target/
85 |
86 | # Jupyter Notebook
87 | .ipynb_checkpoints
88 |
89 | # IPython
90 | profile_default/
91 | ipython_config.py
92 |
93 | # pyenv
94 | # For a library or package, you might want to ignore these files since the code is
95 | # intended to run in multiple environments; otherwise, check them in:
96 | # .python-version
97 |
98 | # pipenv
99 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
100 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
101 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
102 | # install all needed dependencies.
103 | #Pipfile.lock
104 |
105 | # poetry
106 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
107 | # This is especially recommended for binary packages to ensure reproducibility, and is more
108 | # commonly ignored for libraries.
109 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
110 | #poetry.lock
111 |
112 | # pdm
113 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
114 | #pdm.lock
115 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
116 | # in version control.
117 | # https://pdm.fming.dev/#use-with-ide
118 | .pdm.toml
119 |
120 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
121 | __pypackages__/
122 |
123 | # Celery stuff
124 | celerybeat-schedule
125 | celerybeat.pid
126 |
127 | # SageMath parsed files
128 | *.sage.py
129 |
130 | # Environments
131 | .env
132 | .venv
133 | env/
134 | venv/
135 | ENV/
136 | env.bak/
137 | venv.bak/
138 |
139 | # Spyder project settings
140 | .spyderproject
141 | .spyproject
142 |
143 | # Rope project settings
144 | .ropeproject
145 |
146 | # mkdocs documentation
147 | /site
148 |
149 | # mypy
150 | .mypy_cache/
151 | .dmypy.json
152 | dmypy.json
153 |
154 | # Pyre type checker
155 | .pyre/
156 |
157 | # pytype static type analyzer
158 | .pytype/
159 |
160 | # Cython debug symbols
161 | cython_debug/
162 |
163 | # PyCharm
164 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
165 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
166 | # and can be added to the global gitignore or merged into this file. For a more nuclear
167 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
168 | #.idea/
169 |
170 | ### Python Patch ###
171 | # Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration
172 | poetry.toml
173 |
174 | # ruff
175 | .ruff_cache/
176 |
177 | # LSP config files
178 | pyrightconfig.json
179 |
180 | # End of https://www.toptal.com/developers/gitignore/api/python
181 |
182 |
183 | # Created by https://www.toptal.com/developers/gitignore/api/linux
184 | # Edit at https://www.toptal.com/developers/gitignore?templates=linux
185 |
186 | ### Linux ###
187 | *~
188 |
189 | # temporary files which can be created if a process still has a handle open of a deleted file
190 | .fuse_hidden*
191 |
192 | # KDE directory preferences
193 | .directory
194 |
195 | # Linux trash folder which might appear on any partition or disk
196 | .Trash-*
197 |
198 | # .nfs files are created when an open file is removed but is still being accessed
199 | .nfs*
200 |
201 | # End of https://www.toptal.com/developers/gitignore/api/linux
202 |
203 | # Created by https://www.toptal.com/developers/gitignore/api/macos
204 | # Edit at https://www.toptal.com/developers/gitignore?templates=macos
205 |
206 | ### macOS ###
207 | # General
208 | .DS_Store
209 | .AppleDouble
210 | .LSOverride
211 |
212 | # Icon must end with two \r
213 | Icon
214 |
215 |
216 | # Thumbnails
217 | ._*
218 |
219 | # Files that might appear in the root of a volume
220 | .DocumentRevisions-V100
221 | .fseventsd
222 | .Spotlight-V100
223 | .TemporaryItems
224 | .Trashes
225 | .VolumeIcon.icns
226 | .com.apple.timemachine.donotpresent
227 |
228 | # Directories potentially created on remote AFP share
229 | .AppleDB
230 | .AppleDesktop
231 | Network Trash Folder
232 | Temporary Items
233 | .apdisk
234 |
235 | ### macOS Patch ###
236 | # iCloud generated files
237 | *.icloud
238 |
239 | # End of https://www.toptal.com/developers/gitignore/api/macos
240 |
241 | # Created by https://www.toptal.com/developers/gitignore/api/windows
242 | # Edit at https://www.toptal.com/developers/gitignore?templates=windows
243 |
244 | ### Windows ###
245 | # Windows thumbnail cache files
246 | Thumbs.db
247 | Thumbs.db:encryptable
248 | ehthumbs.db
249 | ehthumbs_vista.db
250 |
251 | # Dump file
252 | *.stackdump
253 |
254 | # Folder config file
255 | [Dd]esktop.ini
256 |
257 | # Recycle Bin used on file shares
258 | $RECYCLE.BIN/
259 |
260 | # Windows Installer files
261 | *.cab
262 | *.msi
263 | *.msix
264 | *.msm
265 | *.msp
266 |
267 | # Windows shortcuts
268 | *.lnk
269 |
270 | # End of https://www.toptal.com/developers/gitignore/api/windows
271 |
272 |
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | ## 0.9.0 (2023-09-11)
2 |
3 | ### Feat
4 |
5 | - **processor.scheduling**: support custom interval
6 |
7 | ## 0.8.1 (2023-09-01)
8 |
9 | ### Fix
10 |
11 | - fix typo
12 |
13 | ## 0.8.0 (2023-09-01)
14 |
15 | ### BREAKING CHANGE
16 |
17 | - Should install yt_dlp and boto3 by extra dependencies.
18 |
19 | ### Feat
20 |
21 | - **fetcher,storage**: make yt_dlp and boto3 optional
22 | - **util**: add retry decorator
23 | - **fetcher.core,processor**: add start and stop hook to fetcher
24 |
25 | ### Fix
26 |
27 | - **util**: rename retry module to retry_util
28 |
29 | ## 0.7.4 (2023-08-27)
30 |
31 | ### Fix
32 |
33 | - **rss.podcast**: fix pu_bdate format
34 | - **fetcher.processor**: check exit signal
35 |
36 | ## 0.7.3 (2023-08-26)
37 |
38 | ### Fix
39 |
40 | - **config**: fix tomlkit
41 |
42 | ## 0.7.2 (2023-08-26)
43 |
44 | ### Fix
45 |
46 | - **config.core**: fix union
47 |
48 | ## 0.7.1 (2023-08-26)
49 |
50 | ### Fix
51 |
52 | - **config.storage**: remove absmeta
53 |
54 | ## 0.7.0 (2023-08-25)
55 |
56 | ### Feat
57 |
58 | - **storage**: support local storage
59 |
60 | ### Fix
61 |
62 | - **cli**: support local storage
63 | - **rss.podcast**: fix items merge
64 | - **fetcher.youtube**: add source id to skip log
65 |
66 | ## 0.6.1 (2023-08-25)
67 |
68 | ### Fix
69 |
70 | - **fetcher.youtube**: cache dir
71 |
72 | ## 0.6.0 (2023-08-25)
73 |
74 | ### Feat
75 |
76 | - **fetcher.youtube**: add source id to log
77 |
78 | ## 0.5.0 (2023-08-25)
79 |
80 | ### Feat
81 |
82 | - **storage**: support start and stop storage
83 | - **config**: support filter episodes by regex
84 | - **config**: use storage instead s3
85 |
86 | ## 0.4.0 (2023-08-24)
87 |
88 | ### Feat
89 |
90 | - **fetcher**: support youtube channel
91 |
92 | ### Fix
93 |
94 | - **rss.core**: remove stylesheet
95 | - **fetcher.youtube**: catch download error
96 | - **rss.podcast**: fix image url
97 |
98 | ## 0.3.1 (2023-08-23)
99 |
100 | ### Fix
101 |
102 | - **processor.task**: fix mime
103 | - **asset**: fix script url
104 |
105 | ## 0.3.0 (2023-08-23)
106 |
107 | ### BREAKING CHANGE
108 |
109 | - changes for config file
110 |
111 | ### Feat
112 |
113 | - **rss**: add stylesheet
114 | - add exit signal
115 |
116 | ### Fix
117 |
118 | - **rss.core**: fix encoding of rss bytes
119 | - **config**: change s3.cdn_prefix to s3.public_endpoint
120 |
121 | ### Refactor
122 |
123 | - **processor**: move execution to Task class, and support task hook
124 |
125 | ## 0.2.2 (2023-08-22)
126 |
127 | ### Fix
128 |
129 | - **fetcher.youtube**: fetch image and link for episode
130 |
131 | ## 0.2.1 (2023-08-21)
132 |
133 | ### Fix
134 |
135 | - **config**: quote id before used to generate storage key
136 |
137 | ## 0.2.0 (2023-08-21)
138 |
139 | ### Feat
140 |
141 | - add cli
142 | - **processor**: add processor
143 | - **rss**: support merging
144 | - **rss**: use qname to manage namespace
145 | - **rss**: support load rss object from xml string
146 | - **rss.core**: add plain resource and rss deserializer
147 | - **config**: add source config
148 | - **config**: support optional env and required env
149 | - add config
150 | - **rss**: add rss generator and serializer
151 | - complete youtube parser and s3 storage
152 |
153 | ### Fix
154 |
155 | - **rss**: compatible with apple's requirements
156 | - **processor.scheduling**: add next run time to add_job
157 | - **config**: fix decorator
158 | - **config**: fix tomlkit
159 | - **processor.scheduling**: fix shutdown
160 | - **fetcher.youtube**: fix logger
161 | - **config**: rename source.name to source.id
162 | - **processor.scheduling**: fix shutdown
163 | - **processor.core**: fix rss key
164 | - **rss**: fix text
165 | - **config**: add app config
166 | - **processor.core**: fix original file
167 | - change cli argument
168 | - **fetcher.youtube**: add lock
169 | - **fetcher**: rename parser to fetcher
170 | - **parser.youtube,-storage.s3**: remove redundant config dependency
171 | - **rss**: reduce public class
172 | - **env**: use dataclass as env object
173 | - **rss.podcast**: fix category pattern
174 | - supplement podcast field
175 |
176 | ### Refactor
177 |
178 | - **cli**: remove cli logic to cli module
179 | - add log
180 | - **rss**: hide unnecessary property
181 | - **parser.youtube**: use lru_cache
182 | - **config**: rename env to config and use pydantic manage config
183 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | This is free and unencumbered software released into the public domain.
2 |
3 | Anyone is free to copy, modify, publish, use, compile, sell, or
4 | distribute this software, either in source code form or as a compiled
5 | binary, for any purpose, commercial or non-commercial, and by any
6 | means.
7 |
8 | In jurisdictions that recognize copyright laws, the author or authors
9 | of this software dedicate any and all copyright interest in the
10 | software to the public domain. We make this dedication for the benefit
11 | of the public at large and to the detriment of our heirs and
12 | successors. We intend this dedication to be an overt act of
13 | relinquishment in perpetuity of all present and future rights to this
14 | software under copyright law.
15 |
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 | OTHER DEALINGS IN THE SOFTWARE.
23 |
24 | For more information, please refer to <https://unlicense.org>
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Podmaker
2 |
3 | *Read this document in other languages: [English](README.md), [简体中文](README.zh_CN.md)*
4 |
5 | Convert online media into podcast feeds.
6 |
7 | 
8 | 
9 | 
10 | 
11 | 
12 |
13 |
14 | ## Features
15 |
16 | - Extract audio from online videos.
17 | - No need to deploy web services.
18 | - Generate podcast feeds.
19 | - Deploy with watch mode to keep feeds up-to-date.
20 |
21 | ## Dependencies
22 |
23 | This tool uses **ffmpeg** to extract audio from videos. Ensure it's installed within `$PATH` before using this tool.
24 |
25 | Additionally, you should install extra dependencies according to your requirements:
26 |
27 | - `podmaker[all]`: Install all extra dependencies.
28 | - `podmaker[s3]`: Install dependencies for S3 storage.
29 | - `podmaker[youtube]`: Install dependencies for YouTube.
30 |
31 | Install multiple extra dependencies simultaneously using `podmaker[extra1,extra2,...]`.
32 |
33 | ## Configuration
34 |
35 | Before diving into this tool, craft a configuration file, a TOML file to be precise.
36 | By default, the file resides at `${WORK_DIR}/config.toml`. Customize the path using the `-c` or `--config` option.
37 | An example configuration file can be found at [config.example.toml](https://github.com/YogiLiu/podmaker/blob/main/config.example.toml).
38 |
39 | ## Usage
40 |
41 | ### Systemd
42 |
43 | Deploy this tool in the background with systemd (requires root privileges):
44 |
45 | ```bash
46 | # create virtual environment
47 | apt install python3 python3-venv
48 | mkdir -p /opt/podmaker && cd /opt/podmaker
49 | python3 -m venv venv
50 |
51 | # install podmaker
52 | ./venv/bin/pip install "podmaker[all]"
53 |
54 | # create and edit config file
55 | curl -o config.toml https://raw.githubusercontent.com/YogiLiu/podmaker/main/config.example.toml
56 | vim config.toml
57 |
58 | # create systemd service
59 | curl -o /etc/systemd/system/podmaker.service https://raw.githubusercontent.com/YogiLiu/podmaker/main/systemd/podmaker.service
60 | systemctl daemon-reload
61 |
62 | # enable and start service
63 | systemctl enable podmaker
64 | systemctl start podmaker
65 | ```
66 |
67 | ### Manual
68 |
69 | ### Using pip
70 |
71 | For the optimal experience, we recommend installing this tool within a virtual environment.
72 |
73 | ```bash
74 | pip install "podmaker[all]"
75 | ```
76 |
77 | ### Using `pipx`
78 |
79 | ```bash
80 | pipx install "podmaker[all]"
81 | ```
82 |
83 | ### Run
84 |
85 | ```bash
86 | podmaker -c path/to/config.toml
87 | ```
88 |
89 | or
90 |
91 | ```bash
92 | python -m podmaker -c path/to/config.toml
93 | ```
94 |
95 | ## Roadmap
96 |
97 | ### Platforms
98 |
99 | - [x] YouTube
100 | - [x] Playlist
101 | - [x] Channel
102 | - [ ] BiliBili
103 |
104 | ### Resource Hosting
105 |
106 | - [x] S3
107 | - [x] Local
108 |
109 | ## Contributing
110 |
111 | Your contributions are invaluable. Feel free to submit pull requests.
112 | Before committing, ensure your changes pass unit tests and `autohooks`.
113 |
114 | To activate `autohooks`, use the following command:
115 |
116 | ```bash
117 | poetry run autohooks activate --mode poetry
118 | ```
119 |
120 | This process will automatically lint, format, and sort code imports.
121 |
122 | When introducing new features, remember to provide corresponding tests.
123 |
124 | ## License
125 |
126 | For licensing details, refer to [LICENSE](https://github.com/YogiLiu/podmaker/blob/main/LICENSE).
--------------------------------------------------------------------------------
/README.zh_CN.md:
--------------------------------------------------------------------------------
1 | # Podmaker
2 |
3 | *本文档的其他语言: [English](README.md), [简体中文](README.zh_CN.md)*
4 |
5 | 将在线媒体转换成播客订阅。
6 |
7 | 
8 | 
9 | 
10 | 
11 | 
12 |
13 |
14 | ## 功能
15 |
16 | - 从网络视频中提取音频;
17 | - 无需额外部署 Web 服务;
18 | - 自动生成播客订阅;
19 | - 通过 `watch` 模式自动更新订阅。
20 |
21 | ## 依赖
22 |
23 | 本工具使用 **ffmpeg** 从视频中提取音频,请确保 `$PATH` 中包含 `ffmpeg`。
24 |
25 | 另外, 你可以根据你的需求安装额外的依赖:
26 |
27 | - `podmaker[all]`: 安装下述的所有依赖;
28 | - `podmaker[s3]`: 提供 S3 支持;
29 | - `podmaker[youtube]`: 提供 YouTube 支持。
30 |
31 | 你可以使用 `podmaker[extra1,extra2,...]` 的方式同时安装多个额外依赖。
32 |
33 | ## 配置
34 |
35 | 在开始使用本工具之前,请先准备一个 TOML 格式的配置文件。
36 | 默认情况下,配置文件位于 `${WORK_DIR}/config.toml`。你可以通过 `-c` 或 `--config` 选项来指定配置文件的路径。
37 | 你可以在 [config.example.toml](https://github.com/YogiLiu/podmaker/blob/main/config.example.toml) 中找到一个示例配置文件。
38 |
39 | ## 使用方法
40 |
41 | ### Systemd
42 |
43 | 使用 systemd 后台运行本工具(需要 root 权限):
44 |
45 | ```bash
46 | # 创建虚拟环境
47 | apt install python3 python3-venv
48 | mkdir -p /opt/podmaker && cd /opt/podmaker
49 | python3 -m venv venv
50 |
51 | # 安装 podmaker
52 | ./venv/bin/pip install "podmaker[all]"
53 |
54 | # 创建配置文件
55 | curl -o config.toml https://raw.githubusercontent.com/YogiLiu/podmaker/main/config.example.toml
56 | vim config.toml
57 |
58 | # 创建 systemd 服务
59 | curl -o /etc/systemd/system/podmaker.service https://raw.githubusercontent.com/YogiLiu/podmaker/main/systemd/podmaker.service
60 | systemctl daemon-reload
61 |
62 | # 启动服务,并设置开机自启
63 | systemctl enable podmaker
64 | systemctl start podmaker
65 | ```
66 |
67 | ### 手动运行
68 |
69 | ### 使用 pip 安装
70 |
71 | 为了获得最佳体验,我们建议你在虚拟环境中安装本工具。
72 |
73 | ```bash
74 | pip install "podmaker[all]"
75 | ```
76 |
77 | ### 使用 `pipx` 安装
78 |
79 | ```bash
80 | pipx install "podmaker[all]"
81 | ```
82 |
83 | ### 运行
84 |
85 | ```bash
86 | podmaker -c path/to/config.toml
87 | ```
88 |
89 | 或者
90 |
91 | ```bash
92 | python -m podmaker -c path/to/config.toml
93 | ```
94 |
95 | ## 项目规划
96 |
97 | ### 平台支持
98 |
99 | - [x] YouTube
100 | - [x] 播放列表
101 | - [x] 频道
102 | - [ ] 哔哩哔哩(鸽)
103 |
104 | ### 资源托管
105 |
106 | - [x] S3
107 | - [x] 本地文件
108 |
109 | ## 贡献指南
110 |
111 | 你的贡献弥足珍贵,请不要吝啬提出你的 Pull Request。
112 | 在提交代码之前,请确保你的代码通过单元测试和 `autohooks`。
113 |
114 | 你可以使用下述命令激活 `autohooks`:
115 |
116 | ```bash
117 | poetry run autohooks activate --mode poetry
118 | ```
119 |
120 | 这个程序会自动进行代码风格检查、格式化和 import 排序。
121 |
122 | 如果你添加了新的功能,请确保提供了相应的测试。
123 |
124 | ## 许可证
125 |
126 | 查看许可证详情,请参阅 [LICENSE](https://github.com/YogiLiu/podmaker/blob/main/LICENSE)。
--------------------------------------------------------------------------------
/config.example.toml:
--------------------------------------------------------------------------------
1 | [app]
2 | # running mode, "oneshot" or "watch"
3 | # - oneshot: generate the feed and exit
4 | # - watch: generate the feed and watch for changes
5 | mode = "oneshot"
6 |
7 | # level of logging, "DEBUG", "INFO", "WARNING", "ERROR"
8 | loglevel = "INFO"
9 |
10 | # optional, the admin of the feed
11 | [owner]
12 | name = "podmaker"
13 | email = "admin@podmaker.dev"
14 |
15 | # notice: the sources is an array, it must specify using `[[]]`
16 | [[sources]]
17 | # used to generate the feed url, must be unique, prefer to use numbers, letters, space and underscores
18 | id = "source_1"
19 | # optional, the display name of the source
20 | name = "Source 1"
21 | # optional, the regex to match the episode
22 | regex = "Episode \\d+"
23 | # the url of the source
24 | url = "https://example.com/source_1/"
25 | # optional, the interval to check the source, in seconds, default to 3600
26 | interval = 3600
27 |
28 | [[sources]]
29 | id = "source_2"
30 | name = "Source 2"
31 | regex = "Episode \\d+"
32 | url = "https://example.com/source_2/"
33 | interval = 3600
34 |
35 | # only one is allowed to be specified
36 | [storage]
37 | # destination of the generated feed, support "local" and "s3"
38 | dest = "local"
39 | # the directory to store the generated feed, you must change it
40 | # the files will save to $base_dir/data/, and the feed will save to $base_dir/data/feed.xml
41 | # you can use nginx to serve the $base_dir/data/
42 | # !!WARNING!! don't serve the $base_dir/ directly, it may leak your config file
43 | base_dir = "/path/to/storage"
44 | # must be public-read, this endpoint should be pointed to $base_dir/data/
45 | public_endpoint = "https://example.com/"
46 |
47 | #[storage]
48 | #dest = "s3"
49 | #access_key = "123"
50 | #access_secret = "456"
51 | #bucket = "podmake"
52 | #endpoint = "https://s3.amazonaws.com/"
53 | #public_endpoint = "https://s3.amazonaws.com/"
54 |
--------------------------------------------------------------------------------
/podmaker/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YogiLiu/podmaker/93153aedfe643f97e912a2ca8cb77df311070a2b/podmaker/__init__.py
--------------------------------------------------------------------------------
/podmaker/__main__.py:
--------------------------------------------------------------------------------
# Package entry point: allows running the tool as `python -m podmaker`.
from podmaker.cli import run

if __name__ == '__main__':
    run()
5 |
--------------------------------------------------------------------------------
/podmaker/cli.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import logging
3 | import sys
4 | from pathlib import Path
5 |
6 | from podmaker.config import ConfigError, PMConfig
7 | from podmaker.processor import get_processor
8 | from podmaker.storage import get_storage
9 | from podmaker.util import exit_signal
10 |
11 | logger = logging.getLogger(__name__)
12 |
13 |
def run() -> None:
    """Entry point for the ``podmaker`` CLI.

    Parses command-line arguments, loads the TOML config, configures
    logging, then starts the storage backend and the processor.
    Exits with status 1 when the config file is missing or invalid.
    """
    parser = argparse.ArgumentParser(prog='podmaker', description='Podcast generator.')
    parser.add_argument('-c', '--conf', help='Path to config file (default: config.toml).', type=Path,
                        default=Path('config.toml'))
    args = parser.parse_args()
    config_path = args.conf
    config: PMConfig
    try:
        config = PMConfig.from_file(config_path)
    except ConfigError as e:
        # Logging is not configured yet; the root logger's last-resort
        # handler still writes this message to stderr.
        logger.error(e)
        sys.exit(1)
    logging.basicConfig(
        level=config.app.loglevel,
        format='%(asctime)s %(levelname)s %(name)s %(message)s',
    )
    storage = get_storage(config.storage)
    storage.start()
    # Lazy %-style args avoid formatting when the level is disabled.
    logger.info('running in %s mode', config.app.mode)
    processor = get_processor(config, storage)
    exit_signal.listen()
    try:
        processor.run()
    finally:
        # Bug fix: the original only stopped storage on exception, so a
        # normal oneshot run never released the backend. try/finally stops
        # it on both success and failure, and still propagates exceptions.
        storage.stop()
40 |
--------------------------------------------------------------------------------
/podmaker/config/__init__.py:
--------------------------------------------------------------------------------
1 | __all__ = ['OwnerConfig', 'AppConfig', 'StorageConfig', 'SourceConfig', 'PMConfig', 'ConfigError', 'S3Config',
2 | 'LocalConfig']
3 |
4 | from podmaker.config.core import AppConfig, ConfigError, OwnerConfig, PMConfig, SourceConfig
5 | from podmaker.config.storage import LocalConfig, S3Config, StorageConfig
6 |
--------------------------------------------------------------------------------
/podmaker/config/core.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import re
4 | import sys
5 | from pathlib import PurePath
6 | from typing import Literal, Optional, Union
7 | from urllib.parse import quote
8 |
9 | from pydantic import BaseModel, EmailStr, Field, HttpUrl, ValidationError
10 |
11 | from podmaker.config.storage import LocalConfig, S3Config
12 |
13 | if sys.version_info >= (3, 11):
14 | import tomllib as toml
15 | else:
16 | import tomlkit as toml
17 |
18 |
class OwnerConfig(BaseModel):
    """Contact information of the feed owner (see the `[owner]` table in config)."""

    # Display name; optional, but must be non-empty when provided.
    name: Optional[str] = Field(None, min_length=1, frozen=True)
    # Contact e-mail, validated by pydantic's EmailStr; required.
    email: EmailStr = Field(frozen=True)
22 |
23 |
# noinspection PyNestedDecorators
class AppConfig(BaseModel):
    """Application-level runtime settings (`[app]` table)."""

    # 'oneshot' generates the feed once and exits; 'watch' keeps it updated.
    mode: Literal['oneshot', 'watch'] = Field('oneshot', frozen=True)
    # Root log level; passed to logging.basicConfig by the CLI.
    loglevel: Literal['DEBUG', 'INFO', 'WARNING', 'ERROR'] = Field('INFO', frozen=True)
28 |
29 |
class SourceConfig(BaseModel):
    """One media source to convert into a podcast feed (`[[sources]]` entries)."""

    # Unique identifier; also used (URL-quoted) as the storage key prefix.
    id: str = Field(min_length=1, frozen=True)
    # Optional display name; fetchers fall back to the source's own title.
    name: Optional[str] = Field(None, min_length=1, frozen=True)
    # Optional filter: only episodes whose title matches are kept.
    regex: Optional[re.Pattern[str]] = Field(None, frozen=True)
    url: HttpUrl = Field(frozen=True)
    # Re-check interval in seconds; defaults to one hour.
    interval: int = Field(1 * 60 * 60, ge=1, frozen=True)

    def get_storage_key(self, key: str) -> str:
        """Namespace *key* under this source's URL-quoted id."""
        return f'{quote(self.id)}/{key}'
39 |
40 |
class ConfigError(Exception):
    """Raised when the config file is missing or fails validation."""
    pass
43 |
44 |
class PMConfig(BaseModel):
    """Top-level podmaker configuration loaded from a TOML file."""

    # Optional feed owner contact info.
    owner: Optional[OwnerConfig] = Field(None, frozen=True)
    # Exactly one storage backend: S3 or local filesystem.
    storage: Union[S3Config, LocalConfig] = Field(frozen=True)
    # One or more media sources to turn into feeds.
    sources: tuple[SourceConfig, ...] = Field(frozen=True)
    app: AppConfig = Field(default_factory=AppConfig, frozen=True)

    @classmethod
    def from_file(cls, path: PurePath) -> PMConfig:
        """Load and validate a configuration from the TOML file at *path*.

        :raises ConfigError: if the file does not exist or fails validation.
        """
        try:
            with open(path, 'rb') as f:
                doc = toml.load(f)
            # tomlkit (Python < 3.11) returns a TOMLDocument wrapper; unwrap
            # it to plain containers (https://github.com/sdispater/tomlkit/issues/275).
            # tomllib (3.11+) already yields a plain dict, which has no unwrap.
            if getattr(doc, 'unwrap', None):
                data = doc.unwrap()
            else:
                data = doc
        except FileNotFoundError as e:
            raise ConfigError(f'config file not found: {path}') from e
        try:
            return cls(**data)
        except ValidationError as e:
            # Bug fix: chain the pydantic error (`from e`) so the original
            # validation details survive, matching the branch above.
            raise ConfigError(f'can not initial config: {e}') from e
67 |
--------------------------------------------------------------------------------
/podmaker/config/storage.py:
--------------------------------------------------------------------------------
1 | from pathlib import PurePath
2 | from typing import Literal
3 |
4 | from pydantic import BaseModel, Field, HttpUrl
5 |
6 | SupportedStorage = Literal['s3', 'local']
7 |
8 |
class StorageConfig(BaseModel):
    """Base class for storage backend configs; `dest` selects the backend."""

    # NOTE(review): min_length on a Literal-typed field looks ineffective —
    # the Literal already constrains the value; confirm pydantic accepts it.
    dest: SupportedStorage = Field(min_length=1, frozen=True)
11 |
12 |
class S3Config(StorageConfig):
    """Configuration for the S3 storage backend (requires the `s3` extra)."""

    dest: Literal['s3'] = Field(frozen=True)
    access_key: str = Field(min_length=1, frozen=True)
    access_secret: str = Field(min_length=1, frozen=True)
    bucket: str = Field(min_length=1, frozen=True)
    # API endpoint used for uploads.
    endpoint: HttpUrl = Field(frozen=True)
    # Public-read endpoint used to build URLs embedded in the feed.
    public_endpoint: HttpUrl = Field(frozen=True)
20 |
21 |
class LocalConfig(StorageConfig):
    """Configuration for the local-filesystem storage backend."""

    dest: Literal['local'] = Field(frozen=True)
    # NOTE(review): min_length is a string constraint; applying it to a
    # PurePath field is likely ineffective or rejected by pydantic — verify.
    base_dir: PurePath = Field(min_length=1, frozen=True)
    # Public URL prefix that serves $base_dir/data/ (see config.example.toml).
    public_endpoint: HttpUrl = Field(frozen=True)
26 |
--------------------------------------------------------------------------------
/podmaker/fetcher/__init__.py:
--------------------------------------------------------------------------------
1 | __all__ = ['Fetcher']
2 |
3 | from podmaker.fetcher.core import Fetcher
4 |
--------------------------------------------------------------------------------
/podmaker/fetcher/core.py:
--------------------------------------------------------------------------------
1 | from abc import ABC, abstractmethod
2 |
3 | from podmaker.config import SourceConfig
4 | from podmaker.rss import Podcast
5 |
6 |
class Fetcher(ABC):
    """Abstract base for per-platform fetchers that build a Podcast from a source."""

    @abstractmethod
    def fetch(self, source: SourceConfig) -> Podcast:
        """Fetch *source* and return the resulting Podcast; must be overridden."""
        raise NotImplementedError

    def start(self) -> None:
        """Lifecycle hook; no-op by default, override to acquire resources."""
        pass

    def stop(self) -> None:
        """Lifecycle hook; no-op by default, override to release resources."""
        pass
17 |
--------------------------------------------------------------------------------
/podmaker/fetcher/youtube.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | __all__ = ['YouTube']
4 |
5 | import logging
6 | import os
7 | import sys
8 | import tempfile
9 | from datetime import datetime, timedelta, timezone
10 | from functools import lru_cache
11 | from inspect import isgenerator
12 | from tempfile import TemporaryDirectory
13 | from typing import Any, Iterable
14 | from urllib.parse import ParseResult, urlparse
15 |
16 | from podmaker.config import OwnerConfig, SourceConfig
17 | from podmaker.fetcher import Fetcher
18 | from podmaker.rss import Enclosure, Episode, Owner, Podcast, Resource
19 | from podmaker.rss.core import PlainResource
20 | from podmaker.storage import Storage
21 | from podmaker.util import exit_signal
22 |
23 | logger = logging.getLogger(__name__)
24 |
25 | try:
26 | import yt_dlp
27 | except ImportError:
28 | logger.error('yt_dlp is not installed. youtube fetcher is not available.')
29 | sys.exit(1)
30 |
31 |
class YouTube(Fetcher):
    """Fetcher that turns a YouTube playlist/channel URL into a Podcast."""

    def __init__(self, storage: Storage, owner_config: OwnerConfig | None):
        self.storage = storage
        # Shared yt-dlp options: route its logging into our hierarchy and
        # keep its cache in the system temp dir instead of the working dir.
        self.ydl_opts = {
            'logger': logging.getLogger('yt_dlp'),
            'cachedir': tempfile.gettempdir(),
        }
        self.owner_config = owner_config

    def fetch_info(self, url: str) -> dict[str, Any]:
        """Return yt-dlp metadata for *url* without downloading anything."""
        with yt_dlp.YoutubeDL(self.ydl_opts) as ydl:
            # process=False leaves playlist 'entries' as a lazy generator.
            info = ydl.extract_info(str(url), download=False, process=False)  # type: dict[str, Any]
            return info

    def fetch(self, source: SourceConfig) -> Podcast:
        """Build a Podcast for *source*.

        :raises ValueError: if the URL is not a playlist/channel (no entries).
        """
        info = self.fetch_info(str(source.url))
        # Playlists/channels expose 'entries' as a generator (see fetch_info);
        # anything else (e.g. a single video URL) is unsupported here.
        if isgenerator(info.get('entries', None)):
            return self.fetch_entries(info, source)
        raise ValueError(f'unsupported url: {source.url}')

    def fetch_entries(self, info: dict[str, Any], source: SourceConfig) -> Podcast:
        """Assemble a Podcast from playlist/channel metadata; episodes stay lazy."""
        logger.info(f'[{source.id}] parse entries: {source.url}')
        if self.owner_config:
            owner = Owner(name=self.owner_config.name, email=self.owner_config.email)
        else:
            owner = None
        podcast = Podcast(
            # Entry wraps the generator so episodes are fetched on demand.
            items=Entry(info.get('entries', []), self.ydl_opts, self.storage, source),
            link=urlparse(info['webpage_url']),
            # Configured display name wins over the channel/playlist title.
            title=source.name or info['title'],
            image=EntryThumbnail(info['thumbnails']),
            description=info['description'],
            owner=owner,
            author=info['uploader'],
            categories=info.get('tags', []),
        )
        return podcast
69 |
70 |
class Entry(Resource[Iterable[Episode]]):
    """Lazy Resource that turns a stream of yt-dlp entries into Episodes."""

    def __init__(
            self, entries: Iterable[dict[str, Any]], ydl_opts: dict[str, Any], storage: Storage, source: SourceConfig):
        self.entries = entries
        self.ydl_opts = ydl_opts
        self.storage = storage
        self.source = source

    def get(self) -> Iterable[Episode] | None:
        # NOTE(review): because this body contains `yield`, calling get()
        # always returns a generator object, never None — the trailing
        # `return None` merely stops iteration. Confirm callers treat an
        # empty generator the same as a None result.
        logger.debug(f'[{self.source.id}] fetch items')
        with yt_dlp.YoutubeDL(self.ydl_opts) as ydl:
            is_empty = True
            for entry in self.entries:
                # Abort promptly if a shutdown signal was received.
                exit_signal.check()
                is_empty = False
                try:
                    video_info = ydl.extract_info(entry['url'], download=False)
                except yt_dlp.DownloadError as e:
                    # Skip unavailable/broken videos instead of failing the feed.
                    logger.error(f'[{self.source.id}] failed to fetch item({entry["url"]}) due to {e}')
                    continue
                # Optional title filter from the source config.
                if self.source.regex and not self.source.regex.search(video_info['title']):
                    logger.info(f'[{self.source.id}] skip item {video_info["id"]} due to regex')
                    continue
                # upload_date is a naive 'YYYYMMDD' string; interpret it as UTC.
                upload_at = datetime.strptime(video_info['upload_date'], '%Y%m%d').replace(tzinfo=timezone.utc)
                logger.info(f'[{self.source.id}] fetch item: {video_info["id"]}')
                yield Episode(
                    # Audio defers download/upload until the enclosure is resolved.
                    enclosure=Audio(video_info, self.ydl_opts, self.storage, self.source),
                    title=video_info['title'],
                    description=video_info['description'],
                    guid=video_info['id'],
                    duration=timedelta(seconds=video_info['duration']),
                    pub_date=upload_at,
                    link=urlparse(video_info['webpage_url']),
                    image=PlainResource(urlparse(video_info['thumbnail'])),
                )
            if is_empty:
                return None
108 |
109 |
class EntryThumbnail(Resource[ParseResult]):
    """Resolves a channel's largest thumbnail to its parsed URL."""

    def __init__(self, thumbnails: list[dict[str, Any]]):
        # thumbnails: yt-dlp thumbnail dicts; 'width' may be absent.
        self.thumbnails = thumbnails

    def get(self) -> ParseResult | None:
        """Return the URL of the widest thumbnail, or None when there are none."""
        if not self.thumbnails:
            return None
        widest = max(self.thumbnails, key=lambda thumb: thumb.get('width', 0))
        return urlparse(widest['url'])
120 |
121 |
class Audio(Resource[Enclosure]):
    """Lazily downloads a video's audio track and uploads it to storage.

    ``get()`` is memoised per instance, so repeated ``ensure()`` calls never
    re-download or re-upload the same episode.
    """

    def __init__(self, info: dict[str, Any], ydl_opts: dict[str, Any], storage: Storage, source: SourceConfig):
        # info: yt-dlp metadata for one video (needs 'id' and 'webpage_url').
        self.info = info
        # Base options force an mp3 audio-only download; caller options win on conflict.
        self.ydl_opts: dict[str, Any] = {
            'format': 'ba',
            'postprocessors': [{
                'key': 'FFmpegExtractAudio',
                'preferredcodec': 'mp3',
            }],
        }
        self.ydl_opts.update(ydl_opts)
        self.storage = storage
        self.source = source
        # Per-instance memo for get(). Replaces the previous
        # @lru_cache(maxsize=1) on the method, which kept instances alive via
        # the cache and shared a single slot across all Audio instances
        # (ruff B019), so a second instance's get() evicted the first's result.
        self._enclosure: Enclosure | None = None

    def upload(self, key: str) -> tuple[ParseResult, int]:
        """Download the audio into a temp dir and upload it under *key*.

        :return: tuple of (public URL, size of the file in bytes).
        """
        logger.debug(f'[{self.source.id}] upload audio: {key}')
        with TemporaryDirectory(prefix='podmaker_youtube_') as cache_dir:
            opts = {'paths': {'home': cache_dir}}
            opts.update(self.ydl_opts)
            with yt_dlp.YoutubeDL(opts) as ydl:
                logger.info(f'[{self.source.id}] fetch audio: {self.info["id"]}')
                downloaded_info = ydl.extract_info(self.info['webpage_url'])
                audio_path = downloaded_info['requested_downloads'][0]['filepath']
                length = os.path.getsize(audio_path)
                with open(audio_path, 'rb') as f:
                    logger.info(f'[{self.source.id}] upload audio: {key}')
                    url = self.storage.put(f, key=key, content_type='audio/mp3')
            return url, length

    def get(self) -> Enclosure | None:
        """Return the episode enclosure, uploading the audio on first use.

        Skips the download entirely when storage already holds the key.
        """
        if self._enclosure is not None:
            return self._enclosure
        logger.debug(f'[{self.source.id}] fetch audio: {self.info["id"]}')
        key = self.source.get_storage_key(f'youtube/{self.info["id"]}.mp3')
        info = self.storage.check(key)
        if info:
            logger.info(f'[{self.source.id}] audio already exists: {key}')
            url = info.uri
            length = info.size
        else:
            url, length = self.upload(key)
        self._enclosure = Enclosure(url=url, length=length, type='audio/mp3')
        return self._enclosure
163 |
--------------------------------------------------------------------------------
/podmaker/processor/__init__.py:
--------------------------------------------------------------------------------
1 | __all__ = ['Processor', 'ScheduleProcessor', 'get_processor']
2 |
3 | from podmaker.config import PMConfig
4 | from podmaker.processor.core import Processor
5 | from podmaker.processor.scheduling import ScheduleProcessor
6 | from podmaker.storage import Storage
7 |
8 |
def get_processor(config: PMConfig, storage: Storage) -> Processor:
    """Select a processor implementation for the configured run mode.

    ``watch`` mode gets the scheduling processor; any other mode runs once.
    """
    processor_cls = ScheduleProcessor if config.app.mode == 'watch' else Processor
    return processor_cls(config=config, storage=storage)
14 |
--------------------------------------------------------------------------------
/podmaker/processor/core.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import logging
4 | from concurrent.futures import ThreadPoolExecutor
5 | from contextlib import contextmanager
6 | from typing import Any, Iterator
7 |
8 | from podmaker.config import PMConfig, SourceConfig
9 | from podmaker.fetcher import Fetcher
10 | from podmaker.processor.task import Task
11 | from podmaker.storage import Storage
12 | from podmaker.util import exit_signal
13 |
14 | logger = logging.getLogger(__name__)
15 |
16 |
class Processor:
    """Runs one fetch-and-publish pass over every configured source."""

    def __init__(self, config: PMConfig, storage: Storage):
        self._config = config
        self._storage = storage
        # Subclasses customise shutdown via exit_handler(); the private
        # wrapper adds logging before delegating.
        exit_signal.register(self._exit_handler)
        # Fetchers are created lazily, keyed by source host.
        self._fetcher_instances: dict[str, Fetcher] = {}

    @contextmanager
    def _context(self) -> Iterator[None]:
        # Start/stop the lifecycle of every known fetcher around the run.
        # NOTE(review): fetchers are created lazily inside _tasks, after this
        # start loop has already executed, so a fetcher instantiated during
        # the run gets stop() on exit without a matching start() — confirm
        # whether Fetcher.start() is optional.
        for fetcher in self._fetcher_instances.values():
            fetcher.start()
        try:
            yield
        finally:
            for fetcher in self._fetcher_instances.values():
                fetcher.stop()

    def _get_fetcher(self, source: SourceConfig) -> Fetcher:
        # One fetcher per host, created on first use; only YouTube is supported.
        if source.url.host not in self._fetcher_instances:
            if source.url.host == 'www.youtube.com':
                from podmaker.fetcher.youtube import YouTube
                self._fetcher_instances[source.url.host] = YouTube(self._storage, self._config.owner)
            else:
                raise ValueError(f'unsupported host: {source.url.host}')
        return self._fetcher_instances[source.url.host]

    @property
    def _tasks(self) -> Iterator[Task]:
        # Lazily build one Task per configured source.
        for source in self._config.sources:
            fetcher = self._get_fetcher(source)
            yield Task(fetcher, source, self._storage, self._config.owner)

    def _exit_handler(self, *_: Any) -> None:
        # Registered with exit_signal; logs, then calls the overridable hook.
        logger.warning('received exit signal')
        self.exit_handler()

    def exit_handler(self, *_: Any) -> None:
        # Hook for subclasses (e.g. to stop a scheduler); default is a no-op.
        pass

    def run(self) -> None:
        """Submit every task to a bounded thread pool and wait for completion."""
        with self._context():
            with ThreadPoolExecutor(max_workers=5) as executor:
                for task in self._tasks:
                    logger.info(f'submit task: {task.id}')
                    executor.submit(task.execute)
        logger.info('processor exited')
63 |
--------------------------------------------------------------------------------
/podmaker/processor/scheduling.py:
--------------------------------------------------------------------------------
1 | import logging
2 | from datetime import datetime
3 | from typing import Any
4 |
5 | from apscheduler.jobstores.base import JobLookupError
6 | from apscheduler.schedulers.blocking import BlockingScheduler
7 | from apscheduler.triggers.interval import IntervalTrigger
8 |
9 | from podmaker.config import PMConfig
10 | from podmaker.processor.core import Processor
11 | from podmaker.storage import Storage
12 |
13 | logger = logging.getLogger(__name__)
14 |
15 |
class ScheduleProcessor(Processor):
    """Processor that re-runs every task on its own interval via APScheduler."""

    def __init__(self, config: PMConfig, storage: Storage):
        super().__init__(config, storage)
        self._scheduler = BlockingScheduler()

    def exit_handler(self, *_: Any) -> None:
        # Invoked from Processor's exit-signal hook; stop without waiting for jobs.
        self._scheduler.shutdown(wait=False)

    def _before_hook(self, task_id: str) -> None:
        """Pause the job while its task runs so executions never overlap."""
        try:
            self._scheduler.pause_job(task_id)
        except JobLookupError:
            logger.warning(f'task({task_id}) not found, maybe it was removed')

    def _after_hook(self, task_id: str) -> None:
        """Resume the job once its task has finished."""
        try:
            self._scheduler.resume_job(task_id)
        except JobLookupError:
            logger.warning(f'task({task_id}) not found, maybe it was removed')

    def run(self) -> None:
        """Schedule every task and block until the scheduler shuts down."""
        with self._context():
            for task in self._tasks:
                # Fixed log message: the previous text ("it well be run after
                # 1 minute and every 1 hour") was both a typo and inaccurate —
                # next_run_time=now triggers an immediate first run, and the
                # interval is task.interval seconds.
                logger.info(f'schedule task: {task.id}, it will run immediately and then every {task.interval} seconds')
                task.before = self._before_hook
                task.after = self._after_hook
                self._scheduler.add_job(
                    func=task.execute,
                    trigger=IntervalTrigger(seconds=task.interval),
                    next_run_time=datetime.now(),
                    id=task.id,
                    name=f'Job-{task.id}',
                )
            self._scheduler.start()
        logger.info('processor exited')
51 |
52 |
--------------------------------------------------------------------------------
/podmaker/processor/task.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import logging
4 | from io import BytesIO
5 | from typing import Any, Callable
6 | from uuid import uuid4
7 |
8 | from podmaker.config import OwnerConfig, SourceConfig
9 | from podmaker.fetcher import Fetcher
10 | from podmaker.rss import Podcast
11 | from podmaker.storage import EMPTY_FILE, Storage
12 | from podmaker.util import ExitSignalError
13 |
14 | logger = logging.getLogger(__name__)
15 |
16 | Hook = Callable[[str], None]
17 |
18 |
def _do_nothing(*_: Any) -> None:
    """Default task hook: accept any arguments and do nothing."""
    return None
21 |
22 |
class Task:
    """One unit of work: fetch a source's feed, merge it with the stored
    copy, and upload the result when anything changed."""

    def __init__(self, fetcher: Fetcher, source: SourceConfig, storage: Storage, owner: OwnerConfig | None):
        # Each task gets a fresh random id, distinct from source.id.
        self._id = uuid4().hex
        logger.info(f'create task {self._id} for {source.id}')
        self._source = source
        self._storage = storage
        self._owner = owner
        self._fetcher = fetcher
        # Hooks invoked around execute(); the scheduler uses them to
        # pause/resume the job. Default to no-ops.
        self.before: Hook = _do_nothing
        self.after: Hook = _do_nothing

    @property
    def id(self) -> str:
        # Unique task identifier (hex uuid4).
        return self._id

    @property
    def interval(self) -> int:
        # Re-run interval in seconds, taken from the source config.
        return self._source.interval

    def _fetch_original(self, key: str) -> Podcast | None:
        """Load and parse the previously published feed, or None if absent."""
        with self._storage.get(key) as xml_file:
            if xml_file == EMPTY_FILE:
                logger.info(f'no original file: {key}')
                return None
            xml = xml_file.read()
        return Podcast.from_rss(xml.decode('utf-8'))

    def _execute(self) -> None:
        """Fetch, merge, and (when changed) publish the feed; never raises."""
        logger.info(f'execute task: {self.id}')
        try:
            key = self._source.get_storage_key('feed.rss')
            original_pod = self._fetch_original(key)
            source_pod = self._fetcher.fetch(self._source)
            if original_pod:
                # merge() reports whether the stored feed needs re-publishing.
                has_changed = original_pod.merge(source_pod)
            else:
                # First run for this source: publish the freshly fetched feed.
                has_changed = True
                original_pod = source_pod
            if has_changed:
                logger.info(f'update: {self._source.id}')
                buf = BytesIO(original_pod.bytes)
                self._storage.put(buf, key, content_type='text/xml; charset=utf-8')
            else:
                logger.info(f'no change: {self._source.id}')
        except ExitSignalError as e:
            logger.warning(f'task ({self.id}) cancelled due to {e}')
        except BaseException as e:
            # NOTE(review): BaseException also swallows KeyboardInterrupt and
            # SystemExit inside worker threads — confirm this breadth is
            # intentional before narrowing to Exception.
            logger.error(f'task execute failed: {e} task: {self.id}')

    def execute(self) -> None:
        """Public entry point: run the task with its before/after hooks."""
        logger.debug(f'task running: {self._source.id}')
        self.before(self.id)
        self._execute()
        logger.debug(f'task finished: {self.id}')
        self.after(self.id)
78 |
--------------------------------------------------------------------------------
/podmaker/rss/README.md:
--------------------------------------------------------------------------------
1 | Read more about the RSS feed in
2 | the [RSS feed guidelines for Google Podcasts](https://support.google.com/podcast-publishers/answer/9889544?sjid=3442458601435072975-NA) and
3 | [Podcast RSS feed requirements for Apple Podcasts](https://podcasters.apple.com/support/823-podcast-requirements).
4 |
--------------------------------------------------------------------------------
/podmaker/rss/__init__.py:
--------------------------------------------------------------------------------
1 | __all__ = [
2 | 'Resource',
3 | 'Enclosure',
4 | 'Episode',
5 | 'Podcast',
6 | 'Owner',
7 | ]
8 |
9 | from podmaker.rss.core import Resource
10 | from podmaker.rss.enclosure import Enclosure
11 | from podmaker.rss.episode import Episode
12 | from podmaker.rss.podcast import Owner, Podcast
13 |
--------------------------------------------------------------------------------
/podmaker/rss/core.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import sys
4 | from abc import ABCMeta, abstractmethod
5 | from typing import Any, Generic, TypeVar
6 | from xml.etree.ElementTree import Element, fromstring, tostring
7 |
8 | from podmaker.rss.util.namespace import NamespaceGenerator
9 | from podmaker.rss.util.parse import XMLParser
10 | from podmaker.util import exit_signal
11 |
12 | if sys.version_info >= (3, 11):
13 | from typing import Self
14 | else:
15 | from typing_extensions import Self
16 |
17 | ResourceType = TypeVar('ResourceType')
18 |
19 |
class Resource(Generic[ResourceType], metaclass=ABCMeta):
    """A value that may be expensive to produce (e.g. fetched remotely).

    Subclasses implement ``get``; ``ensure`` converts a missing value into an
    error. Every access to ``get`` first checks the process-wide exit signal.
    """

    @abstractmethod
    def get(self) -> ResourceType | None:
        """Produce the value, or None when it is unavailable."""
        raise NotImplementedError

    def ensure(self) -> ResourceType:
        """Like ``get``, but raises ValueError instead of returning None."""
        resource = self.get()
        if resource is None:
            raise ValueError('Resource not found')
        return resource

    def __getattribute__(self, name: Any) -> Any:
        # Intercept every attribute lookup so that merely *retrieving* the
        # bound `get` method raises if an exit signal is pending — this aborts
        # long-running fetch pipelines without each subclass having to check.
        if name == 'get':
            exit_signal.check()
        return super().__getattribute__(name)
35 |
36 |
class PlainResource(Resource[ResourceType]):
    """Adapts an already-materialised value to the ``Resource`` interface.

    Useful for storing resources that are already available in memory.
    """

    def __init__(self, resource: ResourceType):
        # The wrapped in-memory value; returned unchanged by get().
        self.resource = resource

    def get(self) -> ResourceType:
        """Return the wrapped value as-is (never None)."""
        return self.resource
48 |
49 |
# Shared XML namespace helpers used by the RSS components below. The http://
# URIs are namespace identifiers, not fetched resources.
# noinspection HttpUrlsUsage
itunes = NamespaceGenerator('itunes', 'http://www.itunes.com/dtds/podcast-1.0.dtd')
# noinspection HttpUrlsUsage
content = NamespaceGenerator('content', 'http://purl.org/rss/1.0/modules/content/')
54 |
55 |
class RSSComponent(XMLParser, metaclass=ABCMeta):
    """Base class for everything that renders to / parses from RSS XML."""

    # Namespaces available to all components (itunes + content).
    namespace = dict(**itunes.namespace, **content.namespace)

    @property
    @abstractmethod
    def xml(self) -> Element:
        """The XML element representing this component."""
        raise NotImplementedError

    @classmethod
    @abstractmethod
    def from_xml(cls, el: Element) -> Self:
        """Build a component from its XML element."""
        raise NotImplementedError

    @abstractmethod
    def merge(self, other: Self) -> bool:
        """
        Merge the other component into this one.
        :return: Whether changes were made.
        """
        raise NotImplementedError

    @staticmethod
    def _el_creator(tag: str, text: str | None = None, attrib: dict[str, str] | None = None) -> Element:
        """Create an element with optional text content and attributes."""
        el = Element(tag, attrib or {})
        if text is not None:
            el.text = text
        return el

    def _common_merge(self, other: Self, field: str | tuple[str, ...]) -> bool:
        """Copy *field* (or each field of a tuple) from *other* when it differs.

        :return: whether any field was changed.
        """
        if isinstance(field, tuple):
            # Bug fix: the previous `any(... for f in field)` short-circuited
            # at the first changed field, leaving the remaining fields
            # unmerged. Materialise all results before reducing with any().
            results = [self._common_merge(other, f) for f in field]
            return any(results)
        a = getattr(self, field)
        b = getattr(other, field)
        if a != b:
            setattr(self, field, b)
            return True
        return False
93 |
94 |
# Prefix prepended to serialized feeds (XML processing instructions):
# https://www.w3.org/TR/xml/#sec-pi
# NOTE(review): this is currently the empty string, so no '<?xml ...?>'
# declaration is emitted — confirm whether one was intended here.
_pis = ''
_pis_bytes = _pis.encode('utf-8')
98 |
99 |
class RSSSerializer(RSSComponent, metaclass=ABCMeta):
    """Mixin that renders a component to a complete RSS document."""

    @property
    def str(self) -> str:
        """The feed serialised as text, prefixed with the PI header."""
        return _pis + tostring(self.xml, encoding='unicode')

    @property
    def bytes(self) -> bytes:
        """The feed serialised as UTF-8 bytes, prefixed with the PI header."""
        body: bytes = tostring(self.xml, encoding='utf-8')
        return _pis_bytes + body
110 |
111 |
class RSSDeserializer(RSSComponent, metaclass=ABCMeta):
    """Mixin that parses a component from an RSS document."""

    @classmethod
    def from_rss(cls, rss: str | bytes) -> Self:
        """Parse *rss* (text, or UTF-8 bytes) into a component instance."""
        text = rss.decode('utf-8') if isinstance(rss, bytes) else rss
        root: Element = fromstring(text)
        return cls.from_xml(root)
119 |
--------------------------------------------------------------------------------
/podmaker/rss/enclosure.py:
--------------------------------------------------------------------------------
1 | import sys
2 | from dataclasses import dataclass
3 | from urllib.parse import ParseResult, urlparse
4 | from xml.etree.ElementTree import Element
5 |
6 | from podmaker.rss.core import RSSComponent
7 |
8 | if sys.version_info >= (3, 11):
9 | from typing import Self
10 | else:
11 | from typing_extensions import Self
12 |
13 |
@dataclass
class Enclosure(RSSComponent):
    """The <enclosure> element describing an episode's media file."""

    # URL of the episode audio file.
    url: ParseResult
    # Size of the episode audio file in bytes.
    length: int
    # The standard MIME type of the episode.
    type: str

    @property
    def xml(self) -> Element:
        """Render as an <enclosure> element; all data lives in attributes."""
        attrib = {
            'url': self.url.geturl(),
            'length': str(self.length),
            'type': self.type,
        }
        return self._el_creator('enclosure', attrib=attrib)

    @classmethod
    def from_xml(cls, el: Element) -> Self:
        """Parse an <enclosure> element; url/length/type are all required."""
        url = urlparse(cls._parse_required_attrib(el, '.', 'url'))
        length_str = cls._parse_required_attrib(el, '.', 'length')
        try:
            length = int(length_str)
        except ValueError:
            raise ValueError(f'length must be int: {length_str}')
        content_type = cls._parse_required_attrib(el, '.', 'type')
        return cls(url, length, content_type)

    def merge(self, other: Self) -> bool:
        """Adopt differing fields from *other*; return whether anything changed."""
        return self._common_merge(other, ('url', 'length', 'type'))
47 |
--------------------------------------------------------------------------------
/podmaker/rss/episode.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import logging
4 | import math
5 | import sys
6 | from dataclasses import dataclass
7 | from datetime import datetime, timedelta, timezone
8 | from email.utils import format_datetime, parsedate_to_datetime
9 | from typing import Any
10 | from urllib.parse import ParseResult, urlparse
11 | from xml.etree.ElementTree import Element
12 |
13 | from podmaker.rss import Enclosure, Resource
14 | from podmaker.rss.core import PlainResource, RSSComponent, itunes
15 |
16 | if sys.version_info >= (3, 11):
17 | from typing import Self
18 | else:
19 | from typing_extensions import Self
20 |
21 | logger = logging.getLogger(__name__)
22 |
23 |
@dataclass
class Episode(RSSComponent):
    """A single feed <item>, serialisable to and parseable from RSS XML."""

    # Fully-qualified URL of the episode audio file, including the format extension (for example, .wav, .mp3).
    enclosure: Resource[Enclosure]
    # Title of the podcast episode.
    title: str
    # A plaintext description of the podcast.
    description: str | None = None
    # Indicates whether this episode contains explicit language or adult content.
    explicit: bool | None = False
    # A permanently-assigned, case-sensitive Globally Unique Identifier for a podcast episode.
    guid: str | None = None
    # Duration of the episode.
    duration: timedelta | None = None
    # Publication date of the episode, in RFC 822 (section 5.1) format.
    # https://www.rfc-editor.org/rfc/rfc822#section-5.1
    pub_date: datetime | None = None
    # An episode link URL.
    link: ParseResult | None = None
    # The episode artwork.
    image: Resource[ParseResult] | None = None

    @property
    def xml(self) -> Element:
        """Render as an <item>; optional fields are emitted only when set."""
        el = Element('item')
        el.append(self._enclosure_el)
        el.append(self._title_el)
        el.append(self._itunes_title_el)
        if self.description:
            el.append(self._description_el)
            el.append(self._summary_e)
        if self.explicit is not None:
            el.append(self._explicit_el)
        if self.guid:
            el.append(self._guid_el)
        if self.duration:
            el.append(self._duration_el)
        if self.pub_date:
            el.append(self._pub_date_el)
        if self.link:
            el.append(self._link_el)
        if self.image:
            el.append(self._image_el)
        return el

    @classmethod
    def from_xml(cls, el: Element) -> Self:
        """Parse an <item>, preferring itunes:* fields where both forms exist."""
        enclosure = cls._parse_enclosure(el)
        itunes_title = cls._parse_optional_text(el, f'.{itunes("title")}')
        if itunes_title is None:
            title = cls._parse_required_text(el, '.title')
        else:
            title = itunes_title
        description = cls._parse_optional_text(el, '.description')
        if description is None:
            description = cls._parse_optional_text(el, f'.{itunes("summary")}')
        explicit_str = cls._parse_optional_text(el, f'.{itunes("explicit")}')
        explicit = explicit_str == 'yes' if explicit_str is not None else None
        guid = cls._parse_optional_text(el, '.guid')
        duration = cls._parse_duration(el)
        pub_date = cls._parse_pub_date(el)
        link_str = cls._parse_optional_text(el, '.link')
        if link_str is not None:
            link = urlparse(link_str)
        else:
            link = None
        image_url = cls._parse_optional_attrib(el, f'.{itunes("image")}', 'href')
        if image_url is not None:
            image = PlainResource(urlparse(image_url))
        else:
            image = None
        return cls(enclosure, title, description, explicit, guid, duration, pub_date, link, image)

    def merge(self, other: Self) -> bool:
        """Merge *other* into this episode; return whether anything changed.

        NOTE(review): `link` and `image` are not part of the merged field
        set — confirm whether that omission is intentional.
        """
        has_changed = False
        enclosure = self.enclosure.ensure()
        if enclosure.merge(other.enclosure.ensure()):
            has_changed = True
            self.enclosure = PlainResource(enclosure)
        return any([
            has_changed,
            self._common_merge(
                other,
                ('title', 'description', 'explicit', 'guid', 'duration', 'pub_date')
            )
        ])

    @property
    def unique_id(self) -> str:
        # Identity used for equality/hashing: guid, or enclosure URL when absent.
        if self.guid is None:
            return self.enclosure.ensure().url.geturl()
        return self.guid

    def __eq__(self, other: Any) -> bool:
        if not isinstance(other, Episode):
            return False
        return self.unique_id == other.unique_id

    def __hash__(self) -> int:
        return hash(self.unique_id)

    @classmethod
    def _parse_pub_date(cls, el: Element) -> datetime | None:
        """Parse <pubDate> (RFC 822, with ISO-8601 fallback); naive times become UTC."""
        pub_date_str = cls._parse_optional_text(el, '.pubDate')
        if pub_date_str is None:
            return None
        try:
            dt = parsedate_to_datetime(pub_date_str)
        except (TypeError, ValueError):
            try:
                # fromisoformat (pre-3.11) rejects a trailing 'Z'; normalise it.
                if pub_date_str.endswith('Z'):
                    pub_date_str = pub_date_str[:-1] + '+00:00'
                dt = datetime.fromisoformat(pub_date_str)
            except ValueError:
                logger.warning(f'invalid pubDate: {pub_date_str}')
                return None
        if dt.tzinfo is None:
            return dt.replace(tzinfo=timezone.utc)
        return dt

    @classmethod
    def _parse_enclosure(cls, el: Element) -> PlainResource[Enclosure]:
        """Parse the required <enclosure> child into a plain resource."""
        enclosure_el = cls._parse_required_el(el, '.enclosure')
        return PlainResource(Enclosure.from_xml(enclosure_el))

    @classmethod
    def _parse_duration(cls, el: Element) -> timedelta | None:
        """Parse itunes:duration, accepting plain seconds or [HH:]MM:SS forms."""
        duration_str = cls._parse_optional_text(el, f'.{itunes("duration")}')
        if duration_str is None:
            return None
        try:
            if ':' in duration_str:
                # Fold colon-separated components: each step shifts by base 60.
                secs = 0
                for c in duration_str.split(':'):
                    secs = secs * 60 + int(c)
            else:
                secs = int(duration_str)
            return timedelta(seconds=secs)
        except ValueError:
            logger.warning(f'invalid duration: {duration_str}')
        return None

    @property
    def _enclosure_el(self) -> Element:
        return self.enclosure.ensure().xml

    @property
    def _title_el(self) -> Element:
        return self._el_creator('title', self.title)

    @property
    def _itunes_title_el(self) -> Element:
        return itunes.el('title', text=self.title)

    @property
    def _description_el(self) -> Element:
        if self.description is None:
            raise ValueError('description is required')
        return self._el_creator('description', self.description)

    @property
    def _summary_e(self) -> Element:
        if self.description is None:
            raise ValueError('description is required')
        return itunes.el('summary', text=self.description)

    @property
    def _explicit_el(self) -> Element:
        return itunes.el('explicit', text='yes' if self.explicit else 'no')

    @property
    def _guid_el(self) -> Element:
        if self.guid is None:
            raise ValueError('empty guid field')
        # Heuristic: URL-shaped guids are flagged as permalinks.
        is_perma_link = 'false'
        if self.guid.startswith('http'):
            is_perma_link = 'true'
        return self._el_creator('guid', self.guid, {'isPermaLink': is_perma_link})

    @property
    def _duration_el(self) -> Element:
        if self.duration is None:
            raise ValueError('empty duration field')
        # Round fractional seconds up so durations never appear shorter.
        dur = math.ceil(self.duration.total_seconds())
        return itunes.el('duration', text=str(dur))

    @property
    def _pub_date_el(self) -> Element:
        if self.pub_date is None:
            raise ValueError('empty pub_date field')
        return self._el_creator('pubDate', format_datetime(self.pub_date))

    @property
    def _link_el(self) -> Element:
        if self.link is None:
            raise ValueError('empty link field')
        return self._el_creator('link', self.link.geturl())

    @property
    def _image_el(self) -> Element:
        if self.image is None:
            raise ValueError('empty image field')
        return itunes.el('image', attrib={'href': self.image.ensure().geturl()})
227 |
--------------------------------------------------------------------------------
/podmaker/rss/podcast.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import re
4 | import sys
5 | from collections.abc import Iterable
6 | from dataclasses import dataclass, field
7 | from typing import Any
8 | from urllib.parse import ParseResult, urlparse
9 | from xml.etree.ElementTree import Element
10 |
11 | from podmaker.rss import Episode, Resource
12 | from podmaker.rss.core import PlainResource, RSSDeserializer, RSSSerializer, itunes
13 |
14 | if sys.version_info >= (3, 11):
15 | from typing import Self
16 | else:
17 | from typing_extensions import Self
18 |
19 | _category_pattern = re.compile(r'^[\w &]+$')
20 |
21 |
@dataclass
class Owner:
    """The feed owner (itunes:owner): contact e-mail plus an optional name."""

    email: str
    name: str | None = None

    def __eq__(self, other: Any) -> bool:
        if not isinstance(other, Owner):
            return False
        return self.email == other.email and self.name == other.name

    def __hash__(self) -> int:
        # Defining __eq__ alone sets __hash__ to None, making Owner
        # unhashable (unusable in sets/dict keys). Provide a hash consistent
        # with __eq__: equal owners hash equally over (email, name).
        return hash((self.email, self.name))
31 |
32 |
@dataclass
class Podcast(RSSSerializer, RSSDeserializer):
    """A whole RSS feed (<rss>/<channel>), serialisable in both directions."""

    # Defines an episodes. At least one element in the items.
    items: Resource[Iterable[Episode]]
    # Fully-qualified URL of the homepage of the podcast.
    link: ParseResult
    # Name of the podcast.
    title: str
    # An image to associate with the podcast.
    image: Resource[ParseResult]
    # A plaintext description of the podcast.
    description: str
    # Text name(s) of the author(s) of this podcast.
    # This need not be the same as the owner value.
    author: str
    # Manager's email for the podcast.
    owner: Owner | None = None
    # The general topic of the podcast.
    categories: list[str] = field(default_factory=list)
    # Indicates whether the podcast is explicit language or adult content.
    explicit: bool = False
    # The two-letter language code of the podcast as defined by ISO 639-1.
    # https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes
    language: str = 'en'

    @property
    def xml(self) -> Element:
        """Render the whole feed as an <rss version="2.0"> element."""
        el = self._el_creator('rss', attrib={'version': '2.0'})
        channel = self._el_creator('channel')
        el.append(channel)
        channel.append(self._generator_el)
        channel.append(self._link_el)
        channel.append(self._title_el)
        channel.append(self._itunes_image_el)
        channel.append(self._image_el)
        channel.append(self._description_el)
        channel.append(self._summary_el)
        if self.owner:
            channel.append(self._owner_el)
        channel.append(self._author_el)
        for category in self._category_el:
            channel.append(category)
        channel.append(self._explicit_el)
        channel.append(self._language_el)
        for item in self._items_el:
            channel.append(item)
        return el

    @classmethod
    def from_xml(cls, el: Element) -> Self:
        """Parse an <rss> document rooted at *el* into a Podcast."""
        items = cls._parse_items(el)
        link = urlparse(cls._parse_required_text(el, '.channel/link'))
        title = cls._parse_required_text(el, '.channel/title')
        image = cls._parse_image(el)
        description = cls._parse_required_text(el, '.channel/description')
        owner = cls._parse_owner(el)
        author = cls._parse_required_text(el, f'.channel/{itunes("author")}')
        categories = cls._parse_categories(el)
        explicit = cls._parse_optional_text(el, f'.channel/{itunes("explicit")}') == 'yes'
        language = cls._parse_optional_text(el, '.channel/language') or 'en'
        return cls(
            items,
            link,
            title,
            image,
            description,
            author,
            owner,
            categories,
            explicit,
            language
        )

    def merge(self, other: Self) -> bool:
        """Merge *other*'s metadata and episodes into this feed.

        :return: whether anything changed (so callers know to re-publish).
        """
        has_changed = self._common_merge(
            other,
            ('link', 'title', 'description', 'owner', 'author', 'explicit', 'language')
        )
        image_url = self.image.get()
        if image_url != other.image.get():
            self.image = other.image
            has_changed = True
        # Categories are compared order-insensitively.
        if set(self.categories) != set(other.categories):
            self.categories = other.categories
            has_changed = True
        if self._merge_items(other.items):
            has_changed = True
        return has_changed

    def _merge_items(self, others: Resource[Iterable[Episode]]) -> bool:
        """Merge matching episodes and append new ones, newest first."""
        new_items = []
        has_changed = False
        # Index existing episodes by their unique id (guid or enclosure URL).
        old_ids = {i.unique_id: i for i in self.items.ensure()}
        for item in others.ensure():
            if item.unique_id not in old_ids:
                new_items.append(item)
            else:
                old_item = old_ids[item.unique_id]
                has_changed = old_item.merge(item) or has_changed
        if not new_items and not has_changed:
            return False
        # NOTE(review): the sort key falls back to 0 for a missing pub_date;
        # mixing datetime and int keys raises TypeError when only some
        # episodes carry a pub_date — confirm every episode has one.
        sorted_items = sorted(
            list(self.items.ensure()) + new_items,
            key=lambda i: i.pub_date or 0,
            reverse=True
        )
        self.items = PlainResource(sorted_items)
        return True

    @classmethod
    def _parse_owner(cls, el: Element) -> Owner | None:
        """Parse optional itunes:owner; the e-mail is mandatory when present."""
        owner_el = cls._parse_optional_el(el, f'.channel/{itunes("owner")}')
        if owner_el is None:
            return None
        owner_name = cls._parse_optional_text(owner_el, f'.{itunes("name")}')
        owner_email = cls._parse_required_text(owner_el, f'.{itunes("email")}')
        return Owner(owner_email, owner_name)

    @classmethod
    def _parse_items(cls, el: Element) -> Resource[Iterable[Episode]]:
        """Parse every <item>; a feed without items is invalid."""
        item_els = cls._parse_els(el, '.channel/item')
        if not item_els:
            raise ValueError('items is required')
        items = []
        for item_el in item_els:
            items.append(Episode.from_xml(item_el))
        if not items:
            raise ValueError('items is required')
        return PlainResource(items)

    @classmethod
    def _parse_categories(cls, el: Element) -> list[str]:
        """Collect itunes:category values from element text or the text attribute."""
        categories = []
        for category_el in cls._parse_els(el, f'.channel/{itunes("category")}'):
            if category_el.text:
                categories.append(category_el.text.strip())
            elif category_el.get('text'):
                categories.append(category_el.get('text'))  # type: ignore[arg-type]
        return categories

    @classmethod
    def _parse_image(cls, el: Element) -> Resource[ParseResult]:
        """Prefer itunes:image/@href; fall back to the plain <image><url>."""
        href = cls._parse_optional_attrib(el, f'.channel/{itunes("image")}', 'href')
        if href:
            return PlainResource(urlparse(href))
        image_url = cls._parse_required_text(el, '.channel/image/url')
        return PlainResource(urlparse(image_url))

    @property
    def _generator_el(self) -> Element:
        # Identifies podmaker as the feed generator.
        el = self._el_creator('generator')
        el.append(self._el_creator('name', 'podmaker'))
        el.append(self._el_creator('link', 'https://github.com/YogiLiu/podmaker'))
        return el

    @property
    def _items_el(self) -> Iterable[Element]:
        # Yields each episode's XML; raises if the feed would end up empty.
        is_empty = True
        for item in self.items.ensure():
            is_empty = False
            yield item.xml
        if is_empty:
            raise ValueError('items is required')

    @property
    def _link_el(self) -> Element:
        return self._el_creator('link', self.link.geturl())

    @property
    def _title_el(self) -> Element:
        return self._el_creator('title', self.title)

    @property
    def _itunes_image_el(self) -> Element:
        return itunes.el('image', attrib={'href': self.image.ensure().geturl()})

    @property
    def _image_el(self) -> Element:
        # Classic RSS <image> block mirroring the itunes image.
        el = self._el_creator('image')
        el.append(self._el_creator('link', self.link.geturl()))
        el.append(self._el_creator('title', self.title))
        el.append(self._el_creator('url', self.image.ensure().geturl()))
        return el

    @property
    def _description_el(self) -> Element:
        return self._el_creator('description', self.description)

    @property
    def _summary_el(self) -> Element:
        return itunes.el('summary', text=self.description)

    @property
    def _owner_el(self) -> Element:
        if self.owner is None:
            raise ValueError('empty owner field')
        el = itunes.el('owner')
        if self.owner.name:
            el.append(itunes.el('name', text=self.owner.name))
        el.append(itunes.el('email', text=self.owner.email))
        return el

    @property
    def _author_el(self) -> Element:
        return itunes.el('author', text=self.author)

    @property
    def _category_el(self) -> Iterable[Element]:
        # Emits only categories that survive validation/normalisation.
        for category in self.categories:
            parsed_category = self._parse_category(category)
            if parsed_category is not None:
                yield itunes.el('category', attrib={'text': parsed_category})

    @staticmethod
    def _parse_category(category: str) -> str | None:
        """Return the capitalised category, or None if it contains invalid characters."""
        if not _category_pattern.match(category):
            return None
        return category.capitalize()

    @property
    def _explicit_el(self) -> Element:
        return itunes.el('explicit', text='yes' if self.explicit else 'no')

    @property
    def _language_el(self) -> Element:
        if self.language is None:
            raise ValueError('empty language field')
        return self._el_creator('language', self.language)
261 |
--------------------------------------------------------------------------------
/podmaker/rss/util/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YogiLiu/podmaker/93153aedfe643f97e912a2ca8cb77df311070a2b/podmaker/rss/util/__init__.py
--------------------------------------------------------------------------------
/podmaker/rss/util/namespace.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | from xml.etree.ElementTree import Element, QName, register_namespace
4 |
5 |
class NamespaceGenerator:
    """Helper that builds namespaced XML tags and elements.

    Instantiating it also registers the prefix globally so ElementTree
    serializes elements from this namespace with the given prefix.
    """

    def __init__(self, prefix: str, uri: str):
        self.prefix = prefix
        self.url = uri
        # Global side effect: affects how ElementTree writes this namespace.
        register_namespace(prefix, uri)

    @property
    def namespace(self) -> dict[str, str]:
        """Prefix -> URI mapping suitable for ``namespaces=`` arguments."""
        return {self.prefix: self.url}

    def __call__(self, tag: str) -> QName:
        """Qualify *tag* with this namespace's URI."""
        return QName(self.url, tag)

    def el(self, tag: str, *, text: str | None = None, attrib: dict[str, str] | None = None) -> Element:
        """Create a namespaced element, optionally with text and attributes."""
        element = Element(self(tag).text, {} if attrib is None else attrib)
        if text is not None:
            element.text = text
        return element
24 |
--------------------------------------------------------------------------------
/podmaker/rss/util/parse.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | from abc import ABC
4 | from xml.etree.ElementTree import Element
5 |
6 |
class XMLParser(ABC):
    """Mixin with XPath-based helpers for reading data out of an Element tree.

    Subclasses may override ``namespace`` so prefixed XPath queries resolve.
    ``_parse_required_*`` variants raise ValueError when the target is absent;
    the ``_parse_optional_*`` variants return None instead.
    """

    # Prefix -> URI mapping applied to every lookup.
    namespace: dict[str, str] = {}

    @classmethod
    def _parse_optional_text(cls, el: Element, xpath: str) -> str | None:
        """Return the stripped text at *xpath*, or None when absent."""
        raw = el.findtext(xpath, namespaces=cls.namespace)
        return None if raw is None else raw.strip()

    @classmethod
    def _parse_required_text(cls, el: Element, xpath: str) -> str:
        """Return the stripped text at *xpath*; raise when the element is absent."""
        text = cls._parse_optional_text(el, xpath)
        if text is not None:
            return text
        raise ValueError(f'{xpath} is required')

    @classmethod
    def _parse_optional_el(cls, el: Element, xpath: str) -> Element | None:
        """Return the first element matching *xpath*, or None."""
        return el.find(xpath, namespaces=cls.namespace)

    @classmethod
    def _parse_required_el(cls, el: Element, xpath: str) -> Element:
        """Return the first element matching *xpath*; raise when absent."""
        found = cls._parse_optional_el(el, xpath)
        if found is not None:
            return found
        raise ValueError(f'{xpath} is required')

    @classmethod
    def _parse_els(cls, el: Element, xpath: str) -> list[Element]:
        """Return every element matching *xpath* (possibly empty)."""
        return el.findall(xpath, namespaces=cls.namespace)

    @classmethod
    def _parse_optional_attrib(cls, el: Element, xpath: str, attrib: str) -> str | None:
        """Return the stripped attribute value, or None when element or attribute is absent."""
        target = cls._parse_optional_el(el, xpath)
        if target is None:
            return None
        value = target.get(attrib)
        return None if value is None else value.strip()

    @classmethod
    def _parse_required_attrib(cls, el: Element, xpath: str, attrib: str) -> str:
        """Return the stripped attribute value; raise when absent."""
        value = cls._parse_optional_attrib(el, xpath, attrib)
        if value is not None:
            return value
        raise ValueError(f'attrib {attrib} of {xpath} is required')
55 |
--------------------------------------------------------------------------------
/podmaker/storage/__init__.py:
--------------------------------------------------------------------------------
1 | __all__ = ['Storage', 'ObjectInfo', 'EMPTY_FILE', 'get_storage']
2 |
3 | from podmaker.config import LocalConfig, S3Config, StorageConfig
4 | from podmaker.storage.core import EMPTY_FILE, ObjectInfo, Storage
5 |
6 |
def get_storage(config: StorageConfig) -> Storage:
    """Instantiate the storage backend matching *config*.

    Backend modules are imported lazily so optional dependencies (for
    example boto3 for S3) are only required when that backend is selected.

    :raises ValueError: if the config does not match a known backend.
    """
    if isinstance(config, S3Config):
        from podmaker.storage.s3 import S3
        return S3(config)
    if isinstance(config, LocalConfig):
        from podmaker.storage.local import Local
        return Local(config)
    raise ValueError(f'unknown storage destination: {config.dest}')
16 |
--------------------------------------------------------------------------------
/podmaker/storage/core.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | from abc import ABC, abstractmethod
4 | from contextlib import contextmanager
5 | from dataclasses import dataclass
6 | from io import BytesIO
7 | from typing import IO, AnyStr, Iterator
8 | from urllib.parse import ParseResult
9 |
10 |
@dataclass
class ObjectInfo:
    """Metadata describing a single stored object, as reported by a Storage backend."""
    # Fully-qualified URL of the object.
    uri: ParseResult
    # Size of the object in bytes.
    size: int
    # The standard MIME type of the object.
    type: str
19 |
20 |
# Shared sentinel returned by Storage.get when a key does not exist.
# NOTE(review): a single BytesIO instance shared module-wide — callers must
# not write to it, and its read position is shared across all consumers.
EMPTY_FILE = BytesIO(b'')
22 |
23 |
class Storage(ABC):
    """Abstract interface for object storage backends (e.g. S3, local disk)."""

    @abstractmethod
    def put(self, data: IO[AnyStr], key: str, *, content_type: str = '') -> ParseResult:
        """
        Store *data* under *key*.

        :return: data uri
        """
        raise NotImplementedError

    @abstractmethod
    def check(self, key: str) -> ObjectInfo | None:
        # Return metadata for `key`, or None when the object does not exist.
        raise NotImplementedError

    @abstractmethod
    @contextmanager
    def get(self, key: str) -> Iterator[IO[bytes]]:
        """
        Open the object stored under *key* for reading.

        :return: file-like object, return `EMPTY_FILE` if not found
        """
        raise NotImplementedError

    def start(self) -> None:
        # Optional lifecycle hook: acquire resources before first use.
        pass

    def stop(self) -> None:
        # Optional lifecycle hook: release resources on shutdown.
        pass
49 |
--------------------------------------------------------------------------------
/podmaker/storage/local.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | __all__ = ['Local']
4 |
5 | import logging
6 | import sqlite3
7 | import threading
8 | from contextlib import contextmanager
9 | from pathlib import Path
10 | from typing import IO, AnyStr, Iterator
11 | from urllib.parse import ParseResult, urljoin, urlparse
12 |
13 | from podmaker.config import LocalConfig
14 | from podmaker.storage import ObjectInfo, Storage
15 | from podmaker.storage.core import EMPTY_FILE
16 |
17 | logger = logging.getLogger(__name__)
18 | lock = threading.Lock()
19 |
20 |
class Local(Storage):
    """Storage backend that writes objects to the local filesystem.

    Payloads live under ``<base_dir>/data`` and their metadata (content type,
    size) is tracked in a sqlite database at ``<base_dir>/db.sqlite3``.
    """

    _db: sqlite3.Connection
    # Chunk size used when copying uploaded streams to disk.
    _file_buffering = 10 * 1024 * 1024  # 10MB

    def __init__(self, config: LocalConfig):
        self.public_endpoint = str(config.public_endpoint)
        self.base_dir = Path(config.base_dir)
        self.data_dir = self.base_dir / 'data'

    def start(self) -> None:
        """Create the storage directories and open the metadata database.

        NOTE(review): sqlite3.connect defaults to check_same_thread=True, so
        the connection must be used from the thread that called start() —
        confirm against how the scheduler invokes this backend.
        """
        if not self.base_dir.exists():
            self.base_dir.mkdir(parents=True, exist_ok=True)
            self.base_dir.chmod(0o750)
            logger.info(f'created base directory {self.base_dir} (mod: {self.base_dir.stat().st_mode:o})')
        if not self.data_dir.exists():
            self.data_dir.mkdir(parents=True, exist_ok=True)
            # Fix: chmod the data directory itself (previously re-chmodded base_dir).
            self.data_dir.chmod(0o750)
            logger.info(f'created data directory {self.data_dir} (mod: {self.data_dir.stat().st_mode:o})')
        with lock:
            self._db = sqlite3.connect(self.base_dir / 'db.sqlite3')
            self._db.execute('''
                CREATE TABLE IF NOT EXISTS files (
                    key TEXT PRIMARY KEY,
                    type TEXT NOT NULL DEFAULT '',
                    size INTEGER NOT NULL CHECK (size >= 0)
                )
            ''')
            self._db.commit()

    def stop(self) -> None:
        """Close the metadata database."""
        with lock:
            self._db.close()

    def put(self, data: IO[AnyStr], key: str, *, content_type: str = '') -> ParseResult:
        """Write *data* under *key* and record its metadata.

        Text streams are stored UTF-8 encoded; the stream is rewound after
        copying so callers can reuse it.

        :return: the public URL of the stored object
        """
        if key.startswith('/'):
            key = key[1:]
        path = self.data_dir / key
        # Keys may contain slashes; ensure intermediate directories exist.
        path.parent.mkdir(parents=True, exist_ok=True)
        size = 0
        with open(path, 'wb') as f:
            while True:
                chunk = data.read(self._file_buffering)
                if isinstance(chunk, str):
                    chunk_bytes = chunk.encode('utf-8')
                else:
                    chunk_bytes = chunk
                if not chunk_bytes:
                    break
                size += len(chunk_bytes)
                f.write(chunk_bytes)
        path.chmod(0o640)
        data.seek(0)
        info = self.check(key)
        with lock:
            if info is None:
                self._db.execute(
                    'INSERT INTO files (key, type, size) VALUES (?, ?, ?)',
                    (key, content_type, size),
                )
            else:
                self._db.execute(
                    'UPDATE files SET type = ?, size = ? WHERE key = ?',
                    (content_type, size, key),
                )
            # Persist immediately: sqlite3 opens an implicit transaction that
            # a plain close() on shutdown would otherwise roll back.
            self._db.commit()
        url = urljoin(self.public_endpoint, key)
        return urlparse(url)

    def check(self, key: str) -> ObjectInfo | None:
        """Look up the recorded metadata for *key*.

        :return: the object's info, or None when the key is unknown
        """
        if key.startswith('/'):
            key = key[1:]
        with lock:
            cursor = self._db.execute(
                'SELECT type, size FROM files WHERE key = ?',
                (key,),
            )
            row = cursor.fetchone()
        if row is None:
            return None
        content_type, size = row
        url = urljoin(self.public_endpoint, key)
        return ObjectInfo(type=content_type, uri=urlparse(url), size=size)

    @contextmanager
    def get(self, key: str) -> Iterator[IO[bytes]]:
        """Open the stored object for reading.

        :return: a binary file object, or EMPTY_FILE when the key does not exist
        """
        if key.startswith('/'):
            key = key[1:]
        path = self.data_dir / key
        if not path.exists():
            yield EMPTY_FILE
        else:
            with open(path, 'rb') as f:
                yield f
111 |
--------------------------------------------------------------------------------
/podmaker/storage/s3.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | __all__ = ['S3']
4 |
5 | import base64
6 | import hashlib
7 | import logging
8 | import sys
9 | from contextlib import contextmanager
10 | from tempfile import SpooledTemporaryFile
11 | from typing import IO, AnyStr, Iterator
12 | from urllib.parse import ParseResult, urljoin, urlparse
13 |
14 | from podmaker.config import S3Config
15 | from podmaker.storage import ObjectInfo, Storage
16 | from podmaker.storage.core import EMPTY_FILE
17 |
18 | logger = logging.getLogger(__name__)
19 |
20 | try:
21 | import boto3
22 | from botocore.exceptions import ClientError
23 | except ImportError:
24 | logger.error('boto3 is not installed, S3 storage is not available')
25 | sys.exit(1)
26 |
27 |
class S3(Storage):
    """Storage backend for S3-compatible object stores, backed by boto3."""

    # Chunk size for streaming md5 computation.
    _md5_chunk_size = 10 * 1024 * 1024  # 10MB
    # In-memory spool limit and download chunk size.
    _file_buffering = 10 * 1024 * 1024  # 10MB

    def __init__(self, config: S3Config):
        self.s3 = boto3.resource(
            's3', endpoint_url=str(config.endpoint), aws_access_key_id=config.access_key,
            aws_secret_access_key=config.access_secret)
        self.bucket = self.s3.Bucket(config.bucket)
        self.public_endpoint = str(config.public_endpoint)

    def _calculate_md5(self, data: IO[AnyStr]) -> str:
        """Return the base64-encoded md5 of *data*, rewinding the stream afterwards.

        :raises TypeError: if the stream yields neither str nor bytes chunks.
        """
        logger.debug('calculate md5')
        md5 = hashlib.md5()
        while True:
            chunk = data.read(self._md5_chunk_size)
            if not chunk:
                break
            if isinstance(chunk, str):
                md5.update(chunk.encode())
            elif isinstance(chunk, bytes):
                md5.update(chunk)
            else:
                raise TypeError(f'chunk must be str or bytes, not {type(chunk)}')
        data.seek(0)
        # S3 expects the Content-MD5 header base64-encoded, not hex.
        return base64.b64encode(md5.digest()).decode()

    def put(self, data: IO[AnyStr], key: str, *, content_type: str = '') -> ParseResult:
        """Upload *data* under *key*, letting S3 verify integrity via Content-MD5.

        :return: the public URL of the uploaded object
        """
        if key.startswith('/'):
            key = key[1:]
        md5 = self._calculate_md5(data)
        logger.info(f'upload: {key} (md5: {md5})')
        self.bucket.put_object(Key=key, ContentMD5=md5, Body=data, ContentType=content_type)
        logger.info(f'uploaded: {key}')
        data.seek(0)
        return self.get_uri(key)

    def check(self, key: str) -> ObjectInfo | None:
        """Fetch object metadata, or None when the object does not exist.

        boto3 loads object attributes lazily, so a missing key surfaces as a
        ClientError when content_length/content_type are first accessed.
        """
        logger.debug(f'check: {key}')
        if key.startswith('/'):
            key = key[1:]
        try:
            info = self.bucket.Object(key=key)
            return ObjectInfo(
                uri=self.get_uri(key),
                size=info.content_length,
                type=info.content_type
            )
        except ClientError:
            return None

    def get_uri(self, key: str) -> ParseResult:
        """Build the public URL for *key*."""
        url = urljoin(self.public_endpoint, key)
        return urlparse(url)

    @contextmanager
    def get(self, key: str) -> Iterator[IO[bytes]]:
        """Download *key* into a temporary spool and yield it for reading.

        :return: a file object, or EMPTY_FILE when the key does not exist
        """
        logger.info(f'get: {key}')
        if key.startswith('/'):
            key = key[1:]
        # Fix: the 10MB threshold belongs in max_size (the spool-to-disk
        # limit). It was previously passed as `buffering`, which only sets
        # the I/O buffer size; with max_size left at 0 the spool never rolled
        # over, so large downloads were held entirely in memory.
        with SpooledTemporaryFile(max_size=self._file_buffering) as f:
            try:
                obj = self.bucket.Object(key=key).get()
                while True:
                    chunk = obj['Body'].read(self._file_buffering)
                    if not chunk:
                        break
                    f.write(chunk)
                f.seek(0)
                yield f
            except ClientError:
                logger.debug(f'not found: {key}')
                yield EMPTY_FILE
101 |
--------------------------------------------------------------------------------
/podmaker/util/__init__.py:
--------------------------------------------------------------------------------
1 | __all__ = ['exit_signal', 'ExitSignalError', 'retry']
2 |
3 | from podmaker.util.exit import ExitSignalError, exit_signal
4 | from podmaker.util.retry_util import retry
5 |
--------------------------------------------------------------------------------
/podmaker/util/exit.py:
--------------------------------------------------------------------------------
1 | import signal
2 | import threading
3 | from typing import Any, Callable
4 |
5 | _exit_signals = (
6 | signal.SIGINT,
7 | signal.SIGHUP,
8 | signal.SIGTERM,
9 | )
10 |
11 | _lock = threading.Lock()
12 |
13 |
class ExitSignalError(Exception):
    """Raised by ExitSignal.check once an exit signal has been received."""
16 |
17 |
class ExitSignalRegisterError(Exception):
    """Raised when a handler is registered after listen() was called."""
20 |
21 |
class ExitSignal:
    """Tracks process exit signals and dispatches registered shutdown hooks."""

    def __init__(self) -> None:
        self._is_received = False
        self._has_listened = False
        self._exit_handlers: list[Callable[[], None]] = []

    def receive(self) -> None:
        """Mark the exit signal as received.

        This runs inside the OS signal handler, which executes in the main
        thread and can interrupt code that already holds the module lock.
        Acquiring the non-reentrant lock here could therefore deadlock
        against check()/register(), so the flag is set lock-free — a plain
        bool store is atomic under the GIL.
        """
        self._is_received = True

    def check(self) -> None:
        """Raise if an exit signal has been received.

        :raises ExitSignalError: once receive() has been called.
        """
        # Lock-free read for the same reason receive() is lock-free.
        if self._is_received:
            raise ExitSignalError('exit signal received')

    def register(self, handler: Callable[[], None]) -> None:
        """Register a shutdown hook; must happen before listen().

        :raises ExitSignalRegisterError: if listen() was already called.
        """
        with _lock:
            if self._has_listened:
                raise ExitSignalRegisterError('already listened')
            self._exit_handlers.append(handler)

    def _handler(self, *_: Any) -> None:
        # Signal-handler entry point: flag the exit, then run every hook in
        # registration order.
        self.receive()
        for handler in self._exit_handlers:
            handler()

    def listen(self) -> None:
        """Install OS signal handlers; further register() calls are rejected."""
        with _lock:
            self._has_listened = True
            for sig in _exit_signals:
                signal.signal(sig, self._handler)
53 |
54 |
# Process-wide singleton shared by all modules to observe shutdown requests.
exit_signal = ExitSignal()
56 |
--------------------------------------------------------------------------------
/podmaker/util/retry_util.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import logging
4 | import sys
5 | import time
6 | from datetime import timedelta
7 | from typing import Callable, Tuple, Type, TypeVar
8 |
9 | if sys.version_info < (3, 10):
10 | from typing_extensions import ParamSpec
11 | else:
12 | from typing import ParamSpec
13 |
14 |
15 | P = ParamSpec('P')
16 | T = TypeVar('T')
17 | _logger = logging.getLogger(__name__)
18 |
19 |
def retry(
        cnt: int,
        *,
        wait: timedelta = timedelta(seconds=0),
        catch: Type[Exception] | Tuple[Type[Exception], ...] = Exception,
        logger: logging.Logger | None = None,
) -> Callable[[Callable[P, T]], Callable[P, T]]:
    """
    A decorator to retry the function when exception raised.
    The function will be called at least once and at most cnt + 1 times.

    :param cnt: retry count, must be positive
    :param wait: wait time between retries
    :param catch: the exception to retry
    :param logger: logger to log retry info (defaults to this module's logger)
    :raises ValueError: if cnt is not positive
    """
    from functools import wraps

    if cnt <= 0:
        raise ValueError('cnt must be positive')
    if logger is None:
        # Late-bound default so the signature does not capture a module
        # global; resolves to the same logger as this module's own.
        logger = logging.getLogger(__name__)
    wait_seconds = wait.total_seconds()

    def deco(func: Callable[P, T]) -> Callable[P, T]:
        # wraps preserves the decorated function's name/docstring/signature.
        @wraps(func)
        def wrapper(*args: P.args, **kwargs: P.kwargs) -> T:
            for _ in range(cnt):
                try:
                    return func(*args, **kwargs)
                except catch:
                    logger.warning('retrying...')
                    if wait_seconds > 0:
                        logger.warning(f'wait {wait_seconds}s before retry')
                        time.sleep(wait_seconds)
            # Final attempt: any exception here propagates to the caller.
            return func(*args, **kwargs)
        return wrapper
    return deco
53 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.poetry]
2 | name = "podmaker"
3 | version = "0.9.0"
4 | description = "Convert online media into podcast feeds."
5 | license = "Unlicense"
6 | authors = ["YogiLiu "]
7 | maintainers = ["YogiLiu "]
8 | readme = "README.md"
9 | homepage = "https://github.com/YogiLiu/podmaker"
10 | repository = "https://github.com/YogiLiu/podmaker"
11 | documentation = "https://github.com/YogiLiu/podmaker/blob/main/README.md"
12 | keywords = ["rss", "youtube", "podcast"]
13 | classifiers = [
14 | "Development Status :: 4 - Beta",
15 | "Environment :: Console"
16 | ]
17 |
18 | [tool.poetry.dependencies]
19 | python = "^3.9"
20 | tomlkit = "^0.12.1"
21 | pydantic = {extras = ["email"], version = "^2.2.0"}
22 | apscheduler = "^3.10.4"
23 | boto3 = { version = "^1.28.27", optional = true }
24 | yt-dlp = { version = "^2023.7.6", optional = true }
25 |
26 | [tool.poetry.extras]
27 | s3 = ["boto3"]
28 | youtube = ["yt-dlp"]
29 | all = ["boto3", "yt-dlp"]
30 |
31 | [tool.poetry.group.dev.dependencies]
32 | boto3-stubs = { extras = ["essential"], version = "^1.28.27" }
33 | autohooks = "^23.7.0"
34 | autohooks-plugin-ruff = "^23.6.1"
35 | autohooks-plugin-mypy = "^23.3.0"
36 | typing-extensions = "^4.7.1"
37 |
38 | [tool.poetry.scripts]
39 | podmaker = 'podmaker.cli:run'
40 |
41 | [tool.autohooks]
42 | mode = "poetry"
43 | pre-commit = ["autohooks.plugins.mypy", "autohooks.plugins.ruff"]
44 |
45 | [tool.ruff]
46 | select = ["C90", "F", "I", "PL"]
47 | target-version = "py39"
48 | line-length = 120
49 |
50 | [tool.mypy]
51 | python_version = "3.9"
52 | strict = true
53 | plugins = ["pydantic.mypy"]
54 |
55 | [[tool.mypy.overrides]]
56 | module = ["yt_dlp", "apscheduler.*"]
57 | ignore_missing_imports = true
58 |
59 |
60 | [tool.commitizen]
61 | name = "cz_conventional_commits"
62 | tag_format = "$version"
63 | version_scheme = "pep440"
64 | version_provider = "poetry"
65 | update_changelog_on_bump = true
66 | major_version_zero = true
67 |
68 | [build-system]
69 | requires = ["poetry-core"]
70 | build-backend = "poetry.core.masonry.api"
71 |
72 | [tool.poetry.urls]
73 | "Bug Tracker" = "https://github.com/YogiLiu/podmaker/issues"
74 |
--------------------------------------------------------------------------------
/systemd/podmaker.service:
--------------------------------------------------------------------------------
1 | [Unit]
2 | Description=Convert online media into podcast feeds.
3 | Documentation=https://github.com/YogiLiu/podmaker/blob/main/README.md
4 | After=network.target network-online.target
5 | Wants=network-online.target
6 |
7 | [Service]
8 | User=nobody
9 | Type=simple
10 | ExecStart=/opt/podmaker/venv/bin/podmaker -c /opt/podmaker/config.toml
11 |
12 | [Install]
13 | WantedBy=multi-user.target
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YogiLiu/podmaker/93153aedfe643f97e912a2ca8cb77df311070a2b/tests/__init__.py
--------------------------------------------------------------------------------
/tests/data/apple.rss.test.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Hiking Treks
5 | https://www.apple.com/itunes/podcasts/
6 | en-us
7 | © 2020 John Appleseed
8 | The Sunset Explorers
9 |
10 | Love to get outdoors and discover nature's treasures? Hiking Treks is the
11 | show for you. We review hikes and excursions, review outdoor gear and interview
12 | a variety of naturalists and adventurers. Look for new episodes each week.
13 |
14 | serial
15 |
18 |
19 |
20 |
21 | false
22 | -
23 | trailer
24 | Hiking Treks Trailer
25 |
26 | Apple Podcasts.]]>
29 |
30 |
35 | D03EEC9B-B1B4-475B-92C8-54F853FA2A22
36 | Tue, 8 Jan 2019 01:15:00 GMT
37 | 1079
38 | false
39 |
40 | -
41 | full
42 | 4
43 | 2
44 | S02 EP04 Mt. Hood, Oregon
45 |
46 | Tips for trekking around the tallest mountain in Oregon
47 |
48 |
53 | 22BCFEBF-44FB-4A19-8229-7AC678629F57
54 | Tue, 07 May 2019 12:00:00 GMT
55 | 1024
56 | false
57 |
58 | -
59 | full
60 | 3
61 | 2
62 | S02 EP03 Bouldering Around Boulder
63 |
64 | We explore fun walks to climbing areas about the beautiful Colorado city of Boulder.
65 |
66 |
69 | href="http://example.com/podcasts/everything/
70 |
75 | BE486CAA-B3D5-4FB0-8298-EFEBE71C5982
76 | Tue, 30 Apr 2019 13:00:00 EST
77 | 3627
78 | false
79 |
80 | -
81 | full
82 | 2
83 | 2
84 | S02 EP02 Caribou Mountain, Maine
85 |
86 | Put your fitness to the test with this invigorating hill climb.
87 |
88 |
91 |
96 | 142FAFE9-B1DF-4F6D-BAA8-79BDBAF653A9
97 | Tue, 23 May 2019 02:00:00 -0700
98 | 2434
99 | false
100 |
101 | -
102 | full
103 | 1
104 | 2
105 | S02 EP01 Stawamus Chief
106 |
107 | We tackle Stawamus Chief outside of Vancouver, BC and you should too!
108 |
109 |
114 | 5F1DBAEB-3327-49FB-ACB3-DB0158A1D0A3
115 | 2019-02-16T07:00:00.000Z
116 | 13:24
117 | false
118 |
119 | -
120 | full
121 | 4
122 | 1
123 | S01 EP04 Kuliouou Ridge Trail
124 |
125 | Oahu, Hawaii, has some picturesque hikes and this is one of the best!
126 |
127 |
132 | B5FCEB80-317C-4CD0-A84B-807065B43FB9
133 | Tue, 27 Nov 2018 01:15:00 +0000
134 | 929
135 | false
136 |
137 | -
138 | full
139 | 3
140 | 1
141 | S01 EP03 Blood Mountain Loop
142 |
143 | Hiking the Appalachian Trail and Freeman Trail in Georgia
144 |
145 |
150 | F0C5D763-ED85-4449-9C09-81FEBDF6F126
151 | Tue, 23 Oct 2018 01:15:00 +0000
152 | 1440
153 | false
154 |
155 | -
156 | full
157 | 2
158 | 1
159 | S01 EP02 Garden of the Gods Wilderness
160 |
161 | Wilderness Area Garden of the Gods in Illinois is a delightful spot for
162 | an extended hike.
163 |
164 |
169 | 821DD0B2-571D-4DFD-8E11-556E8C1EFE6A
170 | Tue, 18 Sep 2018 01:15:00 +0000
171 | 839
172 | false
173 |
174 | -
175 | full
176 | 1
177 | 1
178 | S01 EP01 Upper Priest Lake Trail to Continental Creek Trail
179 |
180 | We check out this powerfully scenic hike following the river in the Idaho
181 | Panhandle National Forests.
182 |
183 |
188 | EABDA7EE-1AC6-4B60-9E11-6B3F30B72F87
189 | Tue, 14 Aug 2018 01:15:00 +0000
190 | 1399
191 | false
192 |
193 |
194 |
--------------------------------------------------------------------------------
/tests/data/google.rss.test.xml:
--------------------------------------------------------------------------------
1 |
2 |
4 |
5 | Dafna 的斑马饲养播客
6 |
7 | dafna@example.com
8 |
9 | Dafna
10 | 一个宠物主人关于饲养人气条纹马的指南
11 |
12 | zh-cn
13 | https://www.example.com/podcasts/dafnas-zebras/
14 | -
15 | 关于饲养斑马的十大误区
16 | 这里介绍了关于照顾、喂养和繁殖可爱条纹动物的十大误区。
17 | Tue, 14 Mar 2017 12:00:00 GMT
18 |
20 | 30:00
21 | dzpodtop10
22 |
23 | -
24 | 让斑马保持整洁干净
25 | 让斑马保持干净非常耗时,但付出的努力是值得的。
26 | Fri, 24 Feb 2017 12:00:00 GMT
27 |
29 | 22:48
30 | dzpodclean
31 |
32 |
33 |
--------------------------------------------------------------------------------
/tests/helper.py:
--------------------------------------------------------------------------------
1 | from datetime import timedelta
2 | from urllib.error import URLError
3 | from urllib.request import urlopen
4 |
5 |
def network_available(url: str, timeout: timedelta = timedelta(seconds=10)) -> bool:
    """Return True when *url* is reachable within *timeout*.

    Used by tests to skip network-dependent cases when offline.
    """
    try:
        # Use the response as a context manager so the connection is closed
        # instead of leaked (the previous code never closed it).
        with urlopen(url, timeout=timeout.total_seconds()):
            return True
    except OSError:
        # URLError is an OSError subclass; catching OSError also covers raw
        # socket timeouts that urlopen can surface directly.
        return False
12 |
--------------------------------------------------------------------------------
/tests/provider/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YogiLiu/podmaker/93153aedfe643f97e912a2ca8cb77df311070a2b/tests/provider/__init__.py
--------------------------------------------------------------------------------
/tests/provider/test_resource.py:
--------------------------------------------------------------------------------
1 | import multiprocessing
2 | import unittest
3 |
4 | from podmaker.rss import Resource
5 | from podmaker.util import ExitSignalError, exit_signal
6 |
7 | parent, child = multiprocessing.Pipe()
8 |
9 |
def exit_signal_tester() -> None:
    """Child-process entry point for TestResource.

    Flags the exit signal, then calls Resource.get and reports the raised
    exception (or None when nothing was raised) through the module pipe.
    """
    class _Probe(Resource[None]):
        def get(self) -> None:
            return None

    probe = _Probe()
    exit_signal.receive()
    try:
        probe.get()
    except BaseException as exc:  # forward whatever was raised to the parent
        child.send(exc)
    else:
        child.send(None)
23 |
24 |
class TestResource(unittest.TestCase):
    def test_exit_signal(self) -> None:
        """Run exit_signal_tester in a child process and expect ExitSignalError.

        NOTE(review): relies on the module-level pipe being inherited by the
        child — assumes a fork-style start method; verify on platforms where
        'spawn' is the default, since a spawned child re-creates the pipe.
        """
        p = multiprocessing.Process(target=exit_signal_tester)
        p.start()
        p.join()
        self.assertIsInstance(parent.recv(), ExitSignalError)
31 |
--------------------------------------------------------------------------------
/tests/provider/test_youtube.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import sys
4 | import unittest
5 | from datetime import date
6 | from typing import IO, Any, AnyStr
7 | from urllib.parse import ParseResult, urlparse
8 |
9 | from podmaker.config import OwnerConfig, SourceConfig
10 | from podmaker.fetcher.youtube import YouTube
11 | from podmaker.storage import ObjectInfo, Storage
12 | from tests.helper import network_available
13 |
14 | if sys.version_info >= (3, 11):
15 | pass
16 | else:
17 | pass
18 |
19 |
class MockStorage(Storage):
    """Storage stub for fetcher tests.

    ``check`` always reports that the object exists and counts its calls;
    ``put`` asserts it only receives mp3 uploads and that the call ordering
    (relative to ``check``) matches expectations.
    """

    # Number of check() calls observed so far.
    cnt = 0

    def put(self, data: IO[AnyStr], key: str, *, content_type: str = '') -> ParseResult:
        assert data.name.endswith('.mp3'), 'only mp3 is supported'
        assert self.cnt % 2 == 1, 'file already exists'
        return urlparse('https://example.com')

    def check(self, key: str) -> ObjectInfo:
        self.cnt += 1
        fake_uri = urlparse('https://example.com')
        return ObjectInfo(uri=fake_uri, size=0, type='audio/mp3')

    def get(self, key: str) -> Any:
        pass
38 |
39 |
@unittest.skipUnless(network_available('https://www.youtube.com'), 'network is not available')
class TestYoutube(unittest.TestCase):
    # Each case bundles: the playlist/channel source config, the expected
    # (title, description, author) tuple, and a prefix of expected episodes
    # as (video id, title, publish date).
    cases = [
        {
            'source': SourceConfig(
                id='youtube',
                url='https://www.youtube.com/playlist?list=PLOU2XLYxmsILHvpAkROp2dXz-jQi4S4_y',
                regex=r'Introduction to ARCore Augmented Faces, \w+'
            ),
            'attr': (
                'Introduction to ARCore Augmented Faces',
                'Learn how to use ARCore’s Augmented Faces APIs to create face effects with Unity, Android, and iOS.',
                'Google for Developers',
            ),
            'items': [
                ('8ih7eHwPoxM', 'Introduction to ARCore Augmented Faces, Unity', date.fromisoformat('2019-09-12')),
                ('-4EvaCQpVEQ', 'Introduction to ARCore Augmented Faces, Android', date.fromisoformat('2019-09-12')),
                ('QAqOTaCCD9M', 'Introduction to ARCore Augmented Faces, iOS', date.fromisoformat('2019-09-12')),
            ]
        },
        {
            'source': SourceConfig(
                id='youtube',
                url='https://www.youtube.com/@PyCon2015/videos'
            ),
            'attr': (
                'PyCon 2015 - Videos',
                '',
                'PyCon 2015',
            ),
            'items': [
                ('G-uKNd5TSBw', 'Keynote - Guido van Rossum - PyCon 2015', date.fromisoformat('2015-04-16')),
                ('lNqtyi3sM-k', 'Keynote - Gabriella Coleman - PyCon 2015', date.fromisoformat('2015-04-16')),
                ('2wDvzy6Hgxg', 'Type Hints - Guido van Rossum - PyCon 2015', date.fromisoformat('2015-04-12')),
            ]
        },
    ]

    def setUp(self) -> None:
        # MockStorage lets the fetcher believe uploads succeed without I/O.
        storage = MockStorage()
        self.youtube = YouTube(
            storage,
            OwnerConfig(name='Podmaker', email='test@podmaker.dev')
        )

    def test_fetch(self) -> None:
        """Fetch each source from live YouTube and compare podcast metadata.

        NOTE(review): depends on live YouTube responses — expected titles and
        dates may drift if the upstream playlists change.
        """
        for case in self.cases:
            source = case['source']
            attr = case['attr']
            podcast = self.youtube.fetch(source)  # type: ignore[arg-type]
            self.assertEqual(urlparse(str(source.url)), podcast.link)  # type: ignore[attr-defined]
            self.assertEqual(attr[0], podcast.title)  # type: ignore[index]
            self.assertIsNotNone(podcast.image.ensure())
            self.assertEqual(attr[1], podcast.description)  # type: ignore[index]
            self.assertEqual('Podmaker', podcast.owner.name)  # type: ignore[union-attr]
            self.assertEqual('test@podmaker.dev', podcast.owner.email)  # type: ignore[union-attr]
            self.assertEqual(attr[2], podcast.author)  # type: ignore[index]
            self.assertEqual([], podcast.categories)
            self.assertFalse(podcast.explicit)
            self.assertEqual('en', podcast.language)
            items = case['items']
            # Only the first len(items) episodes are checked; the feed may
            # contain more entries than the expectations list.
            for (idx, episode) in enumerate(podcast.items.ensure()):
                if idx >= len(items):  # type: ignore[arg-type]
                    break
                current = items[idx]  # type: ignore[index]
                self.assertEqual(current[0], episode.guid)
                self.assertEqual(current[1], episode.title)
                self.assertIsNotNone(episode.pub_date)
                if episode.pub_date is not None:
                    self.assertEqual(current[2], episode.pub_date.date())
                self.assertIsNotNone(episode.link)
                self.assertIsNotNone(episode.image.ensure())  # type: ignore[union-attr]
                self.assertEqual(urlparse('https://example.com'), episode.enclosure.ensure().url)
113 |
--------------------------------------------------------------------------------
/tests/storage/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YogiLiu/podmaker/93153aedfe643f97e912a2ca8cb77df311070a2b/tests/storage/__init__.py
--------------------------------------------------------------------------------
/tests/storage/test_local.py:
--------------------------------------------------------------------------------
1 | import random
2 | import unittest
3 | from io import BytesIO
4 | from pathlib import Path
5 |
6 | from podmaker.config import LocalConfig
7 | from podmaker.storage.local import Local
8 |
9 | file_size = 10
10 |
11 |
class TestLocal(unittest.TestCase):
    """Exercises the Local storage backend against a scratch directory.

    Fix: the class and test method were named after S3 (copy-paste from
    test_s3.py), which was misleading and collided with the real TestS3
    in test reports; renamed to match the backend under test.
    """

    base_dir = Path('/tmp/podmaker')
    data_dir = base_dir / 'data'

    def setUp(self) -> None:
        self.storage = Local(
            LocalConfig(dest='local', base_dir='/tmp/podmaker', public_endpoint='http://localhost:9000')
        )
        self.storage.start()
        # A small random payload to round-trip through the backend.
        self.file = BytesIO()
        self.file.write(random.randbytes(file_size))
        self.file.seek(0)

    def tearDown(self) -> None:
        self.storage.stop()

    # noinspection DuplicatedCode
    def test_local(self) -> None:
        """put/check/get round-trip; the second pass exercises the update path."""
        for _ in range(2):
            result = self.storage.put(self.file, key='/test.bin', content_type='application/octet-stream')
            self.assertEqual('http://localhost:9000/test.bin', result.geturl())
            self.assertTrue((self.data_dir / 'test.bin').exists())
            info = self.storage.check(key='/test.bin')
            self.assertIsNotNone(info)
            if info is not None:
                self.assertEqual('http://localhost:9000/test.bin', info.uri.geturl())
                self.assertEqual(self.file.getbuffer().nbytes, info.size)
                self.assertEqual('application/octet-stream', info.type)
            with self.storage.get(key='/test.bin') as f:
                self.assertEqual(self.file.read(), f.read())
            self.file.seek(0)

    def test_check_empty(self) -> None:
        """check() returns None for a key that was never stored."""
        r = self.storage.check(key='/empty.bin')
        self.assertIsNone(r)
47 |
--------------------------------------------------------------------------------
/tests/storage/test_s3.py:
--------------------------------------------------------------------------------
1 | import random
2 | import unittest
3 | from dataclasses import dataclass
4 | from io import BytesIO
5 | from typing import Any, Type
6 | from unittest.mock import patch
7 | from urllib.parse import ParseResult, urlparse
8 |
9 | import boto3
10 | from botocore.exceptions import ClientError
11 |
12 | from podmaker.config import S3Config
13 | from podmaker.storage.s3 import S3
14 |
# Number of random bytes in the in-memory payload and in the mocked
# object's reported content_length.
file_size = 10
16 |
17 |
@dataclass
class MockedObject:
    """Minimal stand-in for the metadata attributes of a boto3 Object."""

    # Mirrors boto3's Object.content_length (payload size in bytes).
    content_length: int
    # Mirrors boto3's Object.content_type (MIME type of the payload).
    content_type: str
22 |
23 |
24 | # noinspection PyPep8Naming
25 | class MockedBucket:
26 | @staticmethod
27 | def put_object(*, Key: str, **__: Any) -> ParseResult:
28 | return urlparse(f'http://localhost:9000/{Key}')
29 |
30 | @staticmethod
31 | def Object(*, key: str) -> MockedObject:
32 | if key == 'empty.bin':
33 | raise ClientError(error_response={}, operation_name='GetObject')
34 | return MockedObject(content_type='application/octet-stream', content_length=file_size)
35 |
36 |
# noinspection PyPep8Naming
class MockedServiceResource:
    """Stand-in for the boto3 service resource returned by boto3.resource."""

    @staticmethod
    def Bucket(*_: Any, **__: Any) -> MockedBucket:
        # All arguments (bucket name etc.) are ignored by the mock.
        return MockedBucket()
42 |
43 |
def mock_resource(*_: Any, **__: Any) -> Type[MockedServiceResource]:
    """Drop-in replacement for ``boto3.resource`` used with ``patch.object``.

    Returns the class itself (not an instance); its ``Bucket`` staticmethod
    is callable either way.
    """
    return MockedServiceResource
46 |
47 |
class TestS3(unittest.TestCase):
    """Tests for the S3 storage backend, backed by mocked boto3 objects."""

    # boto3.resource is only called while S3() is constructed, so patching
    # just for the duration of setUp is sufficient.
    @patch.object(boto3, 'resource', mock_resource)
    def setUp(self) -> None:
        config = S3Config(
            dest='s3',
            access_key='123',
            access_secret='456',
            bucket='podmaker',
            endpoint='http://localhost:9000',
            public_endpoint='http://localhost:9000'
        )
        self.s3 = S3(config)
        # Random payload so the size assertions exercise real data.
        self.file = BytesIO(random.randbytes(file_size))

    def test_s3(self) -> None:
        """put/check round-trip; uploading the same key twice must behave identically."""
        for _attempt in range(2):
            result = self.s3.put(self.file, key='/test.bin', content_type='application/octet-stream')
            self.assertEqual('http://localhost:9000/test.bin', result.geturl())
            info = self.s3.check(key='/test.bin')
            self.assertIsNotNone(info)
            if info is None:
                continue
            self.assertEqual('http://localhost:9000/test.bin', info.uri.geturl())
            self.assertEqual(self.file.getbuffer().nbytes, info.size)
            self.assertEqual('application/octet-stream', info.type)

    def test_check_empty(self) -> None:
        """Checking a key that was never uploaded must yield None."""
        r = self.s3.check(key='/empty.bin')
        self.assertIsNone(r)
79 |
--------------------------------------------------------------------------------
/tests/test_config.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import unittest
3 | from pathlib import Path
4 |
5 | from podmaker.config import PMConfig
6 |
7 | if sys.version_info >= (3, 11):
8 | import tomllib as toml
9 | else:
10 | import tomlkit as toml
11 |
12 |
class TestConfig(unittest.TestCase):
    """Ensure config.example.toml round-trips through PMConfig unchanged."""

    def setUp(self) -> None:
        # The example config lives at the repository root, one level above tests/.
        self.path = Path(__file__).parent.parent / 'config.example.toml'

    def test_from_file(self) -> None:
        """Parsing the example file and dumping it back must be lossless."""
        expected = toml.loads(self.path.read_text())
        config = PMConfig.from_file(self.path)
        self.assertEqual(expected, config.model_dump(mode='json'))
20 |
--------------------------------------------------------------------------------
/tests/test_rss.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import math
4 | import unittest
5 | from datetime import datetime, timezone
6 | from email.utils import parsedate_to_datetime
7 | from pathlib import Path
8 | from typing import Any, Callable
9 | from urllib.parse import urlparse
10 | from xml.etree.ElementTree import Element, fromstring
11 |
12 | from podmaker.rss import Episode, Podcast
13 | from podmaker.rss.core import PlainResource, Resource, itunes
14 |
15 |
def convert_to_seconds(duration: str) -> int:
    """Convert an itunes duration string ('SS', 'MM:SS' or 'HH:MM:SS') to seconds.

    Raises:
        ValueError: if any segment is not a valid integer.
    """
    # A plain integer string is just the one-segment case of the same
    # base-60 fold ('90'.split(':') == ['90']), so a single loop covers
    # both formats — the original colon check was redundant.
    secs = 0
    for segment in duration.split(':'):
        secs = secs * 60 + int(segment)
    return secs
24 |
25 |
def find_strip_text(el: Element, path: str, namespaces: dict[str, str] | None = None) -> str | None:
    """Return the stripped text of the first element matching *path*.

    Yields None when the element is missing or its text is empty/None;
    whitespace-only text strips down to the empty string.
    """
    text = el.findtext(path, namespaces=namespaces)
    return text.strip() if text else None
31 |
32 |
class TestRSS(unittest.TestCase):
    """Round-trip tests for Podcast: parse the fixtures, re-serialise, merge."""

    def setUp(self) -> None:
        # NOTE(review): these paths are relative to the CWD — assumes the
        # test runner is started from the tests/ directory; confirm in CI.
        self.rss_docs = [
            Path('data/apple.rss.test.xml').read_text(),
            Path('data/google.rss.test.xml').read_text(),
        ]
        self.elements = [
            fromstring(r)
            for r in self.rss_docs
        ]

    def test_from_rss(self) -> None: # noqa: PLR0912, C901, PLR0915
        """Every field parsed by Podcast.from_rss must match the raw XML."""
        for i, element in enumerate(self.elements):
            doc = self.rss_docs[i]
            podcast = Podcast.from_rss(doc)
            self.assertEqual(find_strip_text(element, '.channel/link'), podcast.link.geturl())
            self.assertEqual(find_strip_text(element, '.channel/title'), podcast.title)
            self.assertEqual(
                element.find(
                    f'.channel/{itunes("image")}', namespaces=itunes.namespace
                ).get('href'),  # type: ignore[union-attr]
                podcast.image.ensure().geturl()
            )
            self.assertEqual(find_strip_text(element, '.channel/description'), podcast.description)
            # The owner name is optional within <itunes:owner>; only the
            # email is always compared.
            owner_el = element.find(f'.channel/{itunes("owner")}', namespaces=itunes.namespace)
            if owner_el is not None:
                owner_name = find_strip_text(owner_el, f'.{itunes("name")}')
                if owner_name:
                    self.assertEqual(owner_name, podcast.owner.name)  # type: ignore[union-attr]
                else:
                    self.assertIsNone(podcast.owner.name)  # type: ignore[union-attr]
                self.assertEqual(
                    find_strip_text(owner_el, f'.{itunes("email")}'),
                    podcast.owner.email  # type: ignore[union-attr]
                )
            self.assertEqual(
                find_strip_text(element, f'.channel/{itunes("author")}', namespaces=itunes.namespace),
                podcast.author
            )
            c_els = element.findall(f'.channel/{itunes("category")}', namespaces=itunes.namespace)
            self.assertEqual(
                [c_el.text.strip() for c_el in c_els],  # type: ignore[union-attr]
                podcast.categories
            )
            # itunes:explicit is the literal string 'yes'/'no'; anything
            # other than 'yes' (including absence) parses as not explicit.
            explicit = find_strip_text(element, f'.channel/{itunes("explicit")}', namespaces=itunes.namespace)
            if explicit == 'yes':
                self.assertTrue(podcast.explicit)
            else:
                self.assertFalse(podcast.explicit)
            language = find_strip_text(element, '.channel/language')
            if language:
                self.assertEqual(language, podcast.language)
            else:
                # Bug fix: this branch previously asserted on podcast.explicit
                # (a bool asserted True/False just above, so it could never
                # pass); when the feed has no <language>, it is the parsed
                # language that must be absent.
                self.assertIsNone(podcast.language)
            # Items are expected to come back in document order.
            item_els = element.findall('.channel/item')
            for j, item in enumerate(podcast.items.ensure()):
                el = item_els[j]
                enclosure_el = el.find('.enclosure')
                self.assertEqual(
                    enclosure_el.get('url'),  # type: ignore[union-attr]
                    item.enclosure.ensure().url.geturl()
                )
                self.assertEqual(
                    enclosure_el.get('type'),  # type: ignore[union-attr]
                    item.enclosure.ensure().type
                )
                self.assertEqual(
                    enclosure_el.get('length'),  # type: ignore[union-attr]
                    str(item.enclosure.ensure().length)
                )
                # The plain <title> wins; <itunes:title> is only the fallback.
                if find_strip_text(el, '.title'):
                    self.assertEqual(find_strip_text(el, '.title'), item.title)
                else:
                    self.assertEqual(find_strip_text(el, f'.{itunes("title")}', namespaces=itunes.namespace),
                                     item.title)
                # Either <description> or <itunes:summary> may carry the
                # description; compare against whichever is present.
                desc = find_strip_text(el, '.description')
                if desc:
                    self.assertEqual(desc, item.description)
                summary = find_strip_text(el, f'.{itunes("summary")}')
                if summary:
                    self.assertEqual(summary, item.description)
                explicit = find_strip_text(el, f'.{itunes("explicit")}', namespaces=itunes.namespace)
                if explicit == 'yes':
                    self.assertTrue(item.explicit)
                elif explicit == 'no':
                    self.assertFalse(item.explicit)
                else:
                    self.assertFalse(item.explicit)
                self.assertEqual(el.find('.guid').text, item.guid)  # type: ignore[union-attr]
                duration = find_strip_text(el, f'.{itunes("duration")}', namespaces=itunes.namespace)
                if duration:
                    # Consistency: reuse the module-level helper instead of
                    # duplicating the base-60 fold inline.
                    secs = convert_to_seconds(duration)
                    self.assertEqual(secs, math.ceil(item.duration.total_seconds()))  # type: ignore[union-attr]
                else:
                    self.assertIsNone(item.duration)
                pub_date = find_strip_text(el, 'pubDate')
                if pub_date:
                    try:
                        dt = parsedate_to_datetime(pub_date)
                    except (TypeError, ValueError):
                        # Fall back to ISO-8601; fromisoformat (pre-3.11)
                        # cannot parse a trailing 'Z', so rewrite it first.
                        if pub_date.endswith('Z'):
                            pub_date = pub_date[:-1] + '+00:00'
                        dt = datetime.fromisoformat(pub_date)
                    self.assertEqual(dt.date(), item.pub_date.date())  # type: ignore[union-attr]
                    self.assertEqual(dt.time(), item.pub_date.time())  # type: ignore[union-attr]
                else:
                    self.assertIsNone(item.pub_date)

    def test_xml(self) -> None: # noqa: PLR0912, C901
        """Serialising a parsed Podcast must reproduce the source elements.

        Each case is either a single path (compared in both trees), a
        [source_path, output_path] pair, or a dict with 'a'/'b' paths plus
        an 'action' extracting the comparable value from each element.
        """
        cases: list[str | list[str] | dict[str, Any]] = [
            '.',
            '.channel',
            '.channel/title',
            f'.channel/{itunes("owner")}/{itunes("email")}',
            f'.channel/{itunes("author")}',
            '.channel/description',
            [
                '.channel/description',
                f'.channel/{itunes("summary")}'
            ],
            f'.channel/{itunes("image")}',
            [
                '.channel/title',
                '.channel/image/title'
            ],
            [
                '.channel/link',
                '.channel/image/link'
            ],
            {
                'a': f'.channel/{itunes("image")}',
                'b': '.channel/image/url',
                'action': lambda el: el.text if el.tag == 'url' else el.get('href')
            },
            '.channel/language',
            '.channel/link',
            '.channel/item/[1]/title',
            '.channel/item/[1]/description',
            '.channel/item/[1]/pubDate',
            '.channel/item/[1]/enclosure',
            f'.channel/item/[1]/{itunes("duration")}',
            '.channel/item/[1]/guid',
            '.channel/item/[1]/link',
            {
                'a': f'.channel/item/[1]/{itunes("image")}',
                'b': f'.channel/item/[1]/{itunes("image")}',
                'action': lambda el: el.text if el.tag == 'url' else el.get('href')
            },
            '.channel/item/[2]/title',
            '.channel/item/[2]/description',
            '.channel/item/[2]/pubDate',
            '.channel/item/[2]/enclosure',
            f'.channel/item/[2]/{itunes("duration")}',
            '.channel/item/[2]/guid',
            '.channel/item/[2]/link',
            {
                'a': f'.channel/item/[2]/{itunes("image")}',
                'b': f'.channel/item/[3]/{itunes("image")}',
                'action': lambda el: el.text if el.tag == 'url' else el.get('href')
            },
        ]
        for idx, element in enumerate(self.elements):
            doc = self.rss_docs[idx]
            podcast = Podcast.from_rss(doc)
            xml = podcast.xml
            for case in cases:
                if isinstance(case, dict):
                    a = element.find(case['a'])
                    # Absent in the source document: nothing to compare.
                    if a is None:
                        continue
                    b = xml.find(case['b'])
                    action: Callable[[Element], Any] = case['action']
                    self.assertEqual(action(a), action(b), case)  # type: ignore[arg-type]
                else:
                    if isinstance(case, list):
                        a = element.find(case[0])
                        b = xml.find(case[1])
                    else:
                        a = element.find(case)
                        b = xml.find(case)
                    if a is None:
                        continue
                    if a.text:
                        a.text = a.text.strip()
                    if b.text:  # type: ignore[union-attr]
                        b.text = b.text.strip()  # type: ignore[union-attr]
                    # Some elements carry their value in a 'text' attribute
                    # instead of element text; pop it so the attrib
                    # comparison below does not see it twice.
                    a_t = a.text or a.attrib.pop('text', '')
                    b_t = b.text or b.attrib.pop('text', '')  # type: ignore[union-attr]
                    if 'pubDate' in case:
                        # Compare as datetimes: the serialised form may use a
                        # different but equivalent RFC 2822 rendering.
                        self.assertEqual(
                            parsedate_to_datetime(a_t), parsedate_to_datetime(b_t), case)  # type: ignore[arg-type]
                    elif 'duration' in case:
                        # Compare as seconds: 'HH:MM:SS' and plain seconds
                        # renderings are equivalent.
                        self.assertEqual(convert_to_seconds(a_t), convert_to_seconds(b_t),  # type: ignore[arg-type]
                                         case)
                    else:
                        self.assertEqual(a_t, b_t, case)
                    b_attr = b.attrib.copy()  # type: ignore[union-attr]
                    # The serialiser may add a default isPermaLink; ignore it
                    # when the source did not specify one.
                    if 'isPermaLink' not in a.attrib:
                        b_attr.pop('isPermaLink', None)
                    self.assertEqual(a.attrib, b_attr, case)

    def test_merge(self) -> None:
        """merge() must report False for identical feeds and True per changed field."""
        for doc in self.rss_docs:
            ap = Podcast.from_rss(doc)
            bp = Podcast.from_rss(doc)
            # Identical podcasts: nothing to merge.
            self.assertFalse(ap.merge(bp))
            # Prepend a new episode so the items field differs.
            items = list(bp.items.ensure())
            items.insert(
                0,
                Episode(
                    enclosure=items[0].enclosure,
                    title='foo',
                    description='bar',
                    guid='baz',
                    duration=items[0].duration,
                    explicit=False,
                    pub_date=datetime.now(timezone.utc),
                )
            )
            cases = [
                ('items', PlainResource(items)),
                ('link', urlparse('https://example.com')),
                ('title', 'foo'),
                ('image', PlainResource(urlparse('https://example.com/image.png'))),
                ('description', 'bar'),
                ('author', 'baz'),
                ('categories', ['foo', 'bar']),
                ('explicit', True),
                ('language', 'ja'),
            ]
            for field, value in cases:
                setattr(bp, field, value)
                self.assertTrue(ap.merge(bp), f'{field} is not merged')
                if isinstance(value, Resource):
                    ar = getattr(ap, field).get()
                    br = getattr(bp, field).get()
                    # Order-insensitive comparison for list-valued resources.
                    if isinstance(ar, list):
                        ar = set(ar)
                        br = set(br)
                    self.assertEqual(ar, br, f'{field} is not merged: {value}')
                else:
                    self.assertEqual(getattr(ap, field), value, f'{field} is not merged: {value}')
280 |
--------------------------------------------------------------------------------
/tests/util/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YogiLiu/podmaker/93153aedfe643f97e912a2ca8cb77df311070a2b/tests/util/__init__.py
--------------------------------------------------------------------------------
/tests/util/test_retry.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | from unittest import mock
3 |
4 | from podmaker.util import retry
5 |
6 |
class TestRetry(unittest.TestCase):
    """Behavioural tests for the podmaker.util.retry decorator."""

    def test_no_exception(self) -> None:
        """A function that succeeds immediately is invoked exactly once."""
        stub = mock.Mock(return_value=1)
        wrapped = retry(3)(stub)
        self.assertEqual(1, wrapped())
        self.assertEqual(1, stub.call_count)

    def test_retry_success(self) -> None:
        """A single failure is retried and the eventual result is returned."""
        stub = mock.Mock(side_effect=[Exception, 1])
        wrapped = retry(3)(stub)
        self.assertEqual(1, wrapped())
        self.assertEqual(2, stub.call_count)

    def test_retry_failed(self) -> None:
        """Persistent failure raises after the initial call plus 3 retries."""
        stub = mock.Mock(side_effect=Exception)
        wrapped = retry(3)(stub)
        self.assertRaises(Exception, wrapped)
        self.assertEqual(4, stub.call_count)

    def test_specify_exception(self) -> None:
        """Only the exception type named by `catch` triggers retries."""
        stub = mock.Mock(side_effect=ValueError)
        wrapped = retry(3, catch=TypeError)(stub)
        # ValueError is not caught, so it escapes on the first call.
        self.assertRaises(ValueError, wrapped)
        self.assertEqual(1, stub.call_count)

        stub = mock.Mock(side_effect=ValueError)
        wrapped = retry(3, catch=ValueError)(stub)
        # ValueError is caught, so all retries are exhausted before raising.
        self.assertRaises(ValueError, wrapped)
        self.assertEqual(4, stub.call_count)
36 |
--------------------------------------------------------------------------------