├── .gitignore
├── CHANGELOG.md
├── LICENSE
├── README.md
├── README.zh_CN.md
├── config.example.toml
├── podmaker
├── __init__.py
├── __main__.py
├── cli.py
├── config
│ ├── __init__.py
│ ├── core.py
│ └── storage.py
├── fetcher
│ ├── __init__.py
│ ├── core.py
│ └── youtube.py
├── processor
│ ├── __init__.py
│ ├── core.py
│ ├── scheduling.py
│ └── task.py
├── rss
│ ├── README.md
│ ├── __init__.py
│ ├── core.py
│ ├── enclosure.py
│ ├── episode.py
│ ├── podcast.py
│ └── util
│ │ ├── __init__.py
│ │ ├── namespace.py
│ │ └── parse.py
├── storage
│ ├── __init__.py
│ ├── core.py
│ ├── local.py
│ └── s3.py
└── util
│ ├── __init__.py
│ ├── exit.py
│ └── retry_util.py
├── poetry.lock
├── pyproject.toml
├── systemd
└── podmaker.service
└── tests
├── __init__.py
├── data
├── apple.rss.test.xml
└── google.rss.test.xml
├── helper.py
├── provider
├── __init__.py
├── test_resource.py
└── test_youtube.py
├── storage
├── __init__.py
├── test_local.py
└── test_s3.py
├── test_config.py
├── test_rss.py
└── util
├── __init__.py
└── test_retry.py
/.gitignore:
--------------------------------------------------------------------------------
1 | .idea/
2 | .vscode/
3 | config.toml
4 |
5 | # Created by https://www.toptal.com/developers/gitignore/api/python
6 | # Edit at https://www.toptal.com/developers/gitignore?templates=python
7 |
8 | ### Python ###
9 | # Byte-compiled / optimized / DLL files
10 | __pycache__/
11 | *.py[cod]
12 | *$py.class
13 |
14 | # C extensions
15 | *.so
16 |
17 | # Distribution / packaging
18 | .Python
19 | build/
20 | develop-eggs/
21 | dist/
22 | downloads/
23 | eggs/
24 | .eggs/
25 | lib/
26 | lib64/
27 | parts/
28 | sdist/
29 | var/
30 | wheels/
31 | share/python-wheels/
32 | *.egg-info/
33 | .installed.cfg
34 | *.egg
35 | MANIFEST
36 |
37 | # PyInstaller
38 | # Usually these files are written by a python script from a template
39 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
40 | *.manifest
41 | *.spec
42 |
43 | # Installer logs
44 | pip-log.txt
45 | pip-delete-this-directory.txt
46 |
47 | # Unit test / coverage reports
48 | htmlcov/
49 | .tox/
50 | .nox/
51 | .coverage
52 | .coverage.*
53 | .cache
54 | nosetests.xml
55 | coverage.xml
56 | *.cover
57 | *.py,cover
58 | .hypothesis/
59 | .pytest_cache/
60 | cover/
61 |
62 | # Translations
63 | *.mo
64 | *.pot
65 |
66 | # Django stuff:
67 | *.log
68 | local_settings.py
69 | db.sqlite3
70 | db.sqlite3-journal
71 |
72 | # Flask stuff:
73 | instance/
74 | .webassets-cache
75 |
76 | # Scrapy stuff:
77 | .scrapy
78 |
79 | # Sphinx documentation
80 | docs/_build/
81 |
82 | # PyBuilder
83 | .pybuilder/
84 | target/
85 |
86 | # Jupyter Notebook
87 | .ipynb_checkpoints
88 |
89 | # IPython
90 | profile_default/
91 | ipython_config.py
92 |
93 | # pyenv
94 | # For a library or package, you might want to ignore these files since the code is
95 | # intended to run in multiple environments; otherwise, check them in:
96 | # .python-version
97 |
98 | # pipenv
99 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
100 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
101 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
102 | # install all needed dependencies.
103 | #Pipfile.lock
104 |
105 | # poetry
106 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
107 | # This is especially recommended for binary packages to ensure reproducibility, and is more
108 | # commonly ignored for libraries.
109 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
110 | #poetry.lock
111 |
112 | # pdm
113 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
114 | #pdm.lock
115 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
116 | # in version control.
117 | # https://pdm.fming.dev/#use-with-ide
118 | .pdm.toml
119 |
120 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
121 | __pypackages__/
122 |
123 | # Celery stuff
124 | celerybeat-schedule
125 | celerybeat.pid
126 |
127 | # SageMath parsed files
128 | *.sage.py
129 |
130 | # Environments
131 | .env
132 | .venv
133 | env/
134 | venv/
135 | ENV/
136 | env.bak/
137 | venv.bak/
138 |
139 | # Spyder project settings
140 | .spyderproject
141 | .spyproject
142 |
143 | # Rope project settings
144 | .ropeproject
145 |
146 | # mkdocs documentation
147 | /site
148 |
149 | # mypy
150 | .mypy_cache/
151 | .dmypy.json
152 | dmypy.json
153 |
154 | # Pyre type checker
155 | .pyre/
156 |
157 | # pytype static type analyzer
158 | .pytype/
159 |
160 | # Cython debug symbols
161 | cython_debug/
162 |
163 | # PyCharm
164 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
165 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
166 | # and can be added to the global gitignore or merged into this file. For a more nuclear
167 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
168 | #.idea/
169 |
170 | ### Python Patch ###
171 | # Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration
172 | poetry.toml
173 |
174 | # ruff
175 | .ruff_cache/
176 |
177 | # LSP config files
178 | pyrightconfig.json
179 |
180 | # End of https://www.toptal.com/developers/gitignore/api/python
181 |
182 |
183 | # Created by https://www.toptal.com/developers/gitignore/api/linux
184 | # Edit at https://www.toptal.com/developers/gitignore?templates=linux
185 |
186 | ### Linux ###
187 | *~
188 |
189 | # temporary files which can be created if a process still has a handle open of a deleted file
190 | .fuse_hidden*
191 |
192 | # KDE directory preferences
193 | .directory
194 |
195 | # Linux trash folder which might appear on any partition or disk
196 | .Trash-*
197 |
198 | # .nfs files are created when an open file is removed but is still being accessed
199 | .nfs*
200 |
201 | # End of https://www.toptal.com/developers/gitignore/api/linux
202 |
203 | # Created by https://www.toptal.com/developers/gitignore/api/macos
204 | # Edit at https://www.toptal.com/developers/gitignore?templates=macos
205 |
206 | ### macOS ###
207 | # General
208 | .DS_Store
209 | .AppleDouble
210 | .LSOverride
211 |
212 | # Icon must end with two \r
213 | Icon
214 |
215 |
216 | # Thumbnails
217 | ._*
218 |
219 | # Files that might appear in the root of a volume
220 | .DocumentRevisions-V100
221 | .fseventsd
222 | .Spotlight-V100
223 | .TemporaryItems
224 | .Trashes
225 | .VolumeIcon.icns
226 | .com.apple.timemachine.donotpresent
227 |
228 | # Directories potentially created on remote AFP share
229 | .AppleDB
230 | .AppleDesktop
231 | Network Trash Folder
232 | Temporary Items
233 | .apdisk
234 |
235 | ### macOS Patch ###
236 | # iCloud generated files
237 | *.icloud
238 |
239 | # End of https://www.toptal.com/developers/gitignore/api/macos
240 |
241 | # Created by https://www.toptal.com/developers/gitignore/api/windows
242 | # Edit at https://www.toptal.com/developers/gitignore?templates=windows
243 |
244 | ### Windows ###
245 | # Windows thumbnail cache files
246 | Thumbs.db
247 | Thumbs.db:encryptable
248 | ehthumbs.db
249 | ehthumbs_vista.db
250 |
251 | # Dump file
252 | *.stackdump
253 |
254 | # Folder config file
255 | [Dd]esktop.ini
256 |
257 | # Recycle Bin used on file shares
258 | $RECYCLE.BIN/
259 |
260 | # Windows Installer files
261 | *.cab
262 | *.msi
263 | *.msix
264 | *.msm
265 | *.msp
266 |
267 | # Windows shortcuts
268 | *.lnk
269 |
270 | # End of https://www.toptal.com/developers/gitignore/api/windows
271 |
272 |
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | ## 0.9.0 (2023-09-11)
2 |
3 | ### Feat
4 |
5 | - **processor.scheduling**: support custom interval
6 |
7 | ## 0.8.1 (2023-09-01)
8 |
9 | ### Fix
10 |
11 | - fix typo
12 |
13 | ## 0.8.0 (2023-09-01)
14 |
15 | ### BREAKING CHANGE
16 |
17 | - Should install yt_dlp and boto3 by extra dependencies.
18 |
19 | ### Feat
20 |
21 | - **fetcher,storage**: make yt_dlp and boto3 optional
22 | - **util**: add retry decorator
23 | - **fetcher.core,processor**: add start and stop hook to fetcher
24 |
25 | ### Fix
26 |
27 | - **util**: rename retry module to retry_util
28 |
29 | ## 0.7.4 (2023-08-27)
30 |
31 | ### Fix
32 |
33 | - **rss.podcast**: fix pu_bdate format
34 | - **fetcher.processor**: check exit signal
35 |
36 | ## 0.7.3 (2023-08-26)
37 |
38 | ### Fix
39 |
40 | - **config**: fix tomlkit
41 |
42 | ## 0.7.2 (2023-08-26)
43 |
44 | ### Fix
45 |
46 | - **config.core**: fix union
47 |
48 | ## 0.7.1 (2023-08-26)
49 |
50 | ### Fix
51 |
52 | - **config.storage**: remove absmeta
53 |
54 | ## 0.7.0 (2023-08-25)
55 |
56 | ### Feat
57 |
58 | - **storage**: support local storage
59 |
60 | ### Fix
61 |
62 | - **cli**: support local storage
63 | - **rss.podcast**: fix items merge
64 | - **fetcher.youtube**: add source id to skip log
65 |
66 | ## 0.6.1 (2023-08-25)
67 |
68 | ### Fix
69 |
70 | - **fetcher.youtube**: cache dir
71 |
72 | ## 0.6.0 (2023-08-25)
73 |
74 | ### Feat
75 |
76 | - **fetcher.youtube**: add source id to log
77 |
78 | ## 0.5.0 (2023-08-25)
79 |
80 | ### Feat
81 |
82 | - **storage**: support start and stop storage
83 | - **config**: support filter episodes by regex
84 | - **config**: use storage instead s3
85 |
86 | ## 0.4.0 (2023-08-24)
87 |
88 | ### Feat
89 |
90 | - **fetcher**: support youtube channel
91 |
92 | ### Fix
93 |
94 | - **rss.core**: remove stylesheet
95 | - **fetcher.youtube**: catch download error
96 | - **rss.podcast**: fix image url
97 |
98 | ## 0.3.1 (2023-08-23)
99 |
100 | ### Fix
101 |
102 | - **processor.task**: fix mime
103 | - **asset**: fix script url
104 |
105 | ## 0.3.0 (2023-08-23)
106 |
107 | ### BREAKING CHANGE
108 |
109 | - changes for config file
110 |
111 | ### Feat
112 |
113 | - **rss**: add stylesheet
114 | - add exit signal
115 |
116 | ### Fix
117 |
118 | - **rss.core**: fix encoding of rss bytes
119 | - **config**: change s3.cdn_prefix to s3.public_endpoint
120 |
121 | ### Refactor
122 |
123 | - **processor**: move execution to Task class, and support task hook
124 |
125 | ## 0.2.2 (2023-08-22)
126 |
127 | ### Fix
128 |
129 | - **fetcher.youtube**: fetch image and link for episode
130 |
131 | ## 0.2.1 (2023-08-21)
132 |
133 | ### Fix
134 |
135 | - **config**: quote id before used to generate storage key
136 |
137 | ## 0.2.0 (2023-08-21)
138 |
139 | ### Feat
140 |
141 | - add cli
142 | - **processor**: add processor
143 | - **rss**: support merging
144 | - **rss**: use qname to manage namespace
145 | - **rss**: support load rss object from xml string
146 | - **rss.core**: add plain resource and rss deserializer
147 | - **config**: add source config
148 | - **config**: support optional env and required env
149 | - add config
150 | - **rss**: add rss generator and serializer
151 | - complete youtube parser and s3 storage
152 |
153 | ### Fix
154 |
155 | - **rss**: compatible with apple's requirements
156 | - **processor.scheduling**: add next run time to add_job
157 | - **config**: fix decorator
158 | - **config**: fix tomlkit
159 | - **processor.scheduling**: fix shutdown
160 | - **fetcher.youtube**: fix logger
161 | - **config**: rename source.name to source.id
162 | - **processor.scheduling**: fix shutdown
163 | - **processor.core**: fix rss key
164 | - **rss**: fix text
165 | - **config**: add app config
166 | - **processor.core**: fix original file
167 | - change cli argument
168 | - **fetcher.youtube**: add lock
169 | - **fetcher**: rename parser to fetcher
170 | - **parser.youtube,-storage.s3**: remove redundant config dependency
171 | - **rss**: reduce public class
172 | - **env**: use dataclass as env object
173 | - **rss.podcast**: fix category pattern
174 | - supplement podcast field
175 |
176 | ### Refactor
177 |
178 | - **cli**: remove cli logic to cli module
179 | - add log
180 | - **rss**: hide unnecessary property
181 | - **parser.youtube**: use lru_cache
182 | - **config**: rename env to config and use pydantic manage config
183 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | This is free and unencumbered software released into the public domain.
2 |
3 | Anyone is free to copy, modify, publish, use, compile, sell, or
4 | distribute this software, either in source code form or as a compiled
5 | binary, for any purpose, commercial or non-commercial, and by any
6 | means.
7 |
8 | In jurisdictions that recognize copyright laws, the author or authors
9 | of this software dedicate any and all copyright interest in the
10 | software to the public domain. We make this dedication for the benefit
11 | of the public at large and to the detriment of our heirs and
12 | successors. We intend this dedication to be an overt act of
13 | relinquishment in perpetuity of all present and future rights to this
14 | software under copyright law.
15 |
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 | OTHER DEALINGS IN THE SOFTWARE.
23 |
24 | For more information, please refer to <https://unlicense.org>
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Podmaker
2 |
3 | *Read this document in other languages: [English](README.md), [简体中文](README.zh_CN.md)*
4 |
5 | Convert online media into podcast feeds.
6 |
7 | 
8 | 
9 | 
10 | 
11 | 
12 |
13 |
14 | ## Features
15 |
16 | - Extract audio from online videos.
17 | - No need to deploy web services.
18 | - Generate podcast feeds.
19 | - Deploy with watch mode to keep feeds up-to-date.
20 |
21 | ## Dependencies
22 |
23 | This tool uses **ffmpeg** to extract audio from videos. Ensure it's installed within `$PATH` before using this tool.
24 |
25 | Additionally, you should install extra dependencies according to your requirements:
26 |
27 | - `podmaker[all]`: Install all extra dependencies.
28 | - `podmaker[s3]`: Install dependencies for S3 storage.
29 | - `podmaker[youtube]`: Install dependencies for YouTube.
30 |
31 | Install multiple extra dependencies simultaneously using `podmaker[extra1,extra2,...]`.
32 |
33 | ## Configuration
34 |
35 | Before diving into this tool, craft a configuration file, a TOML file to be precise.
36 | By default, the file resides at `${WORK_DIR}/config.toml`. Customize the path using the `-c` or `--config` option.
37 | An example configuration file can be found at [config.example.toml](https://github.com/YogiLiu/podmaker/blob/main/config.example.toml).
38 |
39 | ## Usage
40 |
41 | ### Systemd
42 |
43 | Deploy this tool in the background with systemd (requires root privileges):
44 |
45 | ```bash
46 | # create virtual environment
47 | apt install python3 python3-venv
48 | mkdir -p /opt/podmaker && cd /opt/podmaker
49 | python3 -m venv venv
50 |
51 | # install podmaker
52 | ./venv/bin/pip install "podmaker[all]"
53 |
54 | # create and edit config file
55 | curl -o config.toml https://raw.githubusercontent.com/YogiLiu/podmaker/main/config.example.toml
56 | vim config.toml
57 |
58 | # create systemd service
59 | curl -o /etc/systemd/system/podmaker.service https://raw.githubusercontent.com/YogiLiu/podmaker/main/systemd/podmaker.service
60 | systemctl daemon-reload
61 |
62 | # enable and start service
63 | systemctl enable podmaker
64 | systemctl start podmaker
65 | ```
66 |
67 | ### Manual
68 |
69 | ### Using pip
70 |
71 | For the optimal experience, we recommend installing this tool within a virtual environment.
72 |
73 | ```bash
74 | pip install "podmaker[all]"
75 | ```
76 |
77 | ### Using `pipx`
78 |
79 | ```bash
80 | pipx install "podmaker[all]"
81 | ```
82 |
83 | ### Run
84 |
85 | ```bash
86 | podmaker -c path/to/config.toml
87 | ```
88 |
89 | or
90 |
91 | ```bash
92 | python -m podmaker -c path/to/config.toml
93 | ```
94 |
95 | ## Roadmap
96 |
97 | ### Platforms
98 |
99 | - [x] YouTube
100 | - [x] Playlist
101 | - [x] Channel
102 | - [ ] BiliBili
103 |
104 | ### Resource Hosting
105 |
106 | - [x] S3
107 | - [x] Local
108 |
109 | ## Contributing
110 |
111 | Your contributions are invaluable. Feel free to submit pull requests.
112 | Before committing, ensure your changes pass unit tests and `autohooks`.
113 |
114 | To activate `autohooks`, use the following command:
115 |
116 | ```bash
117 | poetry run autohooks activate --mode poetry
118 | ```
119 |
120 | This process will automatically lint, format, and sort code imports.
121 |
122 | When introducing new features, remember to provide corresponding tests.
123 |
124 | ## License
125 |
126 | For licensing details, refer to [LICENSE](https://github.com/YogiLiu/podmaker/blob/main/LICENSE).
--------------------------------------------------------------------------------
/README.zh_CN.md:
--------------------------------------------------------------------------------
1 | # Podmaker
2 |
3 | *本文档的其他语言: [English](README.md), [简体中文](README.zh_CN.md)*
4 |
5 | 将在线媒体转换成播客订阅。
6 |
7 | 
8 | 
9 | 
10 | 
11 | 
12 |
13 |
14 | ## 功能
15 |
16 | - 从网络视频中提取音频;
17 | - 无需额外部署 Web 服务;
18 | - 自动生成播客订阅;
19 | - 通过 `watch` 模式自动更新订阅。
20 |
21 | ## 依赖
22 |
23 | 本工具使用 **ffmpeg** 从视频中提取音频,请确保 `$PATH` 中包含 `ffmpeg`。
24 |
25 | 另外, 你可以根据你的需求安装额外的依赖:
26 |
27 | - `podmaker[all]`: 安装下述的所有依赖;
28 | - `podmaker[s3]`: 提供 S3 支持;
29 | - `podmaker[youtube]`: 提供 YouTube 支持。
30 |
31 | 你可以使用 `podmaker[extra1,extra2,...]` 的方式同时安装多个额外依赖。
32 |
33 | ## 配置
34 |
35 | 在开始使用本工具之前,请先准备一个 TOML 格式的配置文件。
36 | 默认情况下,配置文件位于 `${WORK_DIR}/config.toml`。你可以通过 `-c` 或 `--config` 选项来指定配置文件的路径。
37 | 你可以在 [config.example.toml](https://github.com/YogiLiu/podmaker/blob/main/config.example.toml) 中找到一个示例配置文件。
38 |
39 | ## 使用方法
40 |
41 | ### Systemd
42 |
43 | 使用 systemd 后台运行本工具(需要 root 权限):
44 |
45 | ```bash
46 | # 创建虚拟环境
47 | apt install python3 python3-venv
48 | mkdir -p /opt/podmaker && cd /opt/podmaker
49 | python3 -m venv venv
50 |
51 | # 安装 podmaker
52 | ./venv/bin/pip install "podmaker[all]"
53 |
54 | # 创建配置文件
55 | curl -o config.toml https://raw.githubusercontent.com/YogiLiu/podmaker/main/config.example.toml
56 | vim config.toml
57 |
58 | # 创建 systemd 服务
59 | curl -o /etc/systemd/system/podmaker.service https://raw.githubusercontent.com/YogiLiu/podmaker/main/systemd/podmaker.service
60 | systemctl daemon-reload
61 |
62 | # 启动服务,并设置开机自启
63 | systemctl enable podmaker
64 | systemctl start podmaker
65 | ```
66 |
67 | ### 手动运行
68 |
69 | ### 使用 pip 安装
70 |
71 | 为了获得最佳体验,我们建议你在虚拟环境中安装本工具。
72 |
73 | ```bash
74 | pip install "podmaker[all]"
75 | ```
76 |
77 | ### 使用 `pipx` 安装
78 |
79 | ```bash
80 | pipx install "podmaker[all]"
81 | ```
82 |
83 | ### 运行
84 |
85 | ```bash
86 | podmaker -c path/to/config.toml
87 | ```
88 |
89 | 或者
90 |
91 | ```bash
92 | python -m podmaker -c path/to/config.toml
93 | ```
94 |
95 | ## 项目规划
96 |
97 | ### 平台支持
98 |
99 | - [x] YouTube
100 | - [x] 播放列表
101 | - [x] 频道
102 | - [ ] 哔哩哔哩(鸽)
103 |
104 | ### 资源托管
105 |
106 | - [x] S3
107 | - [x] 本地文件
108 |
109 | ## 贡献指南
110 |
111 | 你的贡献弥足珍贵,请不要吝啬提出你的 Pull Request。
112 | 在提交代码之前,请确保你的代码通过单元测试和 `autohooks`。
113 |
114 | 你可以使用下述命令激活 `autohooks`:
115 |
116 | ```bash
117 | poetry run autohooks activate --mode poetry
118 | ```
119 |
120 | 这个程序会自动进行代码风格检查、格式化和 import 排序。
121 |
122 | 如果你添加了新的功能,请确保提供了相应的测试。
123 |
124 | ## 许可证
125 |
126 | 查看许可证详情,请参阅 [LICENSE](https://github.com/YogiLiu/podmaker/blob/main/LICENSE)。
--------------------------------------------------------------------------------
/config.example.toml:
--------------------------------------------------------------------------------
1 | [app]
2 | # running mode, "oneshot" or "watch"
3 | # - oneshot: generate the feed and exit
4 | # - watch: generate the feed and watch for changes
5 | mode = "oneshot"
6 |
7 | # level of logging, "DEBUG", "INFO", "WARNING", "ERROR"
8 | loglevel = "INFO"
9 |
10 | # optional, the admin of the feed
11 | [owner]
12 | name = "podmaker"
13 | email = "admin@podmaker.dev"
14 |
15 | # notice: the sources is an array, it must specify using `[[]]`
16 | [[sources]]
17 | # used to generate the feed url, must be unique, prefer to use numbers, letters, space and underscores
18 | id = "source_1"
19 | # optional, the display name of the source
20 | name = "Source 1"
21 | # optional, the regex to match the episode
22 | regex = "Episode \\d+"
23 | # the url of the source
24 | url = "https://example.com/source_1/"
25 | # optional, the interval to check the source, in seconds, default to 3600
26 | interval = 3600
27 |
28 | [[sources]]
29 | id = "source_2"
30 | name = "Source 2"
31 | regex = "Episode \\d+"
32 | url = "https://example.com/source_2/"
33 | interval = 3600
34 |
35 | # only one is allowed to be specified
36 | [storage]
37 | # destination of the generated feed, support "local" and "s3"
38 | dest = "local"
39 | # the directory to store the generated feed, you must change it
40 | # the files will save to $base_dir/data/, and the feed will save to $base_dir/data/feed.xml
41 | # you can use nginx to serve the $base_dir/data/
42 | # !!WARNING!! don't serve the $base_dir/ directly, it may leak your config file
43 | base_dir = "/path/to/storage"
44 | # must be public-read, this endpoint should be pointed to $base_dir/data/
45 | public_endpoint = "https://example.com/"
46 |
47 | #[storage]
48 | #dest = "s3"
49 | #access_key = "123"
50 | #access_secret = "456"
51 | #bucket = "podmake"
52 | #endpoint = "https://s3.amazonaws.com/"
53 | #public_endpoint = "https://s3.amazonaws.com/"
54 |
--------------------------------------------------------------------------------
/podmaker/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YogiLiu/podmaker/93153aedfe643f97e912a2ca8cb77df311070a2b/podmaker/__init__.py
--------------------------------------------------------------------------------
/podmaker/__main__.py:
--------------------------------------------------------------------------------
# Package entry point: allows running the tool as `python -m podmaker`.
from podmaker.cli import run

if __name__ == '__main__':
    run()
5 |
--------------------------------------------------------------------------------
/podmaker/cli.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import logging
3 | import sys
4 | from pathlib import Path
5 |
6 | from podmaker.config import ConfigError, PMConfig
7 | from podmaker.processor import get_processor
8 | from podmaker.storage import get_storage
9 | from podmaker.util import exit_signal
10 |
11 | logger = logging.getLogger(__name__)
12 |
13 |
def run() -> None:
    """Entry point for the ``podmaker`` CLI.

    Parses command-line arguments, loads the TOML config, configures
    logging, then starts the storage backend and the processor.
    Exits with status 1 when the config file is missing or invalid.
    """
    parser = argparse.ArgumentParser(prog='podmaker', description='Podcast generator.')
    parser.add_argument('-c', '--conf', help='Path to config file (default: config.toml).', type=Path,
                        default=Path('config.toml'))
    args = parser.parse_args()
    config_path = args.conf
    config: PMConfig
    try:
        config = PMConfig.from_file(config_path)
    except ConfigError as e:
        # Logging is not configured yet; the root logger's last-resort
        # handler still writes this message to stderr.
        logger.error(e)
        sys.exit(1)
    logging.basicConfig(
        level=config.app.loglevel,
        format='%(asctime)s %(levelname)s %(name)s %(message)s',
    )
    storage = get_storage(config.storage)
    storage.start()
    # Lazy %-style args avoid formatting when the level is disabled.
    logger.info('running in %s mode', config.app.mode)
    processor = get_processor(config, storage)
    exit_signal.listen()
    try:
        processor.run()
    finally:
        # Bug fix: the original only stopped storage on exception, so a
        # normal oneshot run never released the backend. try/finally stops
        # it on both success and failure, and still propagates exceptions.
        storage.stop()
40 |
--------------------------------------------------------------------------------
/podmaker/config/__init__.py:
--------------------------------------------------------------------------------
1 | __all__ = ['OwnerConfig', 'AppConfig', 'StorageConfig', 'SourceConfig', 'PMConfig', 'ConfigError', 'S3Config',
2 | 'LocalConfig']
3 |
4 | from podmaker.config.core import AppConfig, ConfigError, OwnerConfig, PMConfig, SourceConfig
5 | from podmaker.config.storage import LocalConfig, S3Config, StorageConfig
6 |
--------------------------------------------------------------------------------
/podmaker/config/core.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import re
4 | import sys
5 | from pathlib import PurePath
6 | from typing import Literal, Optional, Union
7 | from urllib.parse import quote
8 |
9 | from pydantic import BaseModel, EmailStr, Field, HttpUrl, ValidationError
10 |
11 | from podmaker.config.storage import LocalConfig, S3Config
12 |
13 | if sys.version_info >= (3, 11):
14 | import tomllib as toml
15 | else:
16 | import tomlkit as toml
17 |
18 |
class OwnerConfig(BaseModel):
    """Contact information of the feed owner (see the `[owner]` table in config)."""

    # Display name; optional, but must be non-empty when provided.
    name: Optional[str] = Field(None, min_length=1, frozen=True)
    # Contact e-mail, validated by pydantic's EmailStr; required.
    email: EmailStr = Field(frozen=True)
22 |
23 |
# noinspection PyNestedDecorators
class AppConfig(BaseModel):
    """Application-level runtime settings (`[app]` table)."""

    # 'oneshot' generates the feed once and exits; 'watch' keeps it updated.
    mode: Literal['oneshot', 'watch'] = Field('oneshot', frozen=True)
    # Root log level; passed to logging.basicConfig by the CLI.
    loglevel: Literal['DEBUG', 'INFO', 'WARNING', 'ERROR'] = Field('INFO', frozen=True)
28 |
29 |
class SourceConfig(BaseModel):
    """One media source to convert into a podcast feed (`[[sources]]` entries)."""

    # Unique identifier; also used (URL-quoted) as the storage key prefix.
    id: str = Field(min_length=1, frozen=True)
    # Optional display name; fetchers fall back to the source's own title.
    name: Optional[str] = Field(None, min_length=1, frozen=True)
    # Optional filter: only episodes whose title matches are kept.
    regex: Optional[re.Pattern[str]] = Field(None, frozen=True)
    url: HttpUrl = Field(frozen=True)
    # Re-check interval in seconds; defaults to one hour.
    interval: int = Field(1 * 60 * 60, ge=1, frozen=True)

    def get_storage_key(self, key: str) -> str:
        """Namespace *key* under this source's URL-quoted id."""
        return f'{quote(self.id)}/{key}'
39 |
40 |
class ConfigError(Exception):
    """Raised when the config file is missing or fails validation."""
    pass
43 |
44 |
class PMConfig(BaseModel):
    """Top-level podmaker configuration loaded from a TOML file."""

    # Optional feed owner contact info.
    owner: Optional[OwnerConfig] = Field(None, frozen=True)
    # Exactly one storage backend: S3 or local filesystem.
    storage: Union[S3Config, LocalConfig] = Field(frozen=True)
    # One or more media sources to turn into feeds.
    sources: tuple[SourceConfig, ...] = Field(frozen=True)
    app: AppConfig = Field(default_factory=AppConfig, frozen=True)

    @classmethod
    def from_file(cls, path: PurePath) -> PMConfig:
        """Load and validate a configuration from the TOML file at *path*.

        :raises ConfigError: if the file does not exist or fails validation.
        """
        try:
            with open(path, 'rb') as f:
                doc = toml.load(f)
            # tomlkit (Python < 3.11) returns a TOMLDocument wrapper; unwrap
            # it to plain containers (https://github.com/sdispater/tomlkit/issues/275).
            # tomllib (3.11+) already yields a plain dict, which has no unwrap.
            if getattr(doc, 'unwrap', None):
                data = doc.unwrap()
            else:
                data = doc
        except FileNotFoundError as e:
            raise ConfigError(f'config file not found: {path}') from e
        try:
            return cls(**data)
        except ValidationError as e:
            # Bug fix: chain the pydantic error (`from e`) so the original
            # validation details survive, matching the branch above.
            raise ConfigError(f'can not initial config: {e}') from e
67 |
--------------------------------------------------------------------------------
/podmaker/config/storage.py:
--------------------------------------------------------------------------------
1 | from pathlib import PurePath
2 | from typing import Literal
3 |
4 | from pydantic import BaseModel, Field, HttpUrl
5 |
6 | SupportedStorage = Literal['s3', 'local']
7 |
8 |
class StorageConfig(BaseModel):
    """Base class for storage backend configs; `dest` selects the backend."""

    # NOTE(review): min_length on a Literal-typed field looks ineffective —
    # the Literal already constrains the value; confirm pydantic accepts it.
    dest: SupportedStorage = Field(min_length=1, frozen=True)
11 |
12 |
class S3Config(StorageConfig):
    """Configuration for the S3 storage backend (requires the `s3` extra)."""

    dest: Literal['s3'] = Field(frozen=True)
    access_key: str = Field(min_length=1, frozen=True)
    access_secret: str = Field(min_length=1, frozen=True)
    bucket: str = Field(min_length=1, frozen=True)
    # API endpoint used for uploads.
    endpoint: HttpUrl = Field(frozen=True)
    # Public-read endpoint used to build URLs embedded in the feed.
    public_endpoint: HttpUrl = Field(frozen=True)
20 |
21 |
class LocalConfig(StorageConfig):
    """Configuration for the local-filesystem storage backend."""

    dest: Literal['local'] = Field(frozen=True)
    # NOTE(review): min_length is a string constraint; applying it to a
    # PurePath field is likely ineffective or rejected by pydantic — verify.
    base_dir: PurePath = Field(min_length=1, frozen=True)
    # Public URL prefix that serves $base_dir/data/ (see config.example.toml).
    public_endpoint: HttpUrl = Field(frozen=True)
26 |
--------------------------------------------------------------------------------
/podmaker/fetcher/__init__.py:
--------------------------------------------------------------------------------
1 | __all__ = ['Fetcher']
2 |
3 | from podmaker.fetcher.core import Fetcher
4 |
--------------------------------------------------------------------------------
/podmaker/fetcher/core.py:
--------------------------------------------------------------------------------
1 | from abc import ABC, abstractmethod
2 |
3 | from podmaker.config import SourceConfig
4 | from podmaker.rss import Podcast
5 |
6 |
class Fetcher(ABC):
    """Abstract base for per-platform fetchers that build a Podcast from a source."""

    @abstractmethod
    def fetch(self, source: SourceConfig) -> Podcast:
        """Fetch *source* and return the resulting Podcast; must be overridden."""
        raise NotImplementedError

    def start(self) -> None:
        """Lifecycle hook; no-op by default, override to acquire resources."""
        pass

    def stop(self) -> None:
        """Lifecycle hook; no-op by default, override to release resources."""
        pass
17 |
--------------------------------------------------------------------------------
/podmaker/fetcher/youtube.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | __all__ = ['YouTube']
4 |
5 | import logging
6 | import os
7 | import sys
8 | import tempfile
9 | from datetime import datetime, timedelta, timezone
10 | from functools import lru_cache
11 | from inspect import isgenerator
12 | from tempfile import TemporaryDirectory
13 | from typing import Any, Iterable
14 | from urllib.parse import ParseResult, urlparse
15 |
16 | from podmaker.config import OwnerConfig, SourceConfig
17 | from podmaker.fetcher import Fetcher
18 | from podmaker.rss import Enclosure, Episode, Owner, Podcast, Resource
19 | from podmaker.rss.core import PlainResource
20 | from podmaker.storage import Storage
21 | from podmaker.util import exit_signal
22 |
23 | logger = logging.getLogger(__name__)
24 |
25 | try:
26 | import yt_dlp
27 | except ImportError:
28 | logger.error('yt_dlp is not installed. youtube fetcher is not available.')
29 | sys.exit(1)
30 |
31 |
class YouTube(Fetcher):
    """Fetcher that turns a YouTube playlist/channel URL into a Podcast."""

    def __init__(self, storage: Storage, owner_config: OwnerConfig | None):
        self.storage = storage
        # Shared yt-dlp options: route its logging into our hierarchy and
        # keep its cache in the system temp dir instead of the working dir.
        self.ydl_opts = {
            'logger': logging.getLogger('yt_dlp'),
            'cachedir': tempfile.gettempdir(),
        }
        self.owner_config = owner_config

    def fetch_info(self, url: str) -> dict[str, Any]:
        """Return yt-dlp metadata for *url* without downloading anything."""
        with yt_dlp.YoutubeDL(self.ydl_opts) as ydl:
            # process=False leaves playlist 'entries' as a lazy generator.
            info = ydl.extract_info(str(url), download=False, process=False)  # type: dict[str, Any]
            return info

    def fetch(self, source: SourceConfig) -> Podcast:
        """Build a Podcast for *source*.

        :raises ValueError: if the URL is not a playlist/channel (no entries).
        """
        info = self.fetch_info(str(source.url))
        # Playlists/channels expose 'entries' as a generator (see fetch_info);
        # anything else (e.g. a single video URL) is unsupported here.
        if isgenerator(info.get('entries', None)):
            return self.fetch_entries(info, source)
        raise ValueError(f'unsupported url: {source.url}')

    def fetch_entries(self, info: dict[str, Any], source: SourceConfig) -> Podcast:
        """Assemble a Podcast from playlist/channel metadata; episodes stay lazy."""
        logger.info(f'[{source.id}] parse entries: {source.url}')
        if self.owner_config:
            owner = Owner(name=self.owner_config.name, email=self.owner_config.email)
        else:
            owner = None
        podcast = Podcast(
            # Entry wraps the generator so episodes are fetched on demand.
            items=Entry(info.get('entries', []), self.ydl_opts, self.storage, source),
            link=urlparse(info['webpage_url']),
            # Configured display name wins over the channel/playlist title.
            title=source.name or info['title'],
            image=EntryThumbnail(info['thumbnails']),
            description=info['description'],
            owner=owner,
            author=info['uploader'],
            categories=info.get('tags', []),
        )
        return podcast
69 |
70 |
class Entry(Resource[Iterable[Episode]]):
    """Lazy Resource that turns a stream of yt-dlp entries into Episodes."""

    def __init__(
            self, entries: Iterable[dict[str, Any]], ydl_opts: dict[str, Any], storage: Storage, source: SourceConfig):
        self.entries = entries
        self.ydl_opts = ydl_opts
        self.storage = storage
        self.source = source

    def get(self) -> Iterable[Episode] | None:
        # NOTE(review): because this body contains `yield`, calling get()
        # always returns a generator object, never None — the trailing
        # `return None` merely stops iteration. Confirm callers treat an
        # empty generator the same as a None result.
        logger.debug(f'[{self.source.id}] fetch items')
        with yt_dlp.YoutubeDL(self.ydl_opts) as ydl:
            is_empty = True
            for entry in self.entries:
                # Abort promptly if a shutdown signal was received.
                exit_signal.check()
                is_empty = False
                try:
                    video_info = ydl.extract_info(entry['url'], download=False)
                except yt_dlp.DownloadError as e:
                    # Skip unavailable/broken videos instead of failing the feed.
                    logger.error(f'[{self.source.id}] failed to fetch item({entry["url"]}) due to {e}')
                    continue
                # Optional title filter from the source config.
                if self.source.regex and not self.source.regex.search(video_info['title']):
                    logger.info(f'[{self.source.id}] skip item {video_info["id"]} due to regex')
                    continue
                # upload_date is a naive 'YYYYMMDD' string; interpret it as UTC.
                upload_at = datetime.strptime(video_info['upload_date'], '%Y%m%d').replace(tzinfo=timezone.utc)
                logger.info(f'[{self.source.id}] fetch item: {video_info["id"]}')
                yield Episode(
                    # Audio defers download/upload until the enclosure is resolved.
                    enclosure=Audio(video_info, self.ydl_opts, self.storage, self.source),
                    title=video_info['title'],
                    description=video_info['description'],
                    guid=video_info['id'],
                    duration=timedelta(seconds=video_info['duration']),
                    pub_date=upload_at,
                    link=urlparse(video_info['webpage_url']),
                    image=PlainResource(urlparse(video_info['thumbnail'])),
                )
            if is_empty:
                return None
108 |
109 |
class EntryThumbnail(Resource[ParseResult]):
    """Resolves a channel's largest thumbnail to its parsed URL."""

    def __init__(self, thumbnails: list[dict[str, Any]]):
        # thumbnails: yt-dlp thumbnail dicts; 'width' may be absent.
        self.thumbnails = thumbnails

    def get(self) -> ParseResult | None:
        """Return the URL of the widest thumbnail, or None when there are none."""
        if not self.thumbnails:
            return None
        widest = max(self.thumbnails, key=lambda thumb: thumb.get('width', 0))
        return urlparse(widest['url'])
120 |
121 |
class Audio(Resource[Enclosure]):
    """Lazily downloads a video's audio track and uploads it to storage.

    ``get()`` is memoised per instance, so repeated ``ensure()`` calls never
    re-download or re-upload the same episode.
    """

    def __init__(self, info: dict[str, Any], ydl_opts: dict[str, Any], storage: Storage, source: SourceConfig):
        # info: yt-dlp metadata for one video (needs 'id' and 'webpage_url').
        self.info = info
        # Base options force an mp3 audio-only download; caller options win on conflict.
        self.ydl_opts: dict[str, Any] = {
            'format': 'ba',
            'postprocessors': [{
                'key': 'FFmpegExtractAudio',
                'preferredcodec': 'mp3',
            }],
        }
        self.ydl_opts.update(ydl_opts)
        self.storage = storage
        self.source = source
        # Per-instance memo for get(). Replaces the previous
        # @lru_cache(maxsize=1) on the method, which kept instances alive via
        # the cache and shared a single slot across all Audio instances
        # (ruff B019), so a second instance's get() evicted the first's result.
        self._enclosure: Enclosure | None = None

    def upload(self, key: str) -> tuple[ParseResult, int]:
        """Download the audio into a temp dir and upload it under *key*.

        :return: tuple of (public URL, size of the file in bytes).
        """
        logger.debug(f'[{self.source.id}] upload audio: {key}')
        with TemporaryDirectory(prefix='podmaker_youtube_') as cache_dir:
            opts = {'paths': {'home': cache_dir}}
            opts.update(self.ydl_opts)
            with yt_dlp.YoutubeDL(opts) as ydl:
                logger.info(f'[{self.source.id}] fetch audio: {self.info["id"]}')
                downloaded_info = ydl.extract_info(self.info['webpage_url'])
                audio_path = downloaded_info['requested_downloads'][0]['filepath']
                length = os.path.getsize(audio_path)
                with open(audio_path, 'rb') as f:
                    logger.info(f'[{self.source.id}] upload audio: {key}')
                    url = self.storage.put(f, key=key, content_type='audio/mp3')
            return url, length

    def get(self) -> Enclosure | None:
        """Return the episode enclosure, uploading the audio on first use.

        Skips the download entirely when storage already holds the key.
        """
        if self._enclosure is not None:
            return self._enclosure
        logger.debug(f'[{self.source.id}] fetch audio: {self.info["id"]}')
        key = self.source.get_storage_key(f'youtube/{self.info["id"]}.mp3')
        info = self.storage.check(key)
        if info:
            logger.info(f'[{self.source.id}] audio already exists: {key}')
            url = info.uri
            length = info.size
        else:
            url, length = self.upload(key)
        self._enclosure = Enclosure(url=url, length=length, type='audio/mp3')
        return self._enclosure
163 |
--------------------------------------------------------------------------------
/podmaker/processor/__init__.py:
--------------------------------------------------------------------------------
1 | __all__ = ['Processor', 'ScheduleProcessor', 'get_processor']
2 |
3 | from podmaker.config import PMConfig
4 | from podmaker.processor.core import Processor
5 | from podmaker.processor.scheduling import ScheduleProcessor
6 | from podmaker.storage import Storage
7 |
8 |
def get_processor(config: PMConfig, storage: Storage) -> Processor:
    """Select a processor implementation for the configured run mode.

    ``watch`` mode gets the scheduling processor; any other mode runs once.
    """
    processor_cls = ScheduleProcessor if config.app.mode == 'watch' else Processor
    return processor_cls(config=config, storage=storage)
14 |
--------------------------------------------------------------------------------
/podmaker/processor/core.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import logging
4 | from concurrent.futures import ThreadPoolExecutor
5 | from contextlib import contextmanager
6 | from typing import Any, Iterator
7 |
8 | from podmaker.config import PMConfig, SourceConfig
9 | from podmaker.fetcher import Fetcher
10 | from podmaker.processor.task import Task
11 | from podmaker.storage import Storage
12 | from podmaker.util import exit_signal
13 |
14 | logger = logging.getLogger(__name__)
15 |
16 |
class Processor:
    """Runs one fetch-and-publish pass over every configured source."""

    def __init__(self, config: PMConfig, storage: Storage):
        self._config = config
        self._storage = storage
        # Subclasses customise shutdown via exit_handler(); the private
        # wrapper adds logging before delegating.
        exit_signal.register(self._exit_handler)
        # Fetchers are created lazily, keyed by source host.
        self._fetcher_instances: dict[str, Fetcher] = {}

    @contextmanager
    def _context(self) -> Iterator[None]:
        # Start/stop the lifecycle of every known fetcher around the run.
        # NOTE(review): fetchers are created lazily inside _tasks, after this
        # start loop has already executed, so a fetcher instantiated during
        # the run gets stop() on exit without a matching start() — confirm
        # whether Fetcher.start() is optional.
        for fetcher in self._fetcher_instances.values():
            fetcher.start()
        try:
            yield
        finally:
            for fetcher in self._fetcher_instances.values():
                fetcher.stop()

    def _get_fetcher(self, source: SourceConfig) -> Fetcher:
        # One fetcher per host, created on first use; only YouTube is supported.
        if source.url.host not in self._fetcher_instances:
            if source.url.host == 'www.youtube.com':
                from podmaker.fetcher.youtube import YouTube
                self._fetcher_instances[source.url.host] = YouTube(self._storage, self._config.owner)
            else:
                raise ValueError(f'unsupported host: {source.url.host}')
        return self._fetcher_instances[source.url.host]

    @property
    def _tasks(self) -> Iterator[Task]:
        # Lazily build one Task per configured source.
        for source in self._config.sources:
            fetcher = self._get_fetcher(source)
            yield Task(fetcher, source, self._storage, self._config.owner)

    def _exit_handler(self, *_: Any) -> None:
        # Registered with exit_signal; logs, then calls the overridable hook.
        logger.warning('received exit signal')
        self.exit_handler()

    def exit_handler(self, *_: Any) -> None:
        # Hook for subclasses (e.g. to stop a scheduler); default is a no-op.
        pass

    def run(self) -> None:
        """Submit every task to a bounded thread pool and wait for completion."""
        with self._context():
            with ThreadPoolExecutor(max_workers=5) as executor:
                for task in self._tasks:
                    logger.info(f'submit task: {task.id}')
                    executor.submit(task.execute)
        logger.info('processor exited')
63 |
--------------------------------------------------------------------------------
/podmaker/processor/scheduling.py:
--------------------------------------------------------------------------------
1 | import logging
2 | from datetime import datetime
3 | from typing import Any
4 |
5 | from apscheduler.jobstores.base import JobLookupError
6 | from apscheduler.schedulers.blocking import BlockingScheduler
7 | from apscheduler.triggers.interval import IntervalTrigger
8 |
9 | from podmaker.config import PMConfig
10 | from podmaker.processor.core import Processor
11 | from podmaker.storage import Storage
12 |
13 | logger = logging.getLogger(__name__)
14 |
15 |
class ScheduleProcessor(Processor):
    """Processor that re-runs every task on its own interval via APScheduler."""

    def __init__(self, config: PMConfig, storage: Storage):
        super().__init__(config, storage)
        self._scheduler = BlockingScheduler()

    def exit_handler(self, *_: Any) -> None:
        # Invoked from Processor's exit-signal hook; stop without waiting for jobs.
        self._scheduler.shutdown(wait=False)

    def _before_hook(self, task_id: str) -> None:
        """Pause the job while its task runs so executions never overlap."""
        try:
            self._scheduler.pause_job(task_id)
        except JobLookupError:
            logger.warning(f'task({task_id}) not found, maybe it was removed')

    def _after_hook(self, task_id: str) -> None:
        """Resume the job once its task has finished."""
        try:
            self._scheduler.resume_job(task_id)
        except JobLookupError:
            logger.warning(f'task({task_id}) not found, maybe it was removed')

    def run(self) -> None:
        """Schedule every task and block until the scheduler shuts down."""
        with self._context():
            for task in self._tasks:
                # Fixed log message: the previous text ("it well be run after
                # 1 minute and every 1 hour") was both a typo and inaccurate —
                # next_run_time=now triggers an immediate first run, and the
                # interval is task.interval seconds.
                logger.info(f'schedule task: {task.id}, it will run immediately and then every {task.interval} seconds')
                task.before = self._before_hook
                task.after = self._after_hook
                self._scheduler.add_job(
                    func=task.execute,
                    trigger=IntervalTrigger(seconds=task.interval),
                    next_run_time=datetime.now(),
                    id=task.id,
                    name=f'Job-{task.id}',
                )
            self._scheduler.start()
        logger.info('processor exited')
51 |
52 |
--------------------------------------------------------------------------------
/podmaker/processor/task.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import logging
4 | from io import BytesIO
5 | from typing import Any, Callable
6 | from uuid import uuid4
7 |
8 | from podmaker.config import OwnerConfig, SourceConfig
9 | from podmaker.fetcher import Fetcher
10 | from podmaker.rss import Podcast
11 | from podmaker.storage import EMPTY_FILE, Storage
12 | from podmaker.util import ExitSignalError
13 |
14 | logger = logging.getLogger(__name__)
15 |
16 | Hook = Callable[[str], None]
17 |
18 |
def _do_nothing(*_: Any) -> None:
    """Default task hook: accept any arguments and do nothing."""
    return None
21 |
22 |
class Task:
    """One unit of work: fetch a source's feed, merge it with the stored
    copy, and upload the result when anything changed."""

    def __init__(self, fetcher: Fetcher, source: SourceConfig, storage: Storage, owner: OwnerConfig | None):
        # Each task gets a fresh random id, distinct from source.id.
        self._id = uuid4().hex
        logger.info(f'create task {self._id} for {source.id}')
        self._source = source
        self._storage = storage
        self._owner = owner
        self._fetcher = fetcher
        # Hooks invoked around execute(); the scheduler uses them to
        # pause/resume the job. Default to no-ops.
        self.before: Hook = _do_nothing
        self.after: Hook = _do_nothing

    @property
    def id(self) -> str:
        # Unique task identifier (hex uuid4).
        return self._id

    @property
    def interval(self) -> int:
        # Re-run interval in seconds, taken from the source config.
        return self._source.interval

    def _fetch_original(self, key: str) -> Podcast | None:
        """Load and parse the previously published feed, or None if absent."""
        with self._storage.get(key) as xml_file:
            if xml_file == EMPTY_FILE:
                logger.info(f'no original file: {key}')
                return None
            xml = xml_file.read()
        return Podcast.from_rss(xml.decode('utf-8'))

    def _execute(self) -> None:
        """Fetch, merge, and (when changed) publish the feed; never raises."""
        logger.info(f'execute task: {self.id}')
        try:
            key = self._source.get_storage_key('feed.rss')
            original_pod = self._fetch_original(key)
            source_pod = self._fetcher.fetch(self._source)
            if original_pod:
                # merge() reports whether the stored feed needs re-publishing.
                has_changed = original_pod.merge(source_pod)
            else:
                # First run for this source: publish the freshly fetched feed.
                has_changed = True
                original_pod = source_pod
            if has_changed:
                logger.info(f'update: {self._source.id}')
                buf = BytesIO(original_pod.bytes)
                self._storage.put(buf, key, content_type='text/xml; charset=utf-8')
            else:
                logger.info(f'no change: {self._source.id}')
        except ExitSignalError as e:
            logger.warning(f'task ({self.id}) cancelled due to {e}')
        except BaseException as e:
            # NOTE(review): BaseException also swallows KeyboardInterrupt and
            # SystemExit inside worker threads — confirm this breadth is
            # intentional before narrowing to Exception.
            logger.error(f'task execute failed: {e} task: {self.id}')

    def execute(self) -> None:
        """Public entry point: run the task with its before/after hooks."""
        logger.debug(f'task running: {self._source.id}')
        self.before(self.id)
        self._execute()
        logger.debug(f'task finished: {self.id}')
        self.after(self.id)
78 |
--------------------------------------------------------------------------------
/podmaker/rss/README.md:
--------------------------------------------------------------------------------
1 | Read more about the RSS feed in
2 | the [RSS feed guidelines for Google Podcasts](https://support.google.com/podcast-publishers/answer/9889544?sjid=3442458601435072975-NA) and
3 | [Podcast RSS feed requirements for Apple Podcasts](https://podcasters.apple.com/support/823-podcast-requirements).
4 |
--------------------------------------------------------------------------------
/podmaker/rss/__init__.py:
--------------------------------------------------------------------------------
1 | __all__ = [
2 | 'Resource',
3 | 'Enclosure',
4 | 'Episode',
5 | 'Podcast',
6 | 'Owner',
7 | ]
8 |
9 | from podmaker.rss.core import Resource
10 | from podmaker.rss.enclosure import Enclosure
11 | from podmaker.rss.episode import Episode
12 | from podmaker.rss.podcast import Owner, Podcast
13 |
--------------------------------------------------------------------------------
/podmaker/rss/core.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import sys
4 | from abc import ABCMeta, abstractmethod
5 | from typing import Any, Generic, TypeVar
6 | from xml.etree.ElementTree import Element, fromstring, tostring
7 |
8 | from podmaker.rss.util.namespace import NamespaceGenerator
9 | from podmaker.rss.util.parse import XMLParser
10 | from podmaker.util import exit_signal
11 |
12 | if sys.version_info >= (3, 11):
13 | from typing import Self
14 | else:
15 | from typing_extensions import Self
16 |
17 | ResourceType = TypeVar('ResourceType')
18 |
19 |
class Resource(Generic[ResourceType], metaclass=ABCMeta):
    """A value that may be expensive to produce (e.g. fetched remotely).

    Subclasses implement ``get``; ``ensure`` converts a missing value into an
    error. Every access to ``get`` first checks the process-wide exit signal.
    """

    @abstractmethod
    def get(self) -> ResourceType | None:
        """Produce the value, or None when it is unavailable."""
        raise NotImplementedError

    def ensure(self) -> ResourceType:
        """Like ``get``, but raises ValueError instead of returning None."""
        resource = self.get()
        if resource is None:
            raise ValueError('Resource not found')
        return resource

    def __getattribute__(self, name: Any) -> Any:
        # Intercept every attribute lookup so that merely *retrieving* the
        # bound `get` method raises if an exit signal is pending — this aborts
        # long-running fetch pipelines without each subclass having to check.
        if name == 'get':
            exit_signal.check()
        return super().__getattribute__(name)
35 |
36 |
class PlainResource(Resource[ResourceType]):
    """Adapts an already-materialised value to the ``Resource`` interface.

    Useful for storing resources that are already available in memory.
    """

    def __init__(self, resource: ResourceType):
        # The wrapped in-memory value; returned unchanged by get().
        self.resource = resource

    def get(self) -> ResourceType:
        """Return the wrapped value as-is (never None)."""
        return self.resource
48 |
49 |
# Shared XML namespace helpers used by the RSS components below. The http://
# URIs are namespace identifiers, not fetched resources.
# noinspection HttpUrlsUsage
itunes = NamespaceGenerator('itunes', 'http://www.itunes.com/dtds/podcast-1.0.dtd')
# noinspection HttpUrlsUsage
content = NamespaceGenerator('content', 'http://purl.org/rss/1.0/modules/content/')
54 |
55 |
class RSSComponent(XMLParser, metaclass=ABCMeta):
    """Base class for everything that renders to / parses from RSS XML."""

    # Namespaces available to all components (itunes + content).
    namespace = dict(**itunes.namespace, **content.namespace)

    @property
    @abstractmethod
    def xml(self) -> Element:
        """The XML element representing this component."""
        raise NotImplementedError

    @classmethod
    @abstractmethod
    def from_xml(cls, el: Element) -> Self:
        """Build a component from its XML element."""
        raise NotImplementedError

    @abstractmethod
    def merge(self, other: Self) -> bool:
        """
        Merge the other component into this one.
        :return: Whether changes were made.
        """
        raise NotImplementedError

    @staticmethod
    def _el_creator(tag: str, text: str | None = None, attrib: dict[str, str] | None = None) -> Element:
        """Create an element with optional text content and attributes."""
        el = Element(tag, attrib or {})
        if text is not None:
            el.text = text
        return el

    def _common_merge(self, other: Self, field: str | tuple[str, ...]) -> bool:
        """Copy *field* (or each field of a tuple) from *other* when it differs.

        :return: whether any field was changed.
        """
        if isinstance(field, tuple):
            # Bug fix: the previous `any(... for f in field)` short-circuited
            # at the first changed field, leaving the remaining fields
            # unmerged. Materialise all results before reducing with any().
            results = [self._common_merge(other, f) for f in field]
            return any(results)
        a = getattr(self, field)
        b = getattr(other, field)
        if a != b:
            setattr(self, field, b)
            return True
        return False
93 |
94 |
# Prefix prepended to serialized feeds (XML processing instructions):
# https://www.w3.org/TR/xml/#sec-pi
# NOTE(review): this is currently the empty string, so no '<?xml ...?>'
# declaration is emitted — confirm whether one was intended here.
_pis = ''
_pis_bytes = _pis.encode('utf-8')
98 |
99 |
class RSSSerializer(RSSComponent, metaclass=ABCMeta):
    """Mixin that renders a component to a complete RSS document."""

    @property
    def str(self) -> str:
        """The feed serialised as text, prefixed with the PI header."""
        return _pis + tostring(self.xml, encoding='unicode')

    @property
    def bytes(self) -> bytes:
        """The feed serialised as UTF-8 bytes, prefixed with the PI header."""
        body: bytes = tostring(self.xml, encoding='utf-8')
        return _pis_bytes + body
110 |
111 |
class RSSDeserializer(RSSComponent, metaclass=ABCMeta):
    """Mixin that parses a component from an RSS document."""

    @classmethod
    def from_rss(cls, rss: str | bytes) -> Self:
        """Parse *rss* (text, or UTF-8 bytes) into a component instance."""
        text = rss.decode('utf-8') if isinstance(rss, bytes) else rss
        root: Element = fromstring(text)
        return cls.from_xml(root)
119 |
--------------------------------------------------------------------------------
/podmaker/rss/enclosure.py:
--------------------------------------------------------------------------------
1 | import sys
2 | from dataclasses import dataclass
3 | from urllib.parse import ParseResult, urlparse
4 | from xml.etree.ElementTree import Element
5 |
6 | from podmaker.rss.core import RSSComponent
7 |
8 | if sys.version_info >= (3, 11):
9 | from typing import Self
10 | else:
11 | from typing_extensions import Self
12 |
13 |
@dataclass
class Enclosure(RSSComponent):
    """The <enclosure> element describing an episode's media file."""

    # URL of the episode audio file.
    url: ParseResult
    # Size of the episode audio file in bytes.
    length: int
    # The standard MIME type of the episode.
    type: str

    @property
    def xml(self) -> Element:
        """Render as an <enclosure> element; all data lives in attributes."""
        attrib = {
            'url': self.url.geturl(),
            'length': str(self.length),
            'type': self.type,
        }
        return self._el_creator('enclosure', attrib=attrib)

    @classmethod
    def from_xml(cls, el: Element) -> Self:
        """Parse an <enclosure> element; url/length/type are all required."""
        url = urlparse(cls._parse_required_attrib(el, '.', 'url'))
        length_str = cls._parse_required_attrib(el, '.', 'length')
        try:
            length = int(length_str)
        except ValueError:
            raise ValueError(f'length must be int: {length_str}')
        content_type = cls._parse_required_attrib(el, '.', 'type')
        return cls(url, length, content_type)

    def merge(self, other: Self) -> bool:
        """Adopt differing fields from *other*; return whether anything changed."""
        return self._common_merge(other, ('url', 'length', 'type'))
47 |
--------------------------------------------------------------------------------
/podmaker/rss/episode.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import logging
4 | import math
5 | import sys
6 | from dataclasses import dataclass
7 | from datetime import datetime, timedelta, timezone
8 | from email.utils import format_datetime, parsedate_to_datetime
9 | from typing import Any
10 | from urllib.parse import ParseResult, urlparse
11 | from xml.etree.ElementTree import Element
12 |
13 | from podmaker.rss import Enclosure, Resource
14 | from podmaker.rss.core import PlainResource, RSSComponent, itunes
15 |
16 | if sys.version_info >= (3, 11):
17 | from typing import Self
18 | else:
19 | from typing_extensions import Self
20 |
21 | logger = logging.getLogger(__name__)
22 |
23 |
@dataclass
class Episode(RSSComponent):
    """A single feed <item>, serialisable to and parseable from RSS XML."""

    # Fully-qualified URL of the episode audio file, including the format extension (for example, .wav, .mp3).
    enclosure: Resource[Enclosure]
    # Title of the podcast episode.
    title: str
    # A plaintext description of the podcast.
    description: str | None = None
    # Indicates whether this episode contains explicit language or adult content.
    explicit: bool | None = False
    # A permanently-assigned, case-sensitive Globally Unique Identifier for a podcast episode.
    guid: str | None = None
    # Duration of the episode.
    duration: timedelta | None = None
    # Publication date of the episode, in RFC 822 (section 5.1) format.
    # https://www.rfc-editor.org/rfc/rfc822#section-5.1
    pub_date: datetime | None = None
    # An episode link URL.
    link: ParseResult | None = None
    # The episode artwork.
    image: Resource[ParseResult] | None = None

    @property
    def xml(self) -> Element:
        """Render as an <item>; optional fields are emitted only when set."""
        el = Element('item')
        el.append(self._enclosure_el)
        el.append(self._title_el)
        el.append(self._itunes_title_el)
        if self.description:
            el.append(self._description_el)
            el.append(self._summary_e)
        if self.explicit is not None:
            el.append(self._explicit_el)
        if self.guid:
            el.append(self._guid_el)
        if self.duration:
            el.append(self._duration_el)
        if self.pub_date:
            el.append(self._pub_date_el)
        if self.link:
            el.append(self._link_el)
        if self.image:
            el.append(self._image_el)
        return el

    @classmethod
    def from_xml(cls, el: Element) -> Self:
        """Parse an <item>, preferring itunes:* fields where both forms exist."""
        enclosure = cls._parse_enclosure(el)
        itunes_title = cls._parse_optional_text(el, f'.{itunes("title")}')
        if itunes_title is None:
            title = cls._parse_required_text(el, '.title')
        else:
            title = itunes_title
        description = cls._parse_optional_text(el, '.description')
        if description is None:
            description = cls._parse_optional_text(el, f'.{itunes("summary")}')
        explicit_str = cls._parse_optional_text(el, f'.{itunes("explicit")}')
        explicit = explicit_str == 'yes' if explicit_str is not None else None
        guid = cls._parse_optional_text(el, '.guid')
        duration = cls._parse_duration(el)
        pub_date = cls._parse_pub_date(el)
        link_str = cls._parse_optional_text(el, '.link')
        if link_str is not None:
            link = urlparse(link_str)
        else:
            link = None
        image_url = cls._parse_optional_attrib(el, f'.{itunes("image")}', 'href')
        if image_url is not None:
            image = PlainResource(urlparse(image_url))
        else:
            image = None
        return cls(enclosure, title, description, explicit, guid, duration, pub_date, link, image)

    def merge(self, other: Self) -> bool:
        """Merge *other* into this episode; return whether anything changed.

        NOTE(review): `link` and `image` are not part of the merged field
        set — confirm whether that omission is intentional.
        """
        has_changed = False
        enclosure = self.enclosure.ensure()
        if enclosure.merge(other.enclosure.ensure()):
            has_changed = True
            self.enclosure = PlainResource(enclosure)
        return any([
            has_changed,
            self._common_merge(
                other,
                ('title', 'description', 'explicit', 'guid', 'duration', 'pub_date')
            )
        ])

    @property
    def unique_id(self) -> str:
        # Identity used for equality/hashing: guid, or enclosure URL when absent.
        if self.guid is None:
            return self.enclosure.ensure().url.geturl()
        return self.guid

    def __eq__(self, other: Any) -> bool:
        if not isinstance(other, Episode):
            return False
        return self.unique_id == other.unique_id

    def __hash__(self) -> int:
        return hash(self.unique_id)

    @classmethod
    def _parse_pub_date(cls, el: Element) -> datetime | None:
        """Parse <pubDate> (RFC 822, with ISO-8601 fallback); naive times become UTC."""
        pub_date_str = cls._parse_optional_text(el, '.pubDate')
        if pub_date_str is None:
            return None
        try:
            dt = parsedate_to_datetime(pub_date_str)
        except (TypeError, ValueError):
            try:
                # fromisoformat (pre-3.11) rejects a trailing 'Z'; normalise it.
                if pub_date_str.endswith('Z'):
                    pub_date_str = pub_date_str[:-1] + '+00:00'
                dt = datetime.fromisoformat(pub_date_str)
            except ValueError:
                logger.warning(f'invalid pubDate: {pub_date_str}')
                return None
        if dt.tzinfo is None:
            return dt.replace(tzinfo=timezone.utc)
        return dt

    @classmethod
    def _parse_enclosure(cls, el: Element) -> PlainResource[Enclosure]:
        """Parse the required <enclosure> child into a plain resource."""
        enclosure_el = cls._parse_required_el(el, '.enclosure')
        return PlainResource(Enclosure.from_xml(enclosure_el))

    @classmethod
    def _parse_duration(cls, el: Element) -> timedelta | None:
        """Parse itunes:duration, accepting plain seconds or [HH:]MM:SS forms."""
        duration_str = cls._parse_optional_text(el, f'.{itunes("duration")}')
        if duration_str is None:
            return None
        try:
            if ':' in duration_str:
                # Fold colon-separated components: each step shifts by base 60.
                secs = 0
                for c in duration_str.split(':'):
                    secs = secs * 60 + int(c)
            else:
                secs = int(duration_str)
            return timedelta(seconds=secs)
        except ValueError:
            logger.warning(f'invalid duration: {duration_str}')
        return None

    @property
    def _enclosure_el(self) -> Element:
        return self.enclosure.ensure().xml

    @property
    def _title_el(self) -> Element:
        return self._el_creator('title', self.title)

    @property
    def _itunes_title_el(self) -> Element:
        return itunes.el('title', text=self.title)

    @property
    def _description_el(self) -> Element:
        if self.description is None:
            raise ValueError('description is required')
        return self._el_creator('description', self.description)

    @property
    def _summary_e(self) -> Element:
        if self.description is None:
            raise ValueError('description is required')
        return itunes.el('summary', text=self.description)

    @property
    def _explicit_el(self) -> Element:
        return itunes.el('explicit', text='yes' if self.explicit else 'no')

    @property
    def _guid_el(self) -> Element:
        if self.guid is None:
            raise ValueError('empty guid field')
        # Heuristic: URL-shaped guids are flagged as permalinks.
        is_perma_link = 'false'
        if self.guid.startswith('http'):
            is_perma_link = 'true'
        return self._el_creator('guid', self.guid, {'isPermaLink': is_perma_link})

    @property
    def _duration_el(self) -> Element:
        if self.duration is None:
            raise ValueError('empty duration field')
        # Round fractional seconds up so durations never appear shorter.
        dur = math.ceil(self.duration.total_seconds())
        return itunes.el('duration', text=str(dur))

    @property
    def _pub_date_el(self) -> Element:
        if self.pub_date is None:
            raise ValueError('empty pub_date field')
        return self._el_creator('pubDate', format_datetime(self.pub_date))

    @property
    def _link_el(self) -> Element:
        if self.link is None:
            raise ValueError('empty link field')
        return self._el_creator('link', self.link.geturl())

    @property
    def _image_el(self) -> Element:
        if self.image is None:
            raise ValueError('empty image field')
        return itunes.el('image', attrib={'href': self.image.ensure().geturl()})
227 |
--------------------------------------------------------------------------------
/podmaker/rss/podcast.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import re
4 | import sys
5 | from collections.abc import Iterable
6 | from dataclasses import dataclass, field
7 | from typing import Any
8 | from urllib.parse import ParseResult, urlparse
9 | from xml.etree.ElementTree import Element
10 |
11 | from podmaker.rss import Episode, Resource
12 | from podmaker.rss.core import PlainResource, RSSDeserializer, RSSSerializer, itunes
13 |
14 | if sys.version_info >= (3, 11):
15 | from typing import Self
16 | else:
17 | from typing_extensions import Self
18 |
19 | _category_pattern = re.compile(r'^[\w &]+$')
20 |
21 |
@dataclass
class Owner:
    """The feed owner (itunes:owner): contact e-mail plus an optional name."""

    email: str
    name: str | None = None

    def __eq__(self, other: Any) -> bool:
        if not isinstance(other, Owner):
            return False
        return self.email == other.email and self.name == other.name

    def __hash__(self) -> int:
        # Defining __eq__ alone sets __hash__ to None, making Owner
        # unhashable (unusable in sets/dict keys). Provide a hash consistent
        # with __eq__: equal owners hash equally over (email, name).
        return hash((self.email, self.name))
31 |
32 |
@dataclass
class Podcast(RSSSerializer, RSSDeserializer):
    """A whole RSS feed (<rss>/<channel>), serialisable in both directions."""

    # Defines an episodes. At least one element in the items.
    items: Resource[Iterable[Episode]]
    # Fully-qualified URL of the homepage of the podcast.
    link: ParseResult
    # Name of the podcast.
    title: str
    # An image to associate with the podcast.
    image: Resource[ParseResult]
    # A plaintext description of the podcast.
    description: str
    # Text name(s) of the author(s) of this podcast.
    # This need not be the same as the owner value.
    author: str
    # Manager's email for the podcast.
    owner: Owner | None = None
    # The general topic of the podcast.
    categories: list[str] = field(default_factory=list)
    # Indicates whether the podcast is explicit language or adult content.
    explicit: bool = False
    # The two-letter language code of the podcast as defined by ISO 639-1.
    # https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes
    language: str = 'en'

    @property
    def xml(self) -> Element:
        """Render the whole feed as an <rss version="2.0"> element."""
        el = self._el_creator('rss', attrib={'version': '2.0'})
        channel = self._el_creator('channel')
        el.append(channel)
        channel.append(self._generator_el)
        channel.append(self._link_el)
        channel.append(self._title_el)
        channel.append(self._itunes_image_el)
        channel.append(self._image_el)
        channel.append(self._description_el)
        channel.append(self._summary_el)
        if self.owner:
            channel.append(self._owner_el)
        channel.append(self._author_el)
        for category in self._category_el:
            channel.append(category)
        channel.append(self._explicit_el)
        channel.append(self._language_el)
        for item in self._items_el:
            channel.append(item)
        return el

    @classmethod
    def from_xml(cls, el: Element) -> Self:
        """Parse an <rss> document rooted at *el* into a Podcast."""
        items = cls._parse_items(el)
        link = urlparse(cls._parse_required_text(el, '.channel/link'))
        title = cls._parse_required_text(el, '.channel/title')
        image = cls._parse_image(el)
        description = cls._parse_required_text(el, '.channel/description')
        owner = cls._parse_owner(el)
        author = cls._parse_required_text(el, f'.channel/{itunes("author")}')
        categories = cls._parse_categories(el)
        explicit = cls._parse_optional_text(el, f'.channel/{itunes("explicit")}') == 'yes'
        language = cls._parse_optional_text(el, '.channel/language') or 'en'
        return cls(
            items,
            link,
            title,
            image,
            description,
            author,
            owner,
            categories,
            explicit,
            language
        )

    def merge(self, other: Self) -> bool:
        """Merge *other*'s metadata and episodes into this feed.

        :return: whether anything changed (so callers know to re-publish).
        """
        has_changed = self._common_merge(
            other,
            ('link', 'title', 'description', 'owner', 'author', 'explicit', 'language')
        )
        image_url = self.image.get()
        if image_url != other.image.get():
            self.image = other.image
            has_changed = True
        # Categories are compared order-insensitively.
        if set(self.categories) != set(other.categories):
            self.categories = other.categories
            has_changed = True
        if self._merge_items(other.items):
            has_changed = True
        return has_changed

    def _merge_items(self, others: Resource[Iterable[Episode]]) -> bool:
        """Merge matching episodes and append new ones, newest first."""
        new_items = []
        has_changed = False
        # Index existing episodes by their unique id (guid or enclosure URL).
        old_ids = {i.unique_id: i for i in self.items.ensure()}
        for item in others.ensure():
            if item.unique_id not in old_ids:
                new_items.append(item)
            else:
                old_item = old_ids[item.unique_id]
                has_changed = old_item.merge(item) or has_changed
        if not new_items and not has_changed:
            return False
        # NOTE(review): the sort key falls back to 0 for a missing pub_date;
        # mixing datetime and int keys raises TypeError when only some
        # episodes carry a pub_date — confirm every episode has one.
        sorted_items = sorted(
            list(self.items.ensure()) + new_items,
            key=lambda i: i.pub_date or 0,
            reverse=True
        )
        self.items = PlainResource(sorted_items)
        return True

    @classmethod
    def _parse_owner(cls, el: Element) -> Owner | None:
        """Parse optional itunes:owner; the e-mail is mandatory when present."""
        owner_el = cls._parse_optional_el(el, f'.channel/{itunes("owner")}')
        if owner_el is None:
            return None
        owner_name = cls._parse_optional_text(owner_el, f'.{itunes("name")}')
        owner_email = cls._parse_required_text(owner_el, f'.{itunes("email")}')
        return Owner(owner_email, owner_name)

    @classmethod
    def _parse_items(cls, el: Element) -> Resource[Iterable[Episode]]:
        """Parse every <item>; a feed without items is invalid."""
        item_els = cls._parse_els(el, '.channel/item')
        if not item_els:
            raise ValueError('items is required')
        items = []
        for item_el in item_els:
            items.append(Episode.from_xml(item_el))
        if not items:
            raise ValueError('items is required')
        return PlainResource(items)

    @classmethod
    def _parse_categories(cls, el: Element) -> list[str]:
        """Collect itunes:category values from element text or the text attribute."""
        categories = []
        for category_el in cls._parse_els(el, f'.channel/{itunes("category")}'):
            if category_el.text:
                categories.append(category_el.text.strip())
            elif category_el.get('text'):
                categories.append(category_el.get('text'))  # type: ignore[arg-type]
        return categories

    @classmethod
    def _parse_image(cls, el: Element) -> Resource[ParseResult]:
        """Prefer itunes:image/@href; fall back to the plain <image><url>."""
        href = cls._parse_optional_attrib(el, f'.channel/{itunes("image")}', 'href')
        if href:
            return PlainResource(urlparse(href))
        image_url = cls._parse_required_text(el, '.channel/image/url')
        return PlainResource(urlparse(image_url))

    @property
    def _generator_el(self) -> Element:
        # Identifies podmaker as the feed generator.
        el = self._el_creator('generator')
        el.append(self._el_creator('name', 'podmaker'))
        el.append(self._el_creator('link', 'https://github.com/YogiLiu/podmaker'))
        return el

    @property
    def _items_el(self) -> Iterable[Element]:
        # Yields each episode's XML; raises if the feed would end up empty.
        is_empty = True
        for item in self.items.ensure():
            is_empty = False
            yield item.xml
        if is_empty:
            raise ValueError('items is required')

    @property
    def _link_el(self) -> Element:
        return self._el_creator('link', self.link.geturl())

    @property
    def _title_el(self) -> Element:
        return self._el_creator('title', self.title)

    @property
    def _itunes_image_el(self) -> Element:
        return itunes.el('image', attrib={'href': self.image.ensure().geturl()})

    @property
    def _image_el(self) -> Element:
        # Classic RSS <image> block mirroring the itunes image.
        el = self._el_creator('image')
        el.append(self._el_creator('link', self.link.geturl()))
        el.append(self._el_creator('title', self.title))
        el.append(self._el_creator('url', self.image.ensure().geturl()))
        return el

    @property
    def _description_el(self) -> Element:
        return self._el_creator('description', self.description)

    @property
    def _summary_el(self) -> Element:
        return itunes.el('summary', text=self.description)

    @property
    def _owner_el(self) -> Element:
        if self.owner is None:
            raise ValueError('empty owner field')
        el = itunes.el('owner')
        if self.owner.name:
            el.append(itunes.el('name', text=self.owner.name))
        el.append(itunes.el('email', text=self.owner.email))
        return el

    @property
    def _author_el(self) -> Element:
        return itunes.el('author', text=self.author)

    @property
    def _category_el(self) -> Iterable[Element]:
        # Emits only categories that survive validation/normalisation.
        for category in self.categories:
            parsed_category = self._parse_category(category)
            if parsed_category is not None:
                yield itunes.el('category', attrib={'text': parsed_category})

    @staticmethod
    def _parse_category(category: str) -> str | None:
        """Return the capitalised category, or None if it contains invalid characters."""
        if not _category_pattern.match(category):
            return None
        return category.capitalize()

    @property
    def _explicit_el(self) -> Element:
        return itunes.el('explicit', text='yes' if self.explicit else 'no')

    @property
    def _language_el(self) -> Element:
        if self.language is None:
            raise ValueError('empty language field')
        return self._el_creator('language', self.language)
261 |
--------------------------------------------------------------------------------
/podmaker/rss/util/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YogiLiu/podmaker/93153aedfe643f97e912a2ca8cb77df311070a2b/podmaker/rss/util/__init__.py
--------------------------------------------------------------------------------
/podmaker/rss/util/namespace.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | from xml.etree.ElementTree import Element, QName, register_namespace
4 |
5 |
class NamespaceGenerator:
    """Helper that builds namespaced XML tags and elements.

    Instantiating it also registers the prefix globally so ElementTree
    serializes elements from this namespace with the given prefix.
    """

    def __init__(self, prefix: str, uri: str):
        self.prefix = prefix
        self.url = uri
        # Global side effect: affects how ElementTree writes this namespace.
        register_namespace(prefix, uri)

    @property
    def namespace(self) -> dict[str, str]:
        """Prefix -> URI mapping suitable for ``namespaces=`` arguments."""
        return {self.prefix: self.url}

    def __call__(self, tag: str) -> QName:
        """Qualify *tag* with this namespace's URI."""
        return QName(self.url, tag)

    def el(self, tag: str, *, text: str | None = None, attrib: dict[str, str] | None = None) -> Element:
        """Create a namespaced element, optionally with text and attributes."""
        element = Element(self(tag).text, {} if attrib is None else attrib)
        if text is not None:
            element.text = text
        return element
24 |
--------------------------------------------------------------------------------
/podmaker/rss/util/parse.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | from abc import ABC
4 | from xml.etree.ElementTree import Element
5 |
6 |
class XMLParser(ABC):
    """Mixin with XPath-based helpers for reading data out of an Element tree.

    Subclasses may override ``namespace`` so prefixed XPath queries resolve.
    ``_parse_required_*`` variants raise ValueError when the target is absent;
    the ``_parse_optional_*`` variants return None instead.
    """

    # Prefix -> URI mapping applied to every lookup.
    namespace: dict[str, str] = {}

    @classmethod
    def _parse_optional_text(cls, el: Element, xpath: str) -> str | None:
        """Return the stripped text at *xpath*, or None when absent."""
        raw = el.findtext(xpath, namespaces=cls.namespace)
        return None if raw is None else raw.strip()

    @classmethod
    def _parse_required_text(cls, el: Element, xpath: str) -> str:
        """Return the stripped text at *xpath*; raise when the element is absent."""
        text = cls._parse_optional_text(el, xpath)
        if text is not None:
            return text
        raise ValueError(f'{xpath} is required')

    @classmethod
    def _parse_optional_el(cls, el: Element, xpath: str) -> Element | None:
        """Return the first element matching *xpath*, or None."""
        return el.find(xpath, namespaces=cls.namespace)

    @classmethod
    def _parse_required_el(cls, el: Element, xpath: str) -> Element:
        """Return the first element matching *xpath*; raise when absent."""
        found = cls._parse_optional_el(el, xpath)
        if found is not None:
            return found
        raise ValueError(f'{xpath} is required')

    @classmethod
    def _parse_els(cls, el: Element, xpath: str) -> list[Element]:
        """Return every element matching *xpath* (possibly empty)."""
        return el.findall(xpath, namespaces=cls.namespace)

    @classmethod
    def _parse_optional_attrib(cls, el: Element, xpath: str, attrib: str) -> str | None:
        """Return the stripped attribute value, or None when element or attribute is absent."""
        target = cls._parse_optional_el(el, xpath)
        if target is None:
            return None
        value = target.get(attrib)
        return None if value is None else value.strip()

    @classmethod
    def _parse_required_attrib(cls, el: Element, xpath: str, attrib: str) -> str:
        """Return the stripped attribute value; raise when absent."""
        value = cls._parse_optional_attrib(el, xpath, attrib)
        if value is not None:
            return value
        raise ValueError(f'attrib {attrib} of {xpath} is required')
55 |
--------------------------------------------------------------------------------
/podmaker/storage/__init__.py:
--------------------------------------------------------------------------------
1 | __all__ = ['Storage', 'ObjectInfo', 'EMPTY_FILE', 'get_storage']
2 |
3 | from podmaker.config import LocalConfig, S3Config, StorageConfig
4 | from podmaker.storage.core import EMPTY_FILE, ObjectInfo, Storage
5 |
6 |
def get_storage(config: StorageConfig) -> Storage:
    """Instantiate the storage backend matching *config*.

    Backend modules are imported lazily so optional dependencies (for
    example boto3 for S3) are only required when that backend is selected.

    :raises ValueError: if the config does not match a known backend.
    """
    if isinstance(config, S3Config):
        from podmaker.storage.s3 import S3
        return S3(config)
    if isinstance(config, LocalConfig):
        from podmaker.storage.local import Local
        return Local(config)
    raise ValueError(f'unknown storage destination: {config.dest}')
16 |
--------------------------------------------------------------------------------
/podmaker/storage/core.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | from abc import ABC, abstractmethod
4 | from contextlib import contextmanager
5 | from dataclasses import dataclass
6 | from io import BytesIO
7 | from typing import IO, AnyStr, Iterator
8 | from urllib.parse import ParseResult
9 |
10 |
@dataclass
class ObjectInfo:
    """Metadata describing a single stored object, as reported by a Storage backend."""
    # Fully-qualified URL of the object.
    uri: ParseResult
    # Size of the object in bytes.
    size: int
    # The standard MIME type of the object.
    type: str
19 |
20 |
# Shared sentinel returned by Storage.get when a key does not exist.
# NOTE(review): a single BytesIO instance shared module-wide — callers must
# not write to it, and its read position is shared across all consumers.
EMPTY_FILE = BytesIO(b'')
22 |
23 |
class Storage(ABC):
    """Abstract interface for object storage backends (e.g. S3, local disk)."""

    @abstractmethod
    def put(self, data: IO[AnyStr], key: str, *, content_type: str = '') -> ParseResult:
        """
        Store *data* under *key*.

        :return: data uri
        """
        raise NotImplementedError

    @abstractmethod
    def check(self, key: str) -> ObjectInfo | None:
        # Return metadata for `key`, or None when the object does not exist.
        raise NotImplementedError

    @abstractmethod
    @contextmanager
    def get(self, key: str) -> Iterator[IO[bytes]]:
        """
        Open the object stored under *key* for reading.

        :return: file-like object, return `EMPTY_FILE` if not found
        """
        raise NotImplementedError

    def start(self) -> None:
        # Optional lifecycle hook: acquire resources before first use.
        pass

    def stop(self) -> None:
        # Optional lifecycle hook: release resources on shutdown.
        pass
49 |
--------------------------------------------------------------------------------
/podmaker/storage/local.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | __all__ = ['Local']
4 |
5 | import logging
6 | import sqlite3
7 | import threading
8 | from contextlib import contextmanager
9 | from pathlib import Path
10 | from typing import IO, AnyStr, Iterator
11 | from urllib.parse import ParseResult, urljoin, urlparse
12 |
13 | from podmaker.config import LocalConfig
14 | from podmaker.storage import ObjectInfo, Storage
15 | from podmaker.storage.core import EMPTY_FILE
16 |
17 | logger = logging.getLogger(__name__)
18 | lock = threading.Lock()
19 |
20 |
class Local(Storage):
    """Storage backend that writes objects to the local filesystem.

    Payloads live under ``<base_dir>/data`` and their metadata (content type,
    size) is tracked in a sqlite database at ``<base_dir>/db.sqlite3``.
    """

    _db: sqlite3.Connection
    # Chunk size used when copying uploaded streams to disk.
    _file_buffering = 10 * 1024 * 1024  # 10MB

    def __init__(self, config: LocalConfig):
        self.public_endpoint = str(config.public_endpoint)
        self.base_dir = Path(config.base_dir)
        self.data_dir = self.base_dir / 'data'

    def start(self) -> None:
        """Create the storage directories and open the metadata database.

        NOTE(review): sqlite3.connect defaults to check_same_thread=True, so
        the connection must be used from the thread that called start() —
        confirm against how the scheduler invokes this backend.
        """
        if not self.base_dir.exists():
            self.base_dir.mkdir(parents=True, exist_ok=True)
            self.base_dir.chmod(0o750)
            logger.info(f'created base directory {self.base_dir} (mod: {self.base_dir.stat().st_mode:o})')
        if not self.data_dir.exists():
            self.data_dir.mkdir(parents=True, exist_ok=True)
            # Fix: chmod the data directory itself (previously re-chmodded base_dir).
            self.data_dir.chmod(0o750)
            logger.info(f'created data directory {self.data_dir} (mod: {self.data_dir.stat().st_mode:o})')
        with lock:
            self._db = sqlite3.connect(self.base_dir / 'db.sqlite3')
            self._db.execute('''
                CREATE TABLE IF NOT EXISTS files (
                    key TEXT PRIMARY KEY,
                    type TEXT NOT NULL DEFAULT '',
                    size INTEGER NOT NULL CHECK (size >= 0)
                )
            ''')
            self._db.commit()

    def stop(self) -> None:
        """Close the metadata database."""
        with lock:
            self._db.close()

    def put(self, data: IO[AnyStr], key: str, *, content_type: str = '') -> ParseResult:
        """Write *data* under *key* and record its metadata.

        Text streams are stored UTF-8 encoded; the stream is rewound after
        copying so callers can reuse it.

        :return: the public URL of the stored object
        """
        if key.startswith('/'):
            key = key[1:]
        path = self.data_dir / key
        # Keys may contain slashes; ensure intermediate directories exist.
        path.parent.mkdir(parents=True, exist_ok=True)
        size = 0
        with open(path, 'wb') as f:
            while True:
                chunk = data.read(self._file_buffering)
                if isinstance(chunk, str):
                    chunk_bytes = chunk.encode('utf-8')
                else:
                    chunk_bytes = chunk
                if not chunk_bytes:
                    break
                size += len(chunk_bytes)
                f.write(chunk_bytes)
        path.chmod(0o640)
        data.seek(0)
        info = self.check(key)
        with lock:
            if info is None:
                self._db.execute(
                    'INSERT INTO files (key, type, size) VALUES (?, ?, ?)',
                    (key, content_type, size),
                )
            else:
                self._db.execute(
                    'UPDATE files SET type = ?, size = ? WHERE key = ?',
                    (content_type, size, key),
                )
            # Persist immediately: sqlite3 opens an implicit transaction that
            # a plain close() on shutdown would otherwise roll back.
            self._db.commit()
        url = urljoin(self.public_endpoint, key)
        return urlparse(url)

    def check(self, key: str) -> ObjectInfo | None:
        """Look up the recorded metadata for *key*.

        :return: the object's info, or None when the key is unknown
        """
        if key.startswith('/'):
            key = key[1:]
        with lock:
            cursor = self._db.execute(
                'SELECT type, size FROM files WHERE key = ?',
                (key,),
            )
            row = cursor.fetchone()
        if row is None:
            return None
        content_type, size = row
        url = urljoin(self.public_endpoint, key)
        return ObjectInfo(type=content_type, uri=urlparse(url), size=size)

    @contextmanager
    def get(self, key: str) -> Iterator[IO[bytes]]:
        """Open the stored object for reading.

        :return: a binary file object, or EMPTY_FILE when the key does not exist
        """
        if key.startswith('/'):
            key = key[1:]
        path = self.data_dir / key
        if not path.exists():
            yield EMPTY_FILE
        else:
            with open(path, 'rb') as f:
                yield f
111 |
--------------------------------------------------------------------------------
/podmaker/storage/s3.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | __all__ = ['S3']
4 |
5 | import base64
6 | import hashlib
7 | import logging
8 | import sys
9 | from contextlib import contextmanager
10 | from tempfile import SpooledTemporaryFile
11 | from typing import IO, AnyStr, Iterator
12 | from urllib.parse import ParseResult, urljoin, urlparse
13 |
14 | from podmaker.config import S3Config
15 | from podmaker.storage import ObjectInfo, Storage
16 | from podmaker.storage.core import EMPTY_FILE
17 |
18 | logger = logging.getLogger(__name__)
19 |
20 | try:
21 | import boto3
22 | from botocore.exceptions import ClientError
23 | except ImportError:
24 | logger.error('boto3 is not installed, S3 storage is not available')
25 | sys.exit(1)
26 |
27 |
class S3(Storage):
    """Storage backend for S3-compatible object stores, backed by boto3."""

    # Chunk size for streaming md5 computation.
    _md5_chunk_size = 10 * 1024 * 1024  # 10MB
    # In-memory spool limit and download chunk size.
    _file_buffering = 10 * 1024 * 1024  # 10MB

    def __init__(self, config: S3Config):
        self.s3 = boto3.resource(
            's3', endpoint_url=str(config.endpoint), aws_access_key_id=config.access_key,
            aws_secret_access_key=config.access_secret)
        self.bucket = self.s3.Bucket(config.bucket)
        self.public_endpoint = str(config.public_endpoint)

    def _calculate_md5(self, data: IO[AnyStr]) -> str:
        """Return the base64-encoded md5 of *data*, rewinding the stream afterwards.

        :raises TypeError: if the stream yields neither str nor bytes chunks.
        """
        logger.debug('calculate md5')
        md5 = hashlib.md5()
        while True:
            chunk = data.read(self._md5_chunk_size)
            if not chunk:
                break
            if isinstance(chunk, str):
                md5.update(chunk.encode())
            elif isinstance(chunk, bytes):
                md5.update(chunk)
            else:
                raise TypeError(f'chunk must be str or bytes, not {type(chunk)}')
        data.seek(0)
        # S3 expects the Content-MD5 header base64-encoded, not hex.
        return base64.b64encode(md5.digest()).decode()

    def put(self, data: IO[AnyStr], key: str, *, content_type: str = '') -> ParseResult:
        """Upload *data* under *key*, letting S3 verify integrity via Content-MD5.

        :return: the public URL of the uploaded object
        """
        if key.startswith('/'):
            key = key[1:]
        md5 = self._calculate_md5(data)
        logger.info(f'upload: {key} (md5: {md5})')
        self.bucket.put_object(Key=key, ContentMD5=md5, Body=data, ContentType=content_type)
        logger.info(f'uploaded: {key}')
        data.seek(0)
        return self.get_uri(key)

    def check(self, key: str) -> ObjectInfo | None:
        """Fetch object metadata, or None when the object does not exist.

        boto3 loads object attributes lazily, so a missing key surfaces as a
        ClientError when content_length/content_type are first accessed.
        """
        logger.debug(f'check: {key}')
        if key.startswith('/'):
            key = key[1:]
        try:
            info = self.bucket.Object(key=key)
            return ObjectInfo(
                uri=self.get_uri(key),
                size=info.content_length,
                type=info.content_type
            )
        except ClientError:
            return None

    def get_uri(self, key: str) -> ParseResult:
        """Build the public URL for *key*."""
        url = urljoin(self.public_endpoint, key)
        return urlparse(url)

    @contextmanager
    def get(self, key: str) -> Iterator[IO[bytes]]:
        """Download *key* into a temporary spool and yield it for reading.

        :return: a file object, or EMPTY_FILE when the key does not exist
        """
        logger.info(f'get: {key}')
        if key.startswith('/'):
            key = key[1:]
        # Fix: the 10MB threshold belongs in max_size (the spool-to-disk
        # limit). It was previously passed as `buffering`, which only sets
        # the I/O buffer size; with max_size left at 0 the spool never rolled
        # over, so large downloads were held entirely in memory.
        with SpooledTemporaryFile(max_size=self._file_buffering) as f:
            try:
                obj = self.bucket.Object(key=key).get()
                while True:
                    chunk = obj['Body'].read(self._file_buffering)
                    if not chunk:
                        break
                    f.write(chunk)
                f.seek(0)
                yield f
            except ClientError:
                logger.debug(f'not found: {key}')
                yield EMPTY_FILE
101 |
--------------------------------------------------------------------------------
/podmaker/util/__init__.py:
--------------------------------------------------------------------------------
1 | __all__ = ['exit_signal', 'ExitSignalError', 'retry']
2 |
3 | from podmaker.util.exit import ExitSignalError, exit_signal
4 | from podmaker.util.retry_util import retry
5 |
--------------------------------------------------------------------------------
/podmaker/util/exit.py:
--------------------------------------------------------------------------------
1 | import signal
2 | import threading
3 | from typing import Any, Callable
4 |
5 | _exit_signals = (
6 | signal.SIGINT,
7 | signal.SIGHUP,
8 | signal.SIGTERM,
9 | )
10 |
11 | _lock = threading.Lock()
12 |
13 |
class ExitSignalError(Exception):
    """Raised by ExitSignal.check once an exit signal has been received."""
16 |
17 |
class ExitSignalRegisterError(Exception):
    """Raised when a handler is registered after listen() was called."""
20 |
21 |
class ExitSignal:
    """Tracks process exit signals and dispatches registered shutdown hooks."""

    def __init__(self) -> None:
        self._is_received = False
        self._has_listened = False
        self._exit_handlers: list[Callable[[], None]] = []

    def receive(self) -> None:
        """Mark the exit signal as received.

        This runs inside the OS signal handler, which executes in the main
        thread and can interrupt code that already holds the module lock.
        Acquiring the non-reentrant lock here could therefore deadlock
        against check()/register(), so the flag is set lock-free — a plain
        bool store is atomic under the GIL.
        """
        self._is_received = True

    def check(self) -> None:
        """Raise if an exit signal has been received.

        :raises ExitSignalError: once receive() has been called.
        """
        # Lock-free read for the same reason receive() is lock-free.
        if self._is_received:
            raise ExitSignalError('exit signal received')

    def register(self, handler: Callable[[], None]) -> None:
        """Register a shutdown hook; must happen before listen().

        :raises ExitSignalRegisterError: if listen() was already called.
        """
        with _lock:
            if self._has_listened:
                raise ExitSignalRegisterError('already listened')
            self._exit_handlers.append(handler)

    def _handler(self, *_: Any) -> None:
        # Signal-handler entry point: flag the exit, then run every hook in
        # registration order.
        self.receive()
        for handler in self._exit_handlers:
            handler()

    def listen(self) -> None:
        """Install OS signal handlers; further register() calls are rejected."""
        with _lock:
            self._has_listened = True
            for sig in _exit_signals:
                signal.signal(sig, self._handler)
53 |
54 |
# Process-wide singleton shared by all modules to observe shutdown requests.
exit_signal = ExitSignal()
56 |
--------------------------------------------------------------------------------
/podmaker/util/retry_util.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import logging
4 | import sys
5 | import time
6 | from datetime import timedelta
7 | from typing import Callable, Tuple, Type, TypeVar
8 |
9 | if sys.version_info < (3, 10):
10 | from typing_extensions import ParamSpec
11 | else:
12 | from typing import ParamSpec
13 |
14 |
15 | P = ParamSpec('P')
16 | T = TypeVar('T')
17 | _logger = logging.getLogger(__name__)
18 |
19 |
def retry(
        cnt: int,
        *,
        wait: timedelta = timedelta(seconds=0),
        catch: Type[Exception] | Tuple[Type[Exception], ...] = Exception,
        logger: logging.Logger | None = None,
) -> Callable[[Callable[P, T]], Callable[P, T]]:
    """
    A decorator to retry the function when exception raised.
    The function will be called at least once and at most cnt + 1 times.

    :param cnt: retry count, must be positive
    :param wait: wait time between retries
    :param catch: the exception to retry
    :param logger: logger to log retry info (defaults to this module's logger)
    :raises ValueError: if cnt is not positive
    """
    from functools import wraps

    if cnt <= 0:
        raise ValueError('cnt must be positive')
    if logger is None:
        # Late-bound default so the signature does not capture a module
        # global; resolves to the same logger as this module's own.
        logger = logging.getLogger(__name__)
    wait_seconds = wait.total_seconds()

    def deco(func: Callable[P, T]) -> Callable[P, T]:
        # wraps preserves the decorated function's name/docstring/signature.
        @wraps(func)
        def wrapper(*args: P.args, **kwargs: P.kwargs) -> T:
            for _ in range(cnt):
                try:
                    return func(*args, **kwargs)
                except catch:
                    logger.warning('retrying...')
                    if wait_seconds > 0:
                        logger.warning(f'wait {wait_seconds}s before retry')
                        time.sleep(wait_seconds)
            # Final attempt: any exception here propagates to the caller.
            return func(*args, **kwargs)
        return wrapper
    return deco
53 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.poetry]
2 | name = "podmaker"
3 | version = "0.9.0"
4 | description = "Convert online media into podcast feeds."
5 | license = "Unlicense"
6 | authors = ["YogiLiu "]
7 | maintainers = ["YogiLiu "]
8 | readme = "README.md"
9 | homepage = "https://github.com/YogiLiu/podmaker"
10 | repository = "https://github.com/YogiLiu/podmaker"
11 | documentation = "https://github.com/YogiLiu/podmaker/blob/main/README.md"
12 | keywords = ["rss", "youtube", "podcast"]
13 | classifiers = [
14 | "Development Status :: 4 - Beta",
15 | "Environment :: Console"
16 | ]
17 |
18 | [tool.poetry.dependencies]
19 | python = "^3.9"
20 | tomlkit = "^0.12.1"
21 | pydantic = {extras = ["email"], version = "^2.2.0"}
22 | apscheduler = "^3.10.4"
23 | boto3 = { version = "^1.28.27", optional = true }
24 | yt-dlp = { version = "^2023.7.6", optional = true }
25 |
26 | [tool.poetry.extras]
27 | s3 = ["boto3"]
28 | youtube = ["yt-dlp"]
29 | all = ["boto3", "yt-dlp"]
30 |
31 | [tool.poetry.group.dev.dependencies]
32 | boto3-stubs = { extras = ["essential"], version = "^1.28.27" }
33 | autohooks = "^23.7.0"
34 | autohooks-plugin-ruff = "^23.6.1"
35 | autohooks-plugin-mypy = "^23.3.0"
36 | typing-extensions = "^4.7.1"
37 |
38 | [tool.poetry.scripts]
39 | podmaker = 'podmaker.cli:run'
40 |
41 | [tool.autohooks]
42 | mode = "poetry"
43 | pre-commit = ["autohooks.plugins.mypy", "autohooks.plugins.ruff"]
44 |
45 | [tool.ruff]
46 | select = ["C90", "F", "I", "PL"]
47 | target-version = "py39"
48 | line-length = 120
49 |
50 | [tool.mypy]
51 | python_version = "3.9"
52 | strict = true
53 | plugins = ["pydantic.mypy"]
54 |
55 | [[tool.mypy.overrides]]
56 | module = ["yt_dlp", "apscheduler.*"]
57 | ignore_missing_imports = true
58 |
59 |
60 | [tool.commitizen]
61 | name = "cz_conventional_commits"
62 | tag_format = "$version"
63 | version_scheme = "pep440"
64 | version_provider = "poetry"
65 | update_changelog_on_bump = true
66 | major_version_zero = true
67 |
68 | [build-system]
69 | requires = ["poetry-core"]
70 | build-backend = "poetry.core.masonry.api"
71 |
72 | [tool.poetry.urls]
73 | "Bug Tracker" = "https://github.com/YogiLiu/podmaker/issues"
74 |
--------------------------------------------------------------------------------
/systemd/podmaker.service:
--------------------------------------------------------------------------------
1 | [Unit]
2 | Description=Convert online media into podcast feeds.
3 | Documentation=https://github.com/YogiLiu/podmaker/blob/main/README.md
4 | After=network.target network-online.target
5 | Wants=network-online.target
6 |
7 | [Service]
8 | User=nobody
9 | Type=simple
10 | ExecStart=/opt/podmaker/venv/bin/podmaker -c /opt/podmaker/config.toml
11 |
12 | [Install]
13 | WantedBy=multi-user.target
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YogiLiu/podmaker/93153aedfe643f97e912a2ca8cb77df311070a2b/tests/__init__.py
--------------------------------------------------------------------------------
/tests/data/apple.rss.test.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Hiking Treks
5 | https://www.apple.com/itunes/podcasts/
6 | en-us
7 | © 2020 John Appleseed
8 | The Sunset Explorers
9 |
10 | Love to get outdoors and discover nature's treasures? Hiking Treks is the
11 | show for you. We review hikes and excursions, review outdoor gear and interview
12 | a variety of naturalists and adventurers. Look for new episodes each week.
13 |
14 | serial
15 |
18 |
19 |
20 |
21 | false
22 | -
23 | trailer
24 | Hiking Treks Trailer
25 |
26 | Apple Podcasts.]]>
29 |
30 |
35 | D03EEC9B-B1B4-475B-92C8-54F853FA2A22
36 | Tue, 8 Jan 2019 01:15:00 GMT
37 | 1079
38 | false
39 |
40 | -
41 | full
42 | 4
43 | 2
44 | S02 EP04 Mt. Hood, Oregon
45 |
46 | Tips for trekking around the tallest mountain in Oregon
47 |
48 |
53 | 22BCFEBF-44FB-4A19-8229-7AC678629F57
54 | Tue, 07 May 2019 12:00:00 GMT
55 | 1024
56 | false
57 |
58 | -
59 | full
60 | 3
61 | 2
62 | S02 EP03 Bouldering Around Boulder
63 |
64 | We explore fun walks to climbing areas about the beautiful Colorado city of Boulder.
65 |
66 |
69 | href="http://example.com/podcasts/everything/
70 |
75 | BE486CAA-B3D5-4FB0-8298-EFEBE71C5982
76 | Tue, 30 Apr 2019 13:00:00 EST
77 | 3627
78 | false
79 |
80 | -
81 | full
82 | 2
83 | 2
84 | S02 EP02 Caribou Mountain, Maine
85 |
86 | Put your fitness to the test with this invigorating hill climb.
87 |
88 |
91 |
96 | 142FAFE9-B1DF-4F6D-BAA8-79BDBAF653A9
97 | Tue, 23 May 2019 02:00:00 -0700
98 | 2434
99 | false
100 |
101 | -
102 | full
103 | 1
104 | 2
105 | S02 EP01 Stawamus Chief
106 |
107 | We tackle Stawamus Chief outside of Vancouver, BC and you should too!
108 |
109 |
114 | 5F1DBAEB-3327-49FB-ACB3-DB0158A1D0A3
115 | 2019-02-16T07:00:00.000Z
116 | 13:24
117 | false
118 |
119 | -
120 | full
121 | 4
122 | 1
123 | S01 EP04 Kuliouou Ridge Trail
124 |
125 | Oahu, Hawaii, has some picturesque hikes and this is one of the best!
126 |
127 |
132 | B5FCEB80-317C-4CD0-A84B-807065B43FB9
133 | Tue, 27 Nov 2018 01:15:00 +0000
134 | 929
135 | false
136 |
137 | -
138 | full
139 | 3
140 | 1
141 | S01 EP03 Blood Mountain Loop
142 |
143 | Hiking the Appalachian Trail and Freeman Trail in Georgia
144 |
145 |
150 | F0C5D763-ED85-4449-9C09-81FEBDF6F126
151 | Tue, 23 Oct 2018 01:15:00 +0000
152 | 1440
153 | false
154 |
155 | -
156 | full
157 | 2
158 | 1
159 | S01 EP02 Garden of the Gods Wilderness
160 |
161 | Wilderness Area Garden of the Gods in Illinois is a delightful spot for
162 | an extended hike.
163 |
164 |
169 | 821DD0B2-571D-4DFD-8E11-556E8C1EFE6A
170 | Tue, 18 Sep 2018 01:15:00 +0000
171 | 839
172 | false
173 |
174 | -
175 | full
176 | 1
177 | 1
178 | S01 EP01 Upper Priest Lake Trail to Continental Creek Trail
179 |
180 | We check out this powerfully scenic hike following the river in the Idaho
181 | Panhandle National Forests.
182 |
183 |
188 | EABDA7EE-1AC6-4B60-9E11-6B3F30B72F87
189 | Tue, 14 Aug 2018 01:15:00 +0000
190 | 1399
191 | false
192 |
193 |
194 |
--------------------------------------------------------------------------------
/tests/data/google.rss.test.xml:
--------------------------------------------------------------------------------
1 |
2 |
4 |
5 | Dafna 的斑马饲养播客
6 |
7 | dafna@example.com
8 |
9 | Dafna
10 | 一个宠物主人关于饲养人气条纹马的指南
11 |
12 | zh-cn
13 | https://www.example.com/podcasts/dafnas-zebras/
14 | -
15 | 关于饲养斑马的十大误区
16 | 这里介绍了关于照顾、喂养和繁殖可爱条纹动物的十大误区。
17 | Tue, 14 Mar 2017 12:00:00 GMT
18 |
20 | 30:00
21 | dzpodtop10
22 |
23 | -
24 | 让斑马保持整洁干净
25 | 让斑马保持干净非常耗时,但付出的努力是值得的。
26 | Fri, 24 Feb 2017 12:00:00 GMT
27 |
29 | 22:48
30 | dzpodclean
31 |
32 |
33 |
--------------------------------------------------------------------------------
/tests/helper.py:
--------------------------------------------------------------------------------
1 | from datetime import timedelta
2 | from urllib.error import URLError
3 | from urllib.request import urlopen
4 |
5 |
def network_available(url: str, timeout: timedelta = timedelta(seconds=10)) -> bool:
    """Return True when *url* is reachable within *timeout*.

    Used by tests to skip network-dependent cases when offline.
    """
    try:
        # Use the response as a context manager so the connection is closed
        # instead of leaked (the previous code never closed it).
        with urlopen(url, timeout=timeout.total_seconds()):
            return True
    except OSError:
        # URLError is an OSError subclass; catching OSError also covers raw
        # socket timeouts that urlopen can surface directly.
        return False
12 |
--------------------------------------------------------------------------------
/tests/provider/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YogiLiu/podmaker/93153aedfe643f97e912a2ca8cb77df311070a2b/tests/provider/__init__.py
--------------------------------------------------------------------------------
/tests/provider/test_resource.py:
--------------------------------------------------------------------------------
1 | import multiprocessing
2 | import unittest
3 |
4 | from podmaker.rss import Resource
5 | from podmaker.util import ExitSignalError, exit_signal
6 |
7 | parent, child = multiprocessing.Pipe()
8 |
9 |
def exit_signal_tester() -> None:
    """Child-process entry point for TestResource.

    Flags the exit signal, then calls Resource.get and reports the raised
    exception (or None when nothing was raised) through the module pipe.
    """
    class _Probe(Resource[None]):
        def get(self) -> None:
            return None

    probe = _Probe()
    exit_signal.receive()
    try:
        probe.get()
    except BaseException as exc:  # forward whatever was raised to the parent
        child.send(exc)
    else:
        child.send(None)
23 |
24 |
class TestResource(unittest.TestCase):
    def test_exit_signal(self) -> None:
        """Run exit_signal_tester in a child process and expect ExitSignalError.

        NOTE(review): relies on the module-level pipe being inherited by the
        child — assumes a fork-style start method; verify on platforms where
        'spawn' is the default, since a spawned child re-creates the pipe.
        """
        p = multiprocessing.Process(target=exit_signal_tester)
        p.start()
        p.join()
        self.assertIsInstance(parent.recv(), ExitSignalError)
31 |
--------------------------------------------------------------------------------
/tests/provider/test_youtube.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import sys
4 | import unittest
5 | from datetime import date
6 | from typing import IO, Any, AnyStr
7 | from urllib.parse import ParseResult, urlparse
8 |
9 | from podmaker.config import OwnerConfig, SourceConfig
10 | from podmaker.fetcher.youtube import YouTube
11 | from podmaker.storage import ObjectInfo, Storage
12 | from tests.helper import network_available
13 |
14 | if sys.version_info >= (3, 11):
15 | pass
16 | else:
17 | pass
18 |
19 |
class MockStorage(Storage):
    """Storage stub for fetcher tests.

    ``check`` always reports that the object exists and counts its calls;
    ``put`` asserts it only receives mp3 uploads and that the call ordering
    (relative to ``check``) matches expectations.
    """

    # Number of check() calls observed so far.
    cnt = 0

    def put(self, data: IO[AnyStr], key: str, *, content_type: str = '') -> ParseResult:
        assert data.name.endswith('.mp3'), 'only mp3 is supported'
        assert self.cnt % 2 == 1, 'file already exists'
        return urlparse('https://example.com')

    def check(self, key: str) -> ObjectInfo:
        self.cnt += 1
        fake_uri = urlparse('https://example.com')
        return ObjectInfo(uri=fake_uri, size=0, type='audio/mp3')

    def get(self, key: str) -> Any:
        pass
38 |
39 |
@unittest.skipUnless(network_available('https://www.youtube.com'), 'network is not available')
class TestYoutube(unittest.TestCase):
    # Each case bundles: the playlist/channel source config, the expected
    # (title, description, author) tuple, and a prefix of expected episodes
    # as (video id, title, publish date).
    cases = [
        {
            'source': SourceConfig(
                id='youtube',
                url='https://www.youtube.com/playlist?list=PLOU2XLYxmsILHvpAkROp2dXz-jQi4S4_y',
                regex=r'Introduction to ARCore Augmented Faces, \w+'
            ),
            'attr': (
                'Introduction to ARCore Augmented Faces',
                'Learn how to use ARCore’s Augmented Faces APIs to create face effects with Unity, Android, and iOS.',
                'Google for Developers',
            ),
            'items': [
                ('8ih7eHwPoxM', 'Introduction to ARCore Augmented Faces, Unity', date.fromisoformat('2019-09-12')),
                ('-4EvaCQpVEQ', 'Introduction to ARCore Augmented Faces, Android', date.fromisoformat('2019-09-12')),
                ('QAqOTaCCD9M', 'Introduction to ARCore Augmented Faces, iOS', date.fromisoformat('2019-09-12')),
            ]
        },
        {
            'source': SourceConfig(
                id='youtube',
                url='https://www.youtube.com/@PyCon2015/videos'
            ),
            'attr': (
                'PyCon 2015 - Videos',
                '',
                'PyCon 2015',
            ),
            'items': [
                ('G-uKNd5TSBw', 'Keynote - Guido van Rossum - PyCon 2015', date.fromisoformat('2015-04-16')),
                ('lNqtyi3sM-k', 'Keynote - Gabriella Coleman - PyCon 2015', date.fromisoformat('2015-04-16')),
                ('2wDvzy6Hgxg', 'Type Hints - Guido van Rossum - PyCon 2015', date.fromisoformat('2015-04-12')),
            ]
        },
    ]

    def setUp(self) -> None:
        # MockStorage lets the fetcher believe uploads succeed without I/O.
        storage = MockStorage()
        self.youtube = YouTube(
            storage,
            OwnerConfig(name='Podmaker', email='test@podmaker.dev')
        )

    def test_fetch(self) -> None:
        """Fetch each source from live YouTube and compare podcast metadata.

        NOTE(review): depends on live YouTube responses — expected titles and
        dates may drift if the upstream playlists change.
        """
        for case in self.cases:
            source = case['source']
            attr = case['attr']
            podcast = self.youtube.fetch(source)  # type: ignore[arg-type]
            self.assertEqual(urlparse(str(source.url)), podcast.link)  # type: ignore[attr-defined]
            self.assertEqual(attr[0], podcast.title)  # type: ignore[index]
            self.assertIsNotNone(podcast.image.ensure())
            self.assertEqual(attr[1], podcast.description)  # type: ignore[index]
            self.assertEqual('Podmaker', podcast.owner.name)  # type: ignore[union-attr]
            self.assertEqual('test@podmaker.dev', podcast.owner.email)  # type: ignore[union-attr]
            self.assertEqual(attr[2], podcast.author)  # type: ignore[index]
            self.assertEqual([], podcast.categories)
            self.assertFalse(podcast.explicit)
            self.assertEqual('en', podcast.language)
            items = case['items']
            # Only the first len(items) episodes are checked; the feed may
            # contain more entries than the expectations list.
            for (idx, episode) in enumerate(podcast.items.ensure()):
                if idx >= len(items):  # type: ignore[arg-type]
                    break
                current = items[idx]  # type: ignore[index]
                self.assertEqual(current[0], episode.guid)
                self.assertEqual(current[1], episode.title)
                self.assertIsNotNone(episode.pub_date)
                if episode.pub_date is not None:
                    self.assertEqual(current[2], episode.pub_date.date())
                self.assertIsNotNone(episode.link)
                self.assertIsNotNone(episode.image.ensure())  # type: ignore[union-attr]
                self.assertEqual(urlparse('https://example.com'), episode.enclosure.ensure().url)
113 |
--------------------------------------------------------------------------------
/tests/storage/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YogiLiu/podmaker/93153aedfe643f97e912a2ca8cb77df311070a2b/tests/storage/__init__.py
--------------------------------------------------------------------------------
/tests/storage/test_local.py:
--------------------------------------------------------------------------------
1 | import random
2 | import unittest
3 | from io import BytesIO
4 | from pathlib import Path
5 |
6 | from podmaker.config import LocalConfig
7 | from podmaker.storage.local import Local
8 |
9 | file_size = 10
10 |
11 |
class TestLocal(unittest.TestCase):
    """Exercises the Local storage backend against a scratch directory.

    Fix: the class and test method were named after S3 (copy-paste from
    test_s3.py), which was misleading and collided with the real TestS3
    in test reports; renamed to match the backend under test.
    """

    base_dir = Path('/tmp/podmaker')
    data_dir = base_dir / 'data'

    def setUp(self) -> None:
        self.storage = Local(
            LocalConfig(dest='local', base_dir='/tmp/podmaker', public_endpoint='http://localhost:9000')
        )
        self.storage.start()
        # A small random payload to round-trip through the backend.
        self.file = BytesIO()
        self.file.write(random.randbytes(file_size))
        self.file.seek(0)

    def tearDown(self) -> None:
        self.storage.stop()

    # noinspection DuplicatedCode
    def test_local(self) -> None:
        """put/check/get round-trip; the second pass exercises the update path."""
        for _ in range(2):
            result = self.storage.put(self.file, key='/test.bin', content_type='application/octet-stream')
            self.assertEqual('http://localhost:9000/test.bin', result.geturl())
            self.assertTrue((self.data_dir / 'test.bin').exists())
            info = self.storage.check(key='/test.bin')
            self.assertIsNotNone(info)
            if info is not None:
                self.assertEqual('http://localhost:9000/test.bin', info.uri.geturl())
                self.assertEqual(self.file.getbuffer().nbytes, info.size)
                self.assertEqual('application/octet-stream', info.type)
            with self.storage.get(key='/test.bin') as f:
                self.assertEqual(self.file.read(), f.read())
            self.file.seek(0)

    def test_check_empty(self) -> None:
        """check() returns None for a key that was never stored."""
        r = self.storage.check(key='/empty.bin')
        self.assertIsNone(r)
47 |
--------------------------------------------------------------------------------
/tests/storage/test_s3.py:
--------------------------------------------------------------------------------
1 | import random
2 | import unittest
3 | from dataclasses import dataclass
4 | from io import BytesIO
5 | from typing import Any, Type
6 | from unittest.mock import patch
7 | from urllib.parse import ParseResult, urlparse
8 |
9 | import boto3
10 | from botocore.exceptions import ClientError
11 |
12 | from podmaker.config import S3Config
13 | from podmaker.storage.s3 import S3
14 |
# Number of random bytes in the in-memory payload and in the mocked
# object's reported content_length.
file_size = 10
16 |
17 |
@dataclass
class MockedObject:
    """Minimal stand-in for the metadata attributes of a boto3 Object."""

    # Mirrors boto3's Object.content_length (payload size in bytes).
    content_length: int
    # Mirrors boto3's Object.content_type (MIME type of the payload).
    content_type: str
22 |
23 |
24 | # noinspection PyPep8Naming
25 | class MockedBucket:
26 | @staticmethod
27 | def put_object(*, Key: str, **__: Any) -> ParseResult:
28 | return urlparse(f'http://localhost:9000/{Key}')
29 |
30 | @staticmethod
31 | def Object(*, key: str) -> MockedObject:
32 | if key == 'empty.bin':
33 | raise ClientError(error_response={}, operation_name='GetObject')
34 | return MockedObject(content_type='application/octet-stream', content_length=file_size)
35 |
36 |
# noinspection PyPep8Naming
class MockedServiceResource:
    """Stand-in for the boto3 service resource returned by boto3.resource."""

    @staticmethod
    def Bucket(*_: Any, **__: Any) -> MockedBucket:
        # All arguments (bucket name etc.) are ignored by the mock.
        return MockedBucket()
42 |
43 |
def mock_resource(*_: Any, **__: Any) -> Type[MockedServiceResource]:
    """Drop-in replacement for ``boto3.resource`` used with ``patch.object``.

    Returns the class itself (not an instance); its ``Bucket`` staticmethod
    is callable either way.
    """
    return MockedServiceResource
46 |
47 |
class TestS3(unittest.TestCase):
    """Tests for the S3 storage backend, backed by mocked boto3 objects."""

    # boto3.resource is only called while S3() is constructed, so patching
    # just for the duration of setUp is sufficient.
    @patch.object(boto3, 'resource', mock_resource)
    def setUp(self) -> None:
        config = S3Config(
            dest='s3',
            access_key='123',
            access_secret='456',
            bucket='podmaker',
            endpoint='http://localhost:9000',
            public_endpoint='http://localhost:9000'
        )
        self.s3 = S3(config)
        # Random payload so the size assertions exercise real data.
        self.file = BytesIO(random.randbytes(file_size))

    def test_s3(self) -> None:
        """put/check round-trip; uploading the same key twice must behave identically."""
        for _attempt in range(2):
            result = self.s3.put(self.file, key='/test.bin', content_type='application/octet-stream')
            self.assertEqual('http://localhost:9000/test.bin', result.geturl())
            info = self.s3.check(key='/test.bin')
            self.assertIsNotNone(info)
            if info is None:
                continue
            self.assertEqual('http://localhost:9000/test.bin', info.uri.geturl())
            self.assertEqual(self.file.getbuffer().nbytes, info.size)
            self.assertEqual('application/octet-stream', info.type)

    def test_check_empty(self) -> None:
        """Checking a key that was never uploaded must yield None."""
        r = self.s3.check(key='/empty.bin')
        self.assertIsNone(r)
79 |
--------------------------------------------------------------------------------
/tests/test_config.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import unittest
3 | from pathlib import Path
4 |
5 | from podmaker.config import PMConfig
6 |
7 | if sys.version_info >= (3, 11):
8 | import tomllib as toml
9 | else:
10 | import tomlkit as toml
11 |
12 |
class TestConfig(unittest.TestCase):
    """Ensure config.example.toml round-trips through PMConfig unchanged."""

    def setUp(self) -> None:
        # The example config lives at the repository root, one level above tests/.
        self.path = Path(__file__).parent.parent / 'config.example.toml'

    def test_from_file(self) -> None:
        """Parsing the example file and dumping it back must be lossless."""
        expected = toml.loads(self.path.read_text())
        config = PMConfig.from_file(self.path)
        self.assertEqual(expected, config.model_dump(mode='json'))
20 |
--------------------------------------------------------------------------------
/tests/test_rss.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import math
4 | import unittest
5 | from datetime import datetime, timezone
6 | from email.utils import parsedate_to_datetime
7 | from pathlib import Path
8 | from typing import Any, Callable
9 | from urllib.parse import urlparse
10 | from xml.etree.ElementTree import Element, fromstring
11 |
12 | from podmaker.rss import Episode, Podcast
13 | from podmaker.rss.core import PlainResource, Resource, itunes
14 |
15 |
def convert_to_seconds(duration: str) -> int:
    """Convert an itunes duration string ('SS', 'MM:SS' or 'HH:MM:SS') to seconds.

    Raises:
        ValueError: if any segment is not a valid integer.
    """
    # A plain integer string is just the one-segment case of the same
    # base-60 fold ('90'.split(':') == ['90']), so a single loop covers
    # both formats — the original colon check was redundant.
    secs = 0
    for segment in duration.split(':'):
        secs = secs * 60 + int(segment)
    return secs
24 |
25 |
def find_strip_text(el: Element, path: str, namespaces: dict[str, str] | None = None) -> str | None:
    """Return the stripped text of the first element matching *path*.

    Yields None when the element is missing or its text is empty/None;
    whitespace-only text strips down to the empty string.
    """
    text = el.findtext(path, namespaces=namespaces)
    return text.strip() if text else None
31 |
32 |
class TestRSS(unittest.TestCase):
    """Round-trip tests for Podcast: parse the fixtures, re-serialise, merge."""

    def setUp(self) -> None:
        # NOTE(review): these paths are relative to the CWD — assumes the
        # test runner is started from the tests/ directory; confirm in CI.
        self.rss_docs = [
            Path('data/apple.rss.test.xml').read_text(),
            Path('data/google.rss.test.xml').read_text(),
        ]
        self.elements = [
            fromstring(r)
            for r in self.rss_docs
        ]

    def test_from_rss(self) -> None: # noqa: PLR0912, C901, PLR0915
        """Every field parsed by Podcast.from_rss must match the raw XML."""
        for i, element in enumerate(self.elements):
            doc = self.rss_docs[i]
            podcast = Podcast.from_rss(doc)
            self.assertEqual(find_strip_text(element, '.channel/link'), podcast.link.geturl())
            self.assertEqual(find_strip_text(element, '.channel/title'), podcast.title)
            self.assertEqual(
                element.find(
                    f'.channel/{itunes("image")}', namespaces=itunes.namespace
                ).get('href'),  # type: ignore[union-attr]
                podcast.image.ensure().geturl()
            )
            self.assertEqual(find_strip_text(element, '.channel/description'), podcast.description)
            # The owner name is optional within <itunes:owner>; only the
            # email is always compared.
            owner_el = element.find(f'.channel/{itunes("owner")}', namespaces=itunes.namespace)
            if owner_el is not None:
                owner_name = find_strip_text(owner_el, f'.{itunes("name")}')
                if owner_name:
                    self.assertEqual(owner_name, podcast.owner.name)  # type: ignore[union-attr]
                else:
                    self.assertIsNone(podcast.owner.name)  # type: ignore[union-attr]
                self.assertEqual(
                    find_strip_text(owner_el, f'.{itunes("email")}'),
                    podcast.owner.email  # type: ignore[union-attr]
                )
            self.assertEqual(
                find_strip_text(element, f'.channel/{itunes("author")}', namespaces=itunes.namespace),
                podcast.author
            )
            c_els = element.findall(f'.channel/{itunes("category")}', namespaces=itunes.namespace)
            self.assertEqual(
                [c_el.text.strip() for c_el in c_els],  # type: ignore[union-attr]
                podcast.categories
            )
            # itunes:explicit is the literal string 'yes'/'no'; anything
            # other than 'yes' (including absence) parses as not explicit.
            explicit = find_strip_text(element, f'.channel/{itunes("explicit")}', namespaces=itunes.namespace)
            if explicit == 'yes':
                self.assertTrue(podcast.explicit)
            else:
                self.assertFalse(podcast.explicit)
            language = find_strip_text(element, '.channel/language')
            if language:
                self.assertEqual(language, podcast.language)
            else:
                # Bug fix: this branch previously asserted on podcast.explicit
                # (a bool asserted True/False just above, so it could never
                # pass); when the feed has no <language>, it is the parsed
                # language that must be absent.
                self.assertIsNone(podcast.language)
            # Items are expected to come back in document order.
            item_els = element.findall('.channel/item')
            for j, item in enumerate(podcast.items.ensure()):
                el = item_els[j]
                enclosure_el = el.find('.enclosure')
                self.assertEqual(
                    enclosure_el.get('url'),  # type: ignore[union-attr]
                    item.enclosure.ensure().url.geturl()
                )
                self.assertEqual(
                    enclosure_el.get('type'),  # type: ignore[union-attr]
                    item.enclosure.ensure().type
                )
                self.assertEqual(
                    enclosure_el.get('length'),  # type: ignore[union-attr]
                    str(item.enclosure.ensure().length)
                )
                # The plain <title> wins; <itunes:title> is only the fallback.
                if find_strip_text(el, '.title'):
                    self.assertEqual(find_strip_text(el, '.title'), item.title)
                else:
                    self.assertEqual(find_strip_text(el, f'.{itunes("title")}', namespaces=itunes.namespace),
                                     item.title)
                # Either <description> or <itunes:summary> may carry the
                # description; compare against whichever is present.
                desc = find_strip_text(el, '.description')
                if desc:
                    self.assertEqual(desc, item.description)
                summary = find_strip_text(el, f'.{itunes("summary")}')
                if summary:
                    self.assertEqual(summary, item.description)
                explicit = find_strip_text(el, f'.{itunes("explicit")}', namespaces=itunes.namespace)
                if explicit == 'yes':
                    self.assertTrue(item.explicit)
                elif explicit == 'no':
                    self.assertFalse(item.explicit)
                else:
                    self.assertFalse(item.explicit)
                self.assertEqual(el.find('.guid').text, item.guid)  # type: ignore[union-attr]
                duration = find_strip_text(el, f'.{itunes("duration")}', namespaces=itunes.namespace)
                if duration:
                    # Consistency: reuse the module-level helper instead of
                    # duplicating the base-60 fold inline.
                    secs = convert_to_seconds(duration)
                    self.assertEqual(secs, math.ceil(item.duration.total_seconds()))  # type: ignore[union-attr]
                else:
                    self.assertIsNone(item.duration)
                pub_date = find_strip_text(el, 'pubDate')
                if pub_date:
                    try:
                        dt = parsedate_to_datetime(pub_date)
                    except (TypeError, ValueError):
                        # Fall back to ISO-8601; fromisoformat (pre-3.11)
                        # cannot parse a trailing 'Z', so rewrite it first.
                        if pub_date.endswith('Z'):
                            pub_date = pub_date[:-1] + '+00:00'
                        dt = datetime.fromisoformat(pub_date)
                    self.assertEqual(dt.date(), item.pub_date.date())  # type: ignore[union-attr]
                    self.assertEqual(dt.time(), item.pub_date.time())  # type: ignore[union-attr]
                else:
                    self.assertIsNone(item.pub_date)

    def test_xml(self) -> None: # noqa: PLR0912, C901
        """Serialising a parsed Podcast must reproduce the source elements.

        Each case is either a single path (compared in both trees), a
        [source_path, output_path] pair, or a dict with 'a'/'b' paths plus
        an 'action' extracting the comparable value from each element.
        """
        cases: list[str | list[str] | dict[str, Any]] = [
            '.',
            '.channel',
            '.channel/title',
            f'.channel/{itunes("owner")}/{itunes("email")}',
            f'.channel/{itunes("author")}',
            '.channel/description',
            [
                '.channel/description',
                f'.channel/{itunes("summary")}'
            ],
            f'.channel/{itunes("image")}',
            [
                '.channel/title',
                '.channel/image/title'
            ],
            [
                '.channel/link',
                '.channel/image/link'
            ],
            {
                'a': f'.channel/{itunes("image")}',
                'b': '.channel/image/url',
                'action': lambda el: el.text if el.tag == 'url' else el.get('href')
            },
            '.channel/language',
            '.channel/link',
            '.channel/item/[1]/title',
            '.channel/item/[1]/description',
            '.channel/item/[1]/pubDate',
            '.channel/item/[1]/enclosure',
            f'.channel/item/[1]/{itunes("duration")}',
            '.channel/item/[1]/guid',
            '.channel/item/[1]/link',
            {
                'a': f'.channel/item/[1]/{itunes("image")}',
                'b': f'.channel/item/[1]/{itunes("image")}',
                'action': lambda el: el.text if el.tag == 'url' else el.get('href')
            },
            '.channel/item/[2]/title',
            '.channel/item/[2]/description',
            '.channel/item/[2]/pubDate',
            '.channel/item/[2]/enclosure',
            f'.channel/item/[2]/{itunes("duration")}',
            '.channel/item/[2]/guid',
            '.channel/item/[2]/link',
            {
                'a': f'.channel/item/[2]/{itunes("image")}',
                'b': f'.channel/item/[3]/{itunes("image")}',
                'action': lambda el: el.text if el.tag == 'url' else el.get('href')
            },
        ]
        for idx, element in enumerate(self.elements):
            doc = self.rss_docs[idx]
            podcast = Podcast.from_rss(doc)
            xml = podcast.xml
            for case in cases:
                if isinstance(case, dict):
                    a = element.find(case['a'])
                    # Absent in the source document: nothing to compare.
                    if a is None:
                        continue
                    b = xml.find(case['b'])
                    action: Callable[[Element], Any] = case['action']
                    self.assertEqual(action(a), action(b), case)  # type: ignore[arg-type]
                else:
                    if isinstance(case, list):
                        a = element.find(case[0])
                        b = xml.find(case[1])
                    else:
                        a = element.find(case)
                        b = xml.find(case)
                    if a is None:
                        continue
                    if a.text:
                        a.text = a.text.strip()
                    if b.text:  # type: ignore[union-attr]
                        b.text = b.text.strip()  # type: ignore[union-attr]
                    # Some elements carry their value in a 'text' attribute
                    # instead of element text; pop it so the attrib
                    # comparison below does not see it twice.
                    a_t = a.text or a.attrib.pop('text', '')
                    b_t = b.text or b.attrib.pop('text', '')  # type: ignore[union-attr]
                    if 'pubDate' in case:
                        # Compare as datetimes: the serialised form may use a
                        # different but equivalent RFC 2822 rendering.
                        self.assertEqual(
                            parsedate_to_datetime(a_t), parsedate_to_datetime(b_t), case)  # type: ignore[arg-type]
                    elif 'duration' in case:
                        # Compare as seconds: 'HH:MM:SS' and plain seconds
                        # renderings are equivalent.
                        self.assertEqual(convert_to_seconds(a_t), convert_to_seconds(b_t),  # type: ignore[arg-type]
                                         case)
                    else:
                        self.assertEqual(a_t, b_t, case)
                    b_attr = b.attrib.copy()  # type: ignore[union-attr]
                    # The serialiser may add a default isPermaLink; ignore it
                    # when the source did not specify one.
                    if 'isPermaLink' not in a.attrib:
                        b_attr.pop('isPermaLink', None)
                    self.assertEqual(a.attrib, b_attr, case)

    def test_merge(self) -> None:
        """merge() must report False for identical feeds and True per changed field."""
        for doc in self.rss_docs:
            ap = Podcast.from_rss(doc)
            bp = Podcast.from_rss(doc)
            # Identical podcasts: nothing to merge.
            self.assertFalse(ap.merge(bp))
            # Prepend a new episode so the items field differs.
            items = list(bp.items.ensure())
            items.insert(
                0,
                Episode(
                    enclosure=items[0].enclosure,
                    title='foo',
                    description='bar',
                    guid='baz',
                    duration=items[0].duration,
                    explicit=False,
                    pub_date=datetime.now(timezone.utc),
                )
            )
            cases = [
                ('items', PlainResource(items)),
                ('link', urlparse('https://example.com')),
                ('title', 'foo'),
                ('image', PlainResource(urlparse('https://example.com/image.png'))),
                ('description', 'bar'),
                ('author', 'baz'),
                ('categories', ['foo', 'bar']),
                ('explicit', True),
                ('language', 'ja'),
            ]
            for field, value in cases:
                setattr(bp, field, value)
                self.assertTrue(ap.merge(bp), f'{field} is not merged')
                if isinstance(value, Resource):
                    ar = getattr(ap, field).get()
                    br = getattr(bp, field).get()
                    # Order-insensitive comparison for list-valued resources.
                    if isinstance(ar, list):
                        ar = set(ar)
                        br = set(br)
                    self.assertEqual(ar, br, f'{field} is not merged: {value}')
                else:
                    self.assertEqual(getattr(ap, field), value, f'{field} is not merged: {value}')
280 |
--------------------------------------------------------------------------------
/tests/util/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YogiLiu/podmaker/93153aedfe643f97e912a2ca8cb77df311070a2b/tests/util/__init__.py
--------------------------------------------------------------------------------
/tests/util/test_retry.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | from unittest import mock
3 |
4 | from podmaker.util import retry
5 |
6 |
class TestRetry(unittest.TestCase):
    """Behavioural tests for the podmaker.util.retry decorator."""

    def test_no_exception(self) -> None:
        """A function that succeeds immediately is invoked exactly once."""
        stub = mock.Mock(return_value=1)
        wrapped = retry(3)(stub)
        self.assertEqual(1, wrapped())
        self.assertEqual(1, stub.call_count)

    def test_retry_success(self) -> None:
        """A single failure is retried and the eventual result is returned."""
        stub = mock.Mock(side_effect=[Exception, 1])
        wrapped = retry(3)(stub)
        self.assertEqual(1, wrapped())
        self.assertEqual(2, stub.call_count)

    def test_retry_failed(self) -> None:
        """Persistent failure raises after the initial call plus 3 retries."""
        stub = mock.Mock(side_effect=Exception)
        wrapped = retry(3)(stub)
        self.assertRaises(Exception, wrapped)
        self.assertEqual(4, stub.call_count)

    def test_specify_exception(self) -> None:
        """Only the exception type named by `catch` triggers retries."""
        stub = mock.Mock(side_effect=ValueError)
        wrapped = retry(3, catch=TypeError)(stub)
        # ValueError is not caught, so it escapes on the first call.
        self.assertRaises(ValueError, wrapped)
        self.assertEqual(1, stub.call_count)

        stub = mock.Mock(side_effect=ValueError)
        wrapped = retry(3, catch=ValueError)(stub)
        # ValueError is caught, so all retries are exhausted before raising.
        self.assertRaises(ValueError, wrapped)
        self.assertEqual(4, stub.call_count)
36 |
--------------------------------------------------------------------------------