├── .github └── workflows │ └── release.yml ├── .gitignore ├── LICENSE ├── README.md ├── README.zh-CN.md ├── configserver ├── __init__.py ├── server.py └── templates │ ├── config.html │ ├── index.html │ └── source.html ├── main.py ├── preview.png ├── resolvers.conf ├── run.sh ├── scrapeflows ├── bangumi_movie.json ├── bangumi_tvshow.json ├── bangumi_tvshow_episode.json ├── douban_movie.json ├── douban_tvshow.json ├── douban_tvshow_episode.json ├── maoyan_movie.json ├── maoyan_tvshow.json ├── maoyan_tvshow_episode.json ├── mtime_movie.json ├── mtime_tvshow.json ├── mtime_tvshow_episode.json ├── tmdb_movie.json ├── tmdb_tvshow.json └── tmdb_tvshow_episode.json ├── scraper ├── __init__.py ├── enums.py ├── exceptions.py ├── fake.py ├── functions │ ├── __init__.py │ ├── collect.py │ ├── doh.py │ ├── loop.py │ ├── request.py │ └── retval.py ├── scraper.py └── utils.py ├── setup.py └── version.py /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Release Python Package 2 | 3 | on: 4 | push: 5 | tags: 6 | - v[0-9]* 7 | 8 | permissions: 9 | contents: write 10 | 11 | jobs: 12 | release: 13 | runs-on: ubuntu-latest 14 | steps: 15 | - name: Checkout Repository 16 | uses: actions/checkout@v3 17 | - name: Setup Python 18 | uses: actions/setup-python@v4 19 | with: 20 | python-version: '3.8' 21 | - name: Build Package 22 | run: python setup.py sdist --formats=zip 23 | - name: Create Release 24 | uses: ncipollo/release-action@v1 25 | with: 26 | artifacts: "dist/*.zip" 27 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ### VisualStudioCode template 2 | .vscode/* 3 | 4 | # Local History for Visual Studio Code 5 | .history/ 6 | 7 | # Built Visual Studio Code Extensions 8 | *.vsix 9 | 10 | ### JetBrains template 11 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider 12 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 13 | 14 | # User-specific stuff 15 | .idea/**/workspace.xml 16 | .idea/**/tasks.xml 17 | .idea/**/usage.statistics.xml 18 | .idea/**/dictionaries 19 | .idea/**/shelf 20 | 21 | # AWS User-specific 22 | .idea/**/aws.xml 23 | 24 | # Generated files 25 | .idea/**/contentModel.xml 26 | 27 | # Sensitive or high-churn files 28 | .idea/**/dataSources/ 29 | .idea/**/dataSources.ids 30 | .idea/**/dataSources.local.xml 31 | .idea/**/sqlDataSources.xml 32 | .idea/**/dynamic.xml 33 | .idea/**/uiDesigner.xml 34 | .idea/**/dbnavigator.xml 35 | 36 | # Gradle 37 | .idea/**/gradle.xml 38 | .idea/**/libraries 39 | 40 | # Gradle and Maven with auto-import 41 | # When using Gradle or Maven with auto-import, you should exclude module files, 42 | # since they will be recreated, and may cause churn. Uncomment if using 43 | # auto-import. 
44 | # .idea/artifacts 45 | # .idea/compiler.xml 46 | # .idea/jarRepositories.xml 47 | # .idea/modules.xml 48 | # .idea/*.iml 49 | # .idea/modules 50 | # *.iml 51 | # *.ipr 52 | 53 | # CMake 54 | cmake-build-*/ 55 | 56 | # Mongo Explorer plugin 57 | .idea/**/mongoSettings.xml 58 | 59 | # File-based project format 60 | *.iws 61 | 62 | # IntelliJ 63 | out/ 64 | 65 | # mpeltonen/sbt-idea plugin 66 | .idea_modules/ 67 | 68 | # JIRA plugin 69 | atlassian-ide-plugin.xml 70 | 71 | # Cursive Clojure plugin 72 | .idea/replstate.xml 73 | 74 | # SonarLint plugin 75 | .idea/sonarlint/ 76 | 77 | # Crashlytics plugin (for Android Studio and IntelliJ) 78 | com_crashlytics_export_strings.xml 79 | crashlytics.properties 80 | crashlytics-build.properties 81 | fabric.properties 82 | 83 | # Editor-based Rest Client 84 | .idea/httpRequests 85 | 86 | # Android studio 3.1+ serialized cache file 87 | .idea/caches/build_file_checksums.ser 88 | 89 | ### VirtualEnv template 90 | # Virtualenv 91 | # http://iamzed.com/2009/05/07/a-primer-on-virtualenv/ 92 | .Python 93 | [Bb]in 94 | [Ii]nclude 95 | [Ll]ib 96 | [Ll]ib64 97 | [Ll]ocal 98 | [Ss]cripts 99 | pyvenv.cfg 100 | .venv 101 | pip-selfcheck.json 102 | 103 | ### Eclipse template 104 | .metadata 105 | bin/ 106 | tmp/ 107 | *.tmp 108 | *.bak 109 | *.swp 110 | *~.nib 111 | local.properties 112 | .settings/ 113 | .loadpath 114 | .recommenders 115 | 116 | # External tool builders 117 | .externalToolBuilders/ 118 | 119 | # Locally stored "Eclipse launch configurations" 120 | *.launch 121 | 122 | # PyDev specific (Python IDE for Eclipse) 123 | *.pydevproject 124 | 125 | # CDT-specific (C/C++ Development Tooling) 126 | .cproject 127 | 128 | # CDT- autotools 129 | .autotools 130 | 131 | # Java annotation processor (APT) 132 | .factorypath 133 | 134 | # PDT-specific (PHP Development Tools) 135 | .buildpath 136 | 137 | # sbteclipse plugin 138 | .target 139 | 140 | # Tern plugin 141 | .tern-project 142 | 143 | # TeXlipse plugin 144 | .texlipse 145 | 146 | # STS (Spring Tool Suite) 147 | .springBeans 148 | 149 | # Code Recommenders 150 | .recommenders/ 151 | 152 | # Annotation Processing 153 | .apt_generated/ 154 | .apt_generated_test/ 155 | 156 | # Scala IDE specific (Scala & Java development for Eclipse) 157 | .cache-main 158 | .scala_dependencies 159 | .worksheet 160 | 161 | # Uncomment this line if you wish to ignore the project description file. 
162 | # Typically, this file would be tracked if it contains build/dependency configurations: 163 | #.project 164 | 165 | ### Windows template 166 | # Windows thumbnail cache files 167 | Thumbs.db 168 | Thumbs.db:encryptable 169 | ehthumbs.db 170 | ehthumbs_vista.db 171 | 172 | # Dump file 173 | *.stackdump 174 | 175 | # Folder config file 176 | [Dd]esktop.ini 177 | 178 | # Recycle Bin used on file shares 179 | $RECYCLE.BIN/ 180 | 181 | # Windows Installer files 182 | *.cab 183 | *.msi 184 | *.msix 185 | *.msm 186 | *.msp 187 | 188 | # Windows shortcuts 189 | *.lnk 190 | 191 | ### macOS template 192 | # General 193 | .DS_Store 194 | .AppleDouble 195 | .LSOverride 196 | 197 | # Icon must end with two \r 198 | Icon 199 | 200 | # Thumbnails 201 | ._* 202 | 203 | # Files that might appear in the root of a volume 204 | .DocumentRevisions-V100 205 | .fseventsd 206 | .Spotlight-V100 207 | .TemporaryItems 208 | .Trashes 209 | .VolumeIcon.icns 210 | .com.apple.timemachine.donotpresent 211 | 212 | # Directories potentially created on remote AFP share 213 | .AppleDB 214 | .AppleDesktop 215 | Network Trash Folder 216 | Temporary Items 217 | .apdisk 218 | 219 | ### Python template 220 | # Byte-compiled / optimized / DLL files 221 | __pycache__/ 222 | *.py[cod] 223 | *$py.class 224 | 225 | # C extensions 226 | *.so 227 | 228 | # Distribution / packaging 229 | .Python 230 | build/ 231 | develop-eggs/ 232 | dist/ 233 | downloads/ 234 | eggs/ 235 | .eggs/ 236 | lib/ 237 | lib64/ 238 | parts/ 239 | sdist/ 240 | var/ 241 | wheels/ 242 | share/python-wheels/ 243 | *.egg-info/ 244 | .installed.cfg 245 | *.egg 246 | MANIFEST 247 | 248 | # PyInstaller 249 | # Usually these files are written by a python script from a template 250 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 251 | *.manifest 252 | *.spec 253 | 254 | # Installer logs 255 | pip-log.txt 256 | pip-delete-this-directory.txt 257 | 258 | # Unit test / coverage reports 259 | htmlcov/ 260 | .tox/ 261 | .nox/ 262 | .coverage 263 | .coverage.* 264 | .cache 265 | nosetests.xml 266 | coverage.xml 267 | *.cover 268 | *.py,cover 269 | .hypothesis/ 270 | .pytest_cache/ 271 | cover/ 272 | 273 | # Translations 274 | *.mo 275 | *.pot 276 | 277 | # Django stuff: 278 | *.log 279 | local_settings.py 280 | db.sqlite3 281 | db.sqlite3-journal 282 | 283 | # Flask stuff: 284 | instance/ 285 | .webassets-cache 286 | 287 | # Scrapy stuff: 288 | .scrapy 289 | 290 | # Sphinx documentation 291 | docs/_build/ 292 | 293 | # PyBuilder 294 | .pybuilder/ 295 | target/ 296 | 297 | # Jupyter Notebook 298 | .ipynb_checkpoints 299 | 300 | # IPython 301 | profile_default/ 302 | ipython_config.py 303 | 304 | # pyenv 305 | # For a library or package, you might want to ignore these files since the code is 306 | # intended to run in multiple environments; otherwise, check them in: 307 | # .python-version 308 | 309 | # pipenv 310 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 311 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 312 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 313 | # install all needed dependencies. 314 | #Pipfile.lock 315 | 316 | # poetry 317 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 318 | # This is especially recommended for binary packages to ensure reproducibility, and is more 319 | # commonly ignored for libraries. 
320 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 321 | #poetry.lock 322 | 323 | # pdm 324 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 325 | #pdm.lock 326 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 327 | # in version control. 328 | # https://pdm.fming.dev/#use-with-ide 329 | .pdm.toml 330 | 331 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 332 | __pypackages__/ 333 | 334 | # Celery stuff 335 | celerybeat-schedule 336 | celerybeat.pid 337 | 338 | # SageMath parsed files 339 | *.sage.py 340 | 341 | # Environments 342 | .env 343 | .venv 344 | env/ 345 | venv/ 346 | ENV/ 347 | env.bak/ 348 | venv.bak/ 349 | 350 | # Spyder project settings 351 | .spyderproject 352 | .spyproject 353 | 354 | # Rope project settings 355 | .ropeproject 356 | 357 | # mkdocs documentation 358 | /site 359 | 360 | # mypy 361 | .mypy_cache/ 362 | .dmypy.json 363 | dmypy.json 364 | 365 | # Pyre type checker 366 | .pyre/ 367 | 368 | # pytype static type analyzer 369 | .pytype/ 370 | 371 | # Cython debug symbols 372 | cython_debug/ 373 | 374 | # PyCharm 375 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 376 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 377 | # and can be added to the global gitignore or merged into this file. For a more nuclear 378 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 379 | .idea/ 380 | 381 | # syno-videoinfo-plugin 382 | INFO 383 | .cache_* 384 | scrapeflows.conf 385 | configserver/authorization -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 
34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # *Syno*logy Video Info Plugin 2 | 3 | [![GitHub Release](https://img.shields.io/github/v/release/C5H12O5/syno-videoinfo-plugin?logo=github&style=flat&color=blue)](https://github.com/C5H12O5/syno-videoinfo-plugin/releases) 4 | ![GitHub Stars](https://img.shields.io/github/stars/C5H12O5/syno-videoinfo-plugin?logo=github&style=flat&color=yellow) 5 | ![GitHub Downloads](https://img.shields.io/github/downloads/C5H12O5/syno-videoinfo-plugin/total?logo=github&style=flat&color=green) 6 | ![Python Support](https://img.shields.io/badge/Python-3.6+-green?logo=python&style=flat&color=steelblue) 7 | [![GitHub License](https://img.shields.io/github/license/C5H12O5/syno-videoinfo-plugin?logo=apache&style=flat&color=lightslategray)](LICENSE) 8 | 9 | ###### 📖 English / 📖 [简体中文](README.zh-CN.md) 10 | 11 | This project is a video information plugin for Synology **Video Station**. It provides a way to fetch metadata from websites 12 | other than the default ones. 13 | 14 | * Implemented in Python without any third-party dependencies. 15 | * Supports multiple sources, and can be easily extended to support more. 16 | * Has a simple configuration page where you can customize your plugin. 17 | 18 | ![preview](preview.png) 19 | 20 | ## Usage 21 | 22 | Install the plugin: 23 | 24 | 1. Download the latest release from [***here***](https://github.com/C5H12O5/syno-videoinfo-plugin/releases). 25 | 2. Open your **Video Station**, go to ***Settings*** > ***Video Info Plugin***. 26 | 3. Click **[Add]**, select the downloaded file, and click **[OK]**. 27 | 28 | Configure the plugin: 29 | 30 | 1. Open your browser, go to `http://[NAS_IP]:5125` (replace `[NAS_IP]` with your NAS IP address) page. 31 | 2. Change the configuration as you want, and click **[ :floppy_disk: ]** button in the upper right corner. 32 | 3. Go back to your **Video Station**, the configuration should be applied automatically. 33 | > [!NOTE] 34 | > If you upgrade the plugin but the configuration page is not updated, you can restart the configuration service by following steps: 35 | > 1. Open the configuration page, click **[Exit]** button in the upper right corner to close the service. 36 | > 2. Go back to **Video Station**, go to ***Settings*** > ***Video Info Plugin***, and click **[Test Connection]** button to restart the service. 37 | 38 | ## Requirements 39 | 40 | * Python 3.6+ 41 | * Video Station 2.5.0+ for DSM 6.0 42 | * Video Station 3.0.0+ for DSM 7.0 43 | 44 | ## References 45 | 46 | * [The Video Station Metadata](https://kb.synology.com/en-id/DSM/help/VideoStation/metadata?version=7) 47 | * [The Video Station API documentation](https://download.synology.com/download/Document/Software/DeveloperGuide/Package/VideoStation/All/enu/Synology_Video_Station_API_enu.pdf) 48 | 49 | > Tips for naming video files: 50 | > 51 | > Movie: 52 | > 53 | > * Naming format: Movie_Name (Release_Year).ext 54 | > * Example: Avatar (2009).avi 55 | > 56 | > TV Show: 57 | > * Naming format: TV_Show_Name.SXX.EYY.ext (***S*** as a shorthand for ***Season*** and ***E*** for ***Episode***) 58 | > * Example: Gossip Girl.S03.E04.avi 59 | 60 | ## Development 61 | 62 | You can develop your own plugin based on this project easily. Here are the steps: 63 | 64 | 1. 
Clone this repository to your local machine: 65 | 66 | ```shell 67 | git clone https://github.com/C5H12O5/syno-videoinfo-plugin 68 | ``` 69 | 70 | 2. Modify the code as you want, and test it like this: 71 | 72 | ```shell 73 | python main.py --type movie --input "{\"title\":\"{movie_title}\"}" --limit 1 --loglevel debug 74 | ``` 75 | 76 | 3. Package the plugin using the following command: 77 | 78 | ```shell 79 | python setup.py sdist --formats=zip 80 | ``` 81 | 82 | ## License 83 | 84 | [Apache-2.0 license](LICENSE) -------------------------------------------------------------------------------- /README.zh-CN.md: -------------------------------------------------------------------------------- 1 | # *Syno*logy Video Info Plugin 2 | 3 | [![GitHub Release](https://img.shields.io/github/v/release/C5H12O5/syno-videoinfo-plugin?logo=github&style=flat&color=blue)](https://github.com/C5H12O5/syno-videoinfo-plugin/releases) 4 | ![GitHub Stars](https://img.shields.io/github/stars/C5H12O5/syno-videoinfo-plugin?logo=github&style=flat&color=yellow) 5 | ![GitHub Downloads](https://img.shields.io/github/downloads/C5H12O5/syno-videoinfo-plugin/total?logo=github&style=flat&color=green) 6 | ![Python Support](https://img.shields.io/badge/Python-3.6+-green?logo=python&style=flat&color=steelblue) 7 | [![GitHub License](https://img.shields.io/github/license/C5H12O5/syno-videoinfo-plugin?logo=apache&style=flat&color=lightslategray)](LICENSE) 8 | 9 | ###### 📖 [English](README.md) / 📖 简体中文 10 | 11 | 本项目是群晖 **Video Station** 的第三方视频信息插件,它提供了一种从各大影视数据库平台获取视频元数据的方法。 12 | 13 | * 使用Python标准库实现,无需安装任何依赖。 14 | * 支持多个数据来源,并且可以轻松扩展。 15 | * 有简单的配置页面,可以自定义你的插件。 16 | 17 | ![preview](preview.png) 18 | 19 | ## 使用说明 20 | 21 | 安装插件: 22 | 23 | 1. 从[***此处***](https://github.com/C5H12O5/syno-videoinfo-plugin/releases)下载最新版本。 24 | 2. 打开 **Video Station**,进入 ***设置*** > ***视频信息插件***。 25 | 3. 点击 **[新增]**,选择第一步下载的压缩包,然后点击 **[确定]**。 26 | 27 | 配置插件: 28 | 29 | 1. 打开你的浏览器,输入`http://[NAS_IP]:5125`(将`[NAS_IP]`替换为你的NAS的IP地址)打开配置页面。 30 | 2. 根据你的需要修改配置,然后点击右上角的 **[ :floppy_disk: ]** 按钮。 31 | 3. 返回你的 **Video Station**,保存的配置将会自动生效。 32 | > [!NOTE] 33 | > 如果升级了插件但是配置页面没有更新,可以通过以下步骤来重启配置服务: 34 | > 1. 打开配置页面,点击右上角的 **[Exit]** 按钮关闭配置服务。 35 | > 2. 返回 **Video Station**,进入 ***设置*** > ***视频信息插件***,点击 **[测试连接]** 按钮即可重新启动配置服务。 36 | 37 | ## 版本要求 38 | 39 | * Python 3.6+ 40 | * Video Station 2.5.0+(DSM 6.0) 41 | * Video Station 3.0.0+(DSM 7.0) 42 | 43 | ## 参考文献 44 | 45 | * [视频元数据](https://kb.synology.cn/zh-cn/DSM/help/VideoStation/metadata?version=7) 46 | * [Video Station API 文档](https://download.synology.com/download/Document/Software/DeveloperGuide/Package/VideoStation/All/enu/Synology_Video_Station_API_enu.pdf) 47 | 48 | > 视频文件命名提示: 49 | > 50 | > 电影: 51 | > 52 | > * 命名格式:电影名称 (发行年份).ext 53 | > * 例如:Avatar (2009).avi 54 | > 55 | > 电视节目: 56 | > * 命名格式:电视节目名称.SXX.EYY.ext(***S*** 是 ***季数*** 的缩写,***E*** 是 ***集数*** 的缩写) 57 | > * 例如:Gossip Girl.S03.E04.avi 58 | 59 | ## 如何开发 60 | 61 | 您可以基于本项目并按以下步骤来开发自己的插件: 62 | 63 | 1. 将本项目克隆到本地: 64 | 65 | ```shell 66 | git clone https://github.com/C5H12O5/syno-videoinfo-plugin 67 | ``` 68 | 69 | 2. 根据需要修改代码,并可以使用以下命令进行测试: 70 | 71 | ```shell 72 | python main.py --type movie --input "{\"title\":\"{movie_title}\"}" --limit 1 --loglevel debug 73 | ``` 74 | 75 | 3. 
然后可以使用以下命令进行打包并上传使用: 76 | 77 | ```shell 78 | python setup.py sdist --formats=zip 79 | ``` 80 | 81 | ## 使用许可 82 | 83 | [Apache-2.0 license](LICENSE) -------------------------------------------------------------------------------- /configserver/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/C5H12O5/syno-videoinfo-plugin/27cc22b763343e85ea64fc5f5e34d078b8a7ab68/configserver/__init__.py -------------------------------------------------------------------------------- /configserver/server.py: -------------------------------------------------------------------------------- 1 | """A simple HTTP server for configuration.""" 2 | import ast 3 | import http 4 | import json 5 | import string 6 | import sys 7 | from http.server import HTTPServer 8 | from pathlib import Path 9 | 10 | HOST = "0.0.0.0" 11 | PORT = 5125 12 | 13 | # define the base directory 14 | _basedir = Path(__file__).resolve().parent 15 | 16 | # define the configuration files 17 | _resolvers_conf = _basedir / "../resolvers.conf" 18 | _flows_conf = _basedir / "../scrapeflows.conf" 19 | _auth_conf = _basedir / "authorization" 20 | 21 | # initialize the templates 22 | with open(_basedir / "templates/config.html", "r", encoding="utf-8") as html: 23 | _config_tmpl = string.Template(html.read()) 24 | with open(_basedir / "templates/source.html", "r", encoding="utf-8") as html: 25 | _source_tmpl = string.Template(html.read()) 26 | with open(_basedir / "templates/index.html", "r", encoding="utf-8") as html: 27 | _index_tmpl = string.Template(html.read()) 28 | 29 | 30 | def render_index(saved=None): 31 | """Render the index page.""" 32 | source_html = "" 33 | sites = load_sites() 34 | for site, site_conf in sites.items(): 35 | saved_conf = saved.get(site) if saved is not None else None 36 | config_html = render_config(site, site_conf, saved_conf) 37 | types = site_conf["types"] 38 | doh_enabled = site_conf["doh_enabled"] 39 | source = { 40 | "site": site, 41 | "movie": "selected" if "movie" in types else "disabled", 42 | "tvshow": "selected" if "tvshow" in types else "disabled", 43 | "doh_enabled": "selected" if doh_enabled else "", 44 | "doh_disabled": "selected" if not doh_enabled else "", 45 | "priority": len(sites), 46 | "config": config_html, 47 | } 48 | if saved_conf is not None: 49 | saved_types = saved_conf["types"] 50 | saved_doh = saved_conf["doh"] 51 | source["movie"] = "selected" if "movie" in saved_types else "" 52 | source["tvshow"] = "selected" if "tvshow" in saved_types else "" 53 | source["doh_enabled"] = "selected" if saved_doh else "" 54 | source["doh_disabled"] = "selected" if not saved_doh else "" 55 | source["priority"] = saved_conf["priority"] 56 | source_html += _source_tmpl.substitute(source) 57 | 58 | return _index_tmpl.substitute( 59 | sources=source_html, resolvers=load_resolvers(), version=load_version() 60 | ) 61 | 62 | 63 | def render_config(site, site_conf, saved_conf): 64 | """Render the configuration for a site.""" 65 | config_html = "" 66 | config = site_conf.get("config") 67 | if config is not None: 68 | for key, option in config.items(): 69 | value = saved_conf.get(key, "") if saved_conf is not None else "" 70 | mapping = {"site": site, "key": key, "value": value} 71 | mapping.update(option) 72 | config_html += _config_tmpl.substitute(mapping) 73 | return config_html 74 | 75 | 76 | def load_sites(): 77 | """Load the list of sites and types from flow definitions.""" 78 | sites = {} 79 | for filepath in (_basedir / 
"../scrapeflows").glob("*.json"): 80 | with open(filepath, "r", encoding="utf-8") as def_reader: 81 | flowdef = json.load(def_reader) 82 | site = flowdef["site"] 83 | site_conf = sites.get(site, {}) 84 | site_conf["doh_enabled"] = flowdef.get("doh_enabled", False) 85 | 86 | # aggregate types 87 | type_ = flowdef["type"].split("_", 1)[0] 88 | types = site_conf.get("types", []) 89 | if type_ not in types: 90 | types.append(type_) 91 | site_conf["types"] = types 92 | 93 | # aggregate config 94 | if "config" in flowdef: 95 | config = site_conf.get("config", {}) 96 | config.update(flowdef["config"]) 97 | site_conf["config"] = config 98 | 99 | sites[site] = site_conf 100 | 101 | return dict(sorted(sites.items(), key=lambda x: x[0])) 102 | 103 | 104 | def load_resolvers(): 105 | """Load the list of DoH resolvers.""" 106 | with open(_resolvers_conf, "r", encoding="utf-8") as doh_reader: 107 | return ast.literal_eval(doh_reader.read()) 108 | 109 | 110 | def load_version(): 111 | """Load the plugin version from the directory name.""" 112 | dir_name = _basedir.parent.name 113 | if "-" in dir_name: 114 | version = dir_name.split("-")[-1] 115 | if version != "plugin": 116 | return f"v{version}" 117 | return "" 118 | 119 | 120 | # initialize the index page 121 | _index_html = render_index() 122 | 123 | 124 | class RequestHandler(http.server.SimpleHTTPRequestHandler): 125 | """Request handler for the HTTP server.""" 126 | 127 | def do_AUTH(self): 128 | if not _auth_conf.exists(): 129 | return True 130 | 131 | with open(_auth_conf, "r", encoding="utf-8") as auth_reader: 132 | saved_auth = auth_reader.read() 133 | 134 | if self.headers.get("Authorization") is not None: 135 | auth_header = self.headers.get("Authorization") 136 | if auth_header.split("Basic ")[1] == saved_auth: 137 | return True 138 | 139 | self.send_response(401) 140 | self.send_header("WWW-Authenticate", 'Basic realm="Login Required"') 141 | self.send_header("Content-type", "text/html") 142 | self.end_headers() 143 | self.wfile.write(b"Unauthorized") 144 | return False 145 | 146 | def do_GET(self): 147 | if not self.do_AUTH(): 148 | return 149 | 150 | self.send_response(200) 151 | self.send_header("Content-type", "text/html") 152 | self.end_headers() 153 | 154 | if self.path == "/": 155 | # index page 156 | if _flows_conf.exists(): 157 | with open(_flows_conf, "r", encoding="utf-8") as conf_reader: 158 | saved_conf = json.load(conf_reader) 159 | self.wfile.write(render_index(saved_conf).encode("utf-8")) 160 | else: 161 | self.wfile.write(_index_html.encode("utf-8")) 162 | 163 | elif self.path == "/exit": 164 | # close the server 165 | self.server.server_close() 166 | sys.exit() 167 | 168 | def do_POST(self): 169 | if not self.do_AUTH(): 170 | return 171 | 172 | self.send_response(200) 173 | self.end_headers() 174 | content_length = int(self.headers["Content-Length"]) 175 | request_body = self.rfile.read(content_length) 176 | 177 | if self.path == "/save": 178 | # save the configuration 179 | conf = json.loads(request_body.decode("utf-8")) 180 | with open(_flows_conf, "w", encoding="utf-8") as conf_writer: 181 | conf_writer.write(json.dumps( 182 | conf["flows"], ensure_ascii=False, indent=2 183 | )) 184 | with open(_resolvers_conf, "w", encoding="utf-8") as doh_writer: 185 | doh_writer.write(json.dumps( 186 | conf["resolvers"], ensure_ascii=False, indent=2 187 | )) 188 | 189 | elif self.path == "/auth": 190 | # save the authorization 191 | with open(_auth_conf, "w", encoding="utf-8") as auth_writer: 192 | 
auth_writer.write(request_body.decode("utf-8")) 193 | 194 | 195 | if __name__ == "__main__": 196 | httpd = HTTPServer((HOST, PORT), RequestHandler) 197 | httpd.serve_forever() 198 | -------------------------------------------------------------------------------- /configserver/templates/config.html: -------------------------------------------------------------------------------- 1 |
[config.html: template markup was stripped in this dump; only the ${icon} placeholder survives. Per configserver/server.py, the template is filled with ${site}, ${key}, ${value} plus the option fields from a flow's "config" block.]
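With the markup gone, a minimal sketch of how configserver/server.py consumes this template may help. The HTML string, the "apikey" key, and the "icon" option field below are illustrative stand-ins, not the original template:

```python
from string import Template

# Stand-in for the stripped config.html (only ${icon} is known to survive).
# server.py builds the mapping as {"site": ..., "key": ..., "value": ...}
# and then merges in the option fields from a flow's "config" block.
config_tmpl = Template(
    '<label><i class="icon">${icon}</i> ${key}</label>'
    '<input id="${site}_${key}" value="${value}">'
)

mapping = {"site": "tmdb.org", "key": "apikey", "value": ""}  # hypothetical
mapping.update({"icon": "key"})  # hypothetical option field
print(config_tmpl.substitute(mapping))
```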
-------------------------------------------------------------------------------- /configserver/templates/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Synology Video Info Plugin 6 | 7 | 8 | 9 | 10 | 11 | 22 | 23 | 24 | 25 | 50 | 51 |
[index.html: template markup was stripped in this dump; the surviving text nodes are the "Metadata Sources:" and "DNS-over-HTTPS Resolvers:" section labels and the ${version} placeholder. Per configserver/server.py, the page is filled with ${sources}, ${resolvers} and ${version}.]
--------------------------------------------------------------------------------
/configserver/templates/source.html:
--------------------------------------------------------------------------------
[source.html: template markup was stripped in this dump; the surviving text nodes are the ${site} and ${config} placeholders and the icon names edit_square, stacks, foggy and swap_vert. Per configserver/server.py, each source row is also filled with ${movie}, ${tvshow}, ${doh_enabled}, ${doh_disabled} and ${priority}.]
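main.py, which follows, is the entry point Video Station invokes; for local development the README's test command can also be scripted. A hedged sketch, assuming python3 on PATH, the repository root as working directory, and the README's sample title:

```python
import json
import subprocess

# Run the plugin with the CLI flags documented in the README.
search_input = json.dumps({"title": "Avatar"})
result = subprocess.run(
    ["python3", "main.py",
     "--type", "movie",
     "--input", search_input,
     "--limit", "1",
     "--loglevel", "debug"],
    capture_output=True,
    text=True,
    check=True,
)
print(result.stdout)  # metadata JSON printed by scraper.scrape()
```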
  • -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | """Entry point for this plugin.""" 2 | from pathlib import Path 3 | 4 | import scraper 5 | 6 | if __name__ == "__main__": 7 | # Prints the output of the scraper to the console. 8 | root_dir = Path(__file__).resolve().parent 9 | plugin_id = root_dir.name 10 | print(scraper.scrape(plugin_id)) 11 | -------------------------------------------------------------------------------- /preview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/C5H12O5/syno-videoinfo-plugin/27cc22b763343e85ea64fc5f5e34d078b8a7ab68/preview.png -------------------------------------------------------------------------------- /resolvers.conf: -------------------------------------------------------------------------------- 1 | [ 2 | # https://developers.cloudflare.com/1.1.1.1/encryption/dns-over-https 3 | "1.0.0.1", 4 | "1.1.1.1", 5 | 6 | # https://support.quad9.net/hc/en-us 7 | "9.9.9.9", 8 | "149.112.112.112", 9 | 10 | # https://support.opendns.com/hc/en-us 11 | "208.67.220.220", 12 | "208.67.222.222", 13 | 14 | # https://developers.google.com/speed/public-dns/docs/doh 15 | "dns.google", 16 | 17 | # https://adguard-dns.io/public-dns.html 18 | "dns.adguard-dns.com", 19 | ] -------------------------------------------------------------------------------- /run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | BASEDIR=$(dirname $0) 4 | ARGV="" 5 | PORT=5125 6 | 7 | escape() 8 | { 9 | local ARG=$(echo -E "$@" | sed "s/'/'\\\\''/g") 10 | echo \'$ARG\' 11 | } 12 | 13 | i=1 14 | while [ $i -le $# ]; do 15 | eval ARG=\$\(escape \${$i}\) 16 | ARGV="$ARGV $ARG" 17 | i=`expr $i + 1` 18 | done 19 | 20 | if ! 
netstat -tuln | grep ":$PORT" >/dev/null; then 21 | nohup /usr/bin/env python3 "$BASEDIR"/configserver/server.py > /dev/null 2>&1 & 22 | fi 23 | 24 | eval "/usr/bin/env python3 $BASEDIR/main.py $ARGV" -------------------------------------------------------------------------------- /scrapeflows/bangumi_movie.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "movie", 3 | "site": "bangumi.tv", 4 | "steps": [ 5 | { 6 | "doh": { 7 | "host": "api.bgm.tv" 8 | } 9 | }, 10 | { 11 | "http": { 12 | "url": "https://api.bgm.tv/search/subject/{title}?type=2&start=0&max_results={limit}", 13 | "method": "GET", 14 | "headers": { 15 | "User-Agent": "C5H12O5/syno-videoinfo-plugin{version} (https://github.com/C5H12O5/syno-videoinfo-plugin)" 16 | }, 17 | "result": "metadata" 18 | } 19 | }, 20 | { 21 | "collect": { 22 | "source": "metadata", 23 | "into": { 24 | "ids": "['xp_texts', './list//id']" 25 | } 26 | } 27 | }, 28 | { 29 | "loop": { 30 | "source": "ids", 31 | "item": "id", 32 | "steps": [ 33 | { 34 | "http": { 35 | "url": "https://api.bgm.tv/v0/subjects/{id}", 36 | "method": "GET", 37 | "headers": { 38 | "User-Agent": "C5H12O5/syno-videoinfo-plugin{$parent[version]} (https://github.com/C5H12O5/syno-videoinfo-plugin)" 39 | }, 40 | "result": "subject" 41 | } 42 | }, 43 | { 44 | "collect": { 45 | "source": "subject", 46 | "into": { 47 | "movie": { 48 | "title": "['xp_text', './name_cn']", 49 | "tagline": "['xp_text', './name']", 50 | "original_available": "['xp_text', './date']", 51 | "summary": "['xp_text', './summary']", 52 | "certificate": "", 53 | "genre": "['xp_texts', './tags//name']", 54 | "actor": [], 55 | "writer": "['re_matches', '\"key\":\"原作\",\"value\":\"([^\"]*?)\"']", 56 | "director": "['re_matches', '\"key\":\"导演\",\"value\":\"([^\"]*?)\"']", 57 | "extra": { 58 | "[plugin_id]": { 59 | "rating": { 60 | "[plugin_id]": "['xp_text', './rating//score', 'float']" 61 | }, 62 | "poster": [ 63 | "['xp_text', './images//large']" 64 | ], 65 | "backdrop": [ 66 | "['xp_text', './images//large']" 67 | ] 68 | } 69 | } 70 | }, 71 | "publish_date": "['xp_text', './date']", 72 | "available_date": "{$parent[available]}" 73 | } 74 | } 75 | }, 76 | { 77 | "http": { 78 | "url": "https://api.bgm.tv/v0/subjects/{id}/characters", 79 | "method": "GET", 80 | "headers": { 81 | "User-Agent": "C5H12O5/syno-videoinfo-plugin{$parent[version]} (https://github.com/C5H12O5/syno-videoinfo-plugin)" 82 | }, 83 | "result": "characters" 84 | } 85 | }, 86 | { 87 | "collect": { 88 | "source": "characters", 89 | "into": { 90 | "movie": { 91 | "actor": "['xp_texts', './/actors//name']" 92 | } 93 | } 94 | } 95 | }, 96 | { 97 | "retval": { 98 | "source": "movie", 99 | "compare": "['publish_date', '>=', 'available_date']" 100 | } 101 | } 102 | ] 103 | } 104 | } 105 | ] 106 | } -------------------------------------------------------------------------------- /scrapeflows/bangumi_tvshow.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "tvshow", 3 | "site": "bangumi.tv", 4 | "steps": [ 5 | { 6 | "doh": { 7 | "host": "api.bgm.tv" 8 | } 9 | }, 10 | { 11 | "http": { 12 | "url": "https://api.bgm.tv/search/subject/{title}?type=2&start=0&max_results={limit}", 13 | "method": "GET", 14 | "headers": { 15 | "User-Agent": "C5H12O5/syno-videoinfo-plugin{version} (https://github.com/C5H12O5/syno-videoinfo-plugin)" 16 | }, 17 | "result": "metadata" 18 | } 19 | }, 20 | { 21 | "collect": { 22 | "source": "metadata", 23 | "into": { 24 | "ids": 
"['xp_texts', './list//id']" 25 | } 26 | } 27 | }, 28 | { 29 | "loop": { 30 | "source": "ids", 31 | "item": "id", 32 | "steps": [ 33 | { 34 | "http": { 35 | "url": "https://api.bgm.tv/v0/subjects/{id}", 36 | "method": "GET", 37 | "headers": { 38 | "User-Agent": "C5H12O5/syno-videoinfo-plugin{$parent[version]} (https://github.com/C5H12O5/syno-videoinfo-plugin)" 39 | }, 40 | "result": "subject" 41 | } 42 | }, 43 | { 44 | "collect": { 45 | "source": "subject", 46 | "into": { 47 | "tvshow": { 48 | "title": "['xp_text', './name_cn']", 49 | "original_available": "['xp_text', './date']", 50 | "summary": "['xp_text', './summary']", 51 | "extra": { 52 | "[plugin_id]": { 53 | "poster": [ 54 | "['xp_text', './images//large']" 55 | ], 56 | "backdrop": [ 57 | "['xp_text', './images//large']" 58 | ] 59 | } 60 | } 61 | }, 62 | "publish_date": "['xp_text', './date']", 63 | "available_date": "{$parent[available]}" 64 | } 65 | } 66 | }, 67 | { 68 | "retval": { 69 | "source": "tvshow", 70 | "compare": "['publish_date', '>=', 'available_date']" 71 | } 72 | } 73 | ] 74 | } 75 | } 76 | ] 77 | } -------------------------------------------------------------------------------- /scrapeflows/bangumi_tvshow_episode.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "tvshow_episode", 3 | "site": "bangumi.tv", 4 | "steps": [ 5 | { 6 | "doh": { 7 | "host": "api.bgm.tv" 8 | } 9 | }, 10 | { 11 | "http": { 12 | "url": "https://api.bgm.tv/search/subject/{title}?type=2&start=0&max_results={limit}", 13 | "method": "GET", 14 | "headers": { 15 | "User-Agent": "C5H12O5/syno-videoinfo-plugin{version} (https://github.com/C5H12O5/syno-videoinfo-plugin)" 16 | }, 17 | "result": "metadata" 18 | } 19 | }, 20 | { 21 | "collect": { 22 | "source": "metadata", 23 | "into": { 24 | "ids": "['xp_texts', './list//id']" 25 | } 26 | } 27 | }, 28 | { 29 | "loop": { 30 | "source": "ids", 31 | "item": "id", 32 | "steps": [ 33 | { 34 | "http": { 35 | "url": "https://api.bgm.tv/v0/episodes?subject_id={id}&type=0&limit=100&offset=0", 36 | "method": "GET", 37 | "headers": { 38 | "User-Agent": "C5H12O5/syno-videoinfo-plugin{$parent[version]} (https://github.com/C5H12O5/syno-videoinfo-plugin)" 39 | }, 40 | "result": "episodes" 41 | } 42 | }, 43 | { 44 | "collect": { 45 | "source": "episodes", 46 | "into": { 47 | "ep": "['re_match', '\"ep\":{$parent[episode]},[^{{}}]*?\"id\":(\\d*?),']" 48 | } 49 | } 50 | }, 51 | { 52 | "retval": { 53 | "ifempty": "ep" 54 | } 55 | }, 56 | { 57 | "http": { 58 | "url": "https://api.bgm.tv/v0/subjects/{id}", 59 | "method": "GET", 60 | "headers": { 61 | "User-Agent": "C5H12O5/syno-videoinfo-plugin{$parent[version]} (https://github.com/C5H12O5/syno-videoinfo-plugin)" 62 | }, 63 | "result": "subject" 64 | } 65 | }, 66 | { 67 | "collect": { 68 | "source": "subject", 69 | "into": { 70 | "episode": { 71 | "title": "['xp_text', './name_cn']", 72 | "tagline": "['xp_text', './name']", 73 | "original_available": "['xp_text', './date']", 74 | "summary": "['xp_text', './summary']", 75 | "certificate": "", 76 | "genre": "['xp_texts', './tags//name']", 77 | "actor": [], 78 | "writer": "['re_matches', '\"key\":\"原作\",\"value\":\"([^\"]*?)\"']", 79 | "director": "['re_matches', '\"key\":\"导演\",\"value\":\"([^\"]*?)\"']", 80 | "extra": { 81 | "[plugin_id]": { 82 | "tvshow": { 83 | "title": "['xp_text', './name_cn']", 84 | "original_available": "['xp_text', './date']", 85 | "summary": "['xp_text', './summary']", 86 | "extra": { 87 | "[plugin_id]": { 88 | "poster": [ 89 | "['xp_text', 
'./images//large']" 90 | ], 91 | "backdrop": [ 92 | "['xp_text', './images//large']" 93 | ] 94 | } 95 | } 96 | }, 97 | "rating": { 98 | "[plugin_id]": "['xp_text', './rating//score', 'float']" 99 | }, 100 | "poster": [ 101 | "['xp_text', './images//large']" 102 | ] 103 | } 104 | } 105 | }, 106 | "publish_date": "['xp_text', './date']", 107 | "available_date": "{$parent[available]}" 108 | } 109 | } 110 | }, 111 | { 112 | "collect": { 113 | "source": "$parent", 114 | "into": { 115 | "episode": { 116 | "season": "['get', 'season']", 117 | "episode": "['get', 'episode']" 118 | } 119 | } 120 | } 121 | }, 122 | { 123 | "http": { 124 | "url": "https://api.bgm.tv/v0/subjects/{id}/characters", 125 | "method": "GET", 126 | "headers": { 127 | "User-Agent": "C5H12O5/syno-videoinfo-plugin{$parent[version]} (https://github.com/C5H12O5/syno-videoinfo-plugin)" 128 | }, 129 | "result": "characters" 130 | } 131 | }, 132 | { 133 | "collect": { 134 | "source": "characters", 135 | "into": { 136 | "episode": { 137 | "actor": "['xp_texts', './/actors//name']" 138 | } 139 | } 140 | } 141 | }, 142 | { 143 | "http": { 144 | "url": "https://api.bgm.tv/v0/episodes/{ep}", 145 | "method": "GET", 146 | "headers": { 147 | "User-Agent": "C5H12O5/syno-videoinfo-plugin{$parent[version]} (https://github.com/C5H12O5/syno-videoinfo-plugin)" 148 | }, 149 | "result": "subject" 150 | } 151 | }, 152 | { 153 | "collect": { 154 | "source": "subject", 155 | "into": { 156 | "episode": { 157 | "tagline": "['xp_text', './name_cn']", 158 | "original_available": "['xp_text', './airdate']", 159 | "summary": "['xp_text', './desc']" 160 | } 161 | } 162 | } 163 | }, 164 | { 165 | "retval": { 166 | "source": "episode", 167 | "compare": "['publish_date', '>=', 'available_date']" 168 | } 169 | } 170 | ] 171 | } 172 | } 173 | ] 174 | } -------------------------------------------------------------------------------- /scrapeflows/douban_movie.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "movie", 3 | "site": "douban.com", 4 | "steps": [ 5 | { 6 | "doh": { 7 | "hosts": [ 8 | "www.douban.com", 9 | "movie.douban.com" 10 | ] 11 | } 12 | }, 13 | { 14 | "http": { 15 | "url": "https://www.douban.com/search?cat=1002&q={title}", 16 | "method": "GET", 17 | "headers": { 18 | "Referer": "https://www.douban.com/", 19 | "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36" 20 | }, 21 | "result": "metadata" 22 | } 23 | }, 24 | { 25 | "collect": { 26 | "source": "metadata", 27 | "into": { 28 | "ids": "['re_matches', '电影].*?sid:\\s*(\\d+)\\s*,']" 29 | } 30 | } 31 | }, 32 | { 33 | "loop": { 34 | "source": "ids", 35 | "item": "id", 36 | "steps": [ 37 | { 38 | "http": { 39 | "url": "https://movie.douban.com/subject/{id}/", 40 | "method": "GET", 41 | "headers": { 42 | "Host": "movie.douban.com", 43 | "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36" 44 | }, 45 | "result": "subject" 46 | } 47 | }, 48 | { 49 | "collect": { 50 | "source": "subject", 51 | "from": "['xp_text', './/script[@type=\"application/ld+json\"]']", 52 | "into": { 53 | "movie": { 54 | "title": "['xp_text', './name']", 55 | "tagline": "", 56 | "original_available": "['xp_text', './datePublished']", 57 | "certificate": "", 58 | "genre": "['xp_texts', './genre/*']", 59 | "actor": "['xp_texts', './actor//name']", 60 | "writer": "['xp_texts', './author//name']", 61 | "director": 
"['xp_texts', './director//name']", 62 | "extra": { 63 | "[plugin_id]": { 64 | "rating": { 65 | "[plugin_id]": "['xp_text', './/ratingValue', 'float']" 66 | }, 67 | "poster": "['xp_texts', './image', 're_sub', '(.+/photo)/s_ratio_poster/(public/.+)', '\\\\1/m/\\\\2']", 68 | "backdrop": "['xp_texts', './image', 're_sub', '(.+/photo)/s_ratio_poster/(public/.+)', '\\\\1/l/\\\\2']" 69 | } 70 | } 71 | }, 72 | "publish_date": "['xp_text', './datePublished']", 73 | "available_date": "{$parent[available]}" 74 | } 75 | } 76 | }, 77 | { 78 | "collect": { 79 | "source": "subject", 80 | "into": { 81 | "movie": { 82 | "summary": "['xp_text', './/span[@property=\"v:summary\"]']", 83 | "extra": { 84 | "[plugin_id]": { 85 | "reference": { 86 | "imdb": "['re_match', 'IMDb:\\s*(.*?)\\s*
    ']" 87 | } 88 | } 89 | } 90 | } 91 | } 92 | } 93 | }, 94 | { 95 | "retval": { 96 | "source": "movie", 97 | "compare": "['publish_date', '>=', 'available_date']" 98 | } 99 | } 100 | ] 101 | } 102 | } 103 | ] 104 | } -------------------------------------------------------------------------------- /scrapeflows/douban_tvshow.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "tvshow", 3 | "site": "douban.com", 4 | "steps": [ 5 | { 6 | "doh": { 7 | "hosts": [ 8 | "www.douban.com", 9 | "movie.douban.com" 10 | ] 11 | } 12 | }, 13 | { 14 | "http": { 15 | "url": "https://www.douban.com/search?cat=1002&q={title}", 16 | "method": "GET", 17 | "headers": { 18 | "Referer": "https://www.douban.com/", 19 | "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36" 20 | }, 21 | "result": "metadata" 22 | } 23 | }, 24 | { 25 | "collect": { 26 | "source": "metadata", 27 | "into": { 28 | "ids": "['re_matches', '电视剧].*?sid:\\s*(\\d+)\\s*,']" 29 | } 30 | } 31 | }, 32 | { 33 | "loop": { 34 | "source": "ids", 35 | "item": "id", 36 | "steps": [ 37 | { 38 | "http": { 39 | "url": "https://movie.douban.com/subject/{id}/", 40 | "method": "GET", 41 | "headers": { 42 | "Host": "movie.douban.com", 43 | "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36" 44 | }, 45 | "result": "subject" 46 | } 47 | }, 48 | { 49 | "collect": { 50 | "source": "subject", 51 | "from": "['xp_text', './/script[@type=\"application/ld+json\"]']", 52 | "into": { 53 | "tvshow": { 54 | "title": "['xp_text', './name']", 55 | "original_available": "['xp_text', './datePublished']", 56 | "extra": { 57 | "[plugin_id]": { 58 | "poster": "['xp_texts', './image', 're_sub', '(.+/photo)/s_ratio_poster/(public/.+)', '\\\\1/m/\\\\2']", 59 | "backdrop": "['xp_texts', './image', 're_sub', '(.+/photo)/s_ratio_poster/(public/.+)', '\\\\1/l/\\\\2']" 60 | } 61 | } 62 | }, 63 | "publish_date": "['xp_text', './datePublished']", 64 | "available_date": "{$parent[available]}" 65 | } 66 | } 67 | }, 68 | { 69 | "collect": { 70 | "source": "subject", 71 | "into": { 72 | "tvshow": { 73 | "summary": "['xp_text', './/span[@property=\"v:summary\"]']" 74 | } 75 | } 76 | } 77 | }, 78 | { 79 | "retval": { 80 | "source": "tvshow", 81 | "compare": "['publish_date', '>=', 'available_date']" 82 | } 83 | } 84 | ] 85 | } 86 | } 87 | ] 88 | } -------------------------------------------------------------------------------- /scrapeflows/douban_tvshow_episode.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "tvshow_episode", 3 | "site": "douban.com", 4 | "steps": [ 5 | { 6 | "doh": { 7 | "hosts": [ 8 | "www.douban.com", 9 | "movie.douban.com" 10 | ] 11 | } 12 | }, 13 | { 14 | "http": { 15 | "url": "https://www.douban.com/search?cat=1002&q={title}", 16 | "method": "GET", 17 | "headers": { 18 | "Referer": "https://www.douban.com/", 19 | "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36" 20 | }, 21 | "result": "metadata" 22 | } 23 | }, 24 | { 25 | "collect": { 26 | "source": "metadata", 27 | "into": { 28 | "ids": "['re_matches', '电视剧].*?sid:\\s*(\\d+)\\s*,']" 29 | } 30 | } 31 | }, 32 | { 33 | "loop": { 34 | "source": "ids", 35 | "item": "id", 36 | "steps": [ 37 | { 38 | "http": { 39 | "url": "https://movie.douban.com/subject/{id}/", 40 | "method": 
"GET", 41 | "headers": { 42 | "Host": "movie.douban.com", 43 | "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36" 44 | }, 45 | "result": "subject" 46 | } 47 | }, 48 | { 49 | "collect": { 50 | "source": "subject", 51 | "from": "['xp_text', './/script[@type=\"application/ld+json\"]']", 52 | "into": { 53 | "episode": { 54 | "title": "['xp_text', './name']", 55 | "tagline": "", 56 | "original_available": "['xp_text', './datePublished']", 57 | "certificate": "", 58 | "genre": "['xp_texts', './genre/*']", 59 | "actor": "['xp_texts', './actor//name']", 60 | "writer": "['xp_texts', './author//name']", 61 | "director": "['xp_texts', './director//name']", 62 | "extra": { 63 | "[plugin_id]": { 64 | "tvshow": { 65 | "title": "['xp_text', './name']", 66 | "original_available": "['xp_text', './datePublished']", 67 | "extra": { 68 | "[plugin_id]": { 69 | "poster": "['xp_texts', './image', 're_sub', '(.+/photo)/s_ratio_poster/(public/.+)', '\\\\1/m/\\\\2']", 70 | "backdrop": "['xp_texts', './image', 're_sub', '(.+/photo)/s_ratio_poster/(public/.+)', '\\\\1/l/\\\\2']" 71 | } 72 | } 73 | }, 74 | "rating": { 75 | "[plugin_id]": "['xp_text', './/ratingValue', 'float']" 76 | }, 77 | "poster": "['xp_texts', './image', 're_sub', '(.+/photo)/s_ratio_poster/(public/.+)', '\\\\1/m/\\\\2']" 78 | } 79 | } 80 | }, 81 | "publish_date": "['xp_text', './datePublished']", 82 | "available_date": "{$parent[available]}" 83 | } 84 | } 85 | }, 86 | { 87 | "collect": { 88 | "source": "subject", 89 | "into": { 90 | "episode": { 91 | "summary": "['xp_text', './/span[@property=\"v:summary\"]']", 92 | "extra": { 93 | "[plugin_id]": { 94 | "tvshow": { 95 | "summary": "['xp_text', './/span[@property=\"v:summary\"]']" 96 | }, 97 | "reference": { 98 | "imdb": "['re_match', 'IMDb:\\s*(.*?)\\s*
    ']" 99 | } 100 | } 101 | } 102 | } 103 | } 104 | } 105 | }, 106 | { 107 | "collect": { 108 | "source": "$parent", 109 | "into": { 110 | "episode": { 111 | "season": "['get', 'season']", 112 | "episode": "['get', 'episode']" 113 | } 114 | } 115 | } 116 | }, 117 | { 118 | "retval": { 119 | "source": "episode", 120 | "compare": "['publish_date', '>=', 'available_date']" 121 | } 122 | } 123 | ] 124 | } 125 | } 126 | ] 127 | } -------------------------------------------------------------------------------- /scrapeflows/maoyan_movie.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "movie", 3 | "site": "maoyan.com", 4 | "steps": [ 5 | { 6 | "doh": { 7 | "host": "i.maoyan.com" 8 | } 9 | }, 10 | { 11 | "http": { 12 | "url": "https://i.maoyan.com/apollo/ajax/search?kw={title}&cityId=1&stype=-1", 13 | "method": "GET", 14 | "headers": { 15 | "Host": "i.maoyan.com", 16 | "Referer": "https://i.maoyan.com/apollo/search?searchtype=movie&$from=maoyan", 17 | "User-Agent": "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Mobile Safari/537.36" 18 | }, 19 | "result": "metadata" 20 | } 21 | }, 22 | { 23 | "collect": { 24 | "source": "metadata", 25 | "into": { 26 | "ids": "['re_matches', '\"id\":(\\d+),[^{{}}]*?\"movieType\":0']" 27 | } 28 | } 29 | }, 30 | { 31 | "loop": { 32 | "source": "ids", 33 | "item": "id", 34 | "steps": [ 35 | { 36 | "http": { 37 | "url": "https://i.maoyan.com/asgard/movie/{id}?_v_=yes&channelId=1&cityId=1", 38 | "method": "GET", 39 | "headers": { 40 | "Host": "i.maoyan.com", 41 | "Referer": "https://i.maoyan.com/apollo/search?searchtype=movie&$from=maoyan", 42 | "User-Agent": "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Mobile Safari/537.36" 43 | }, 44 | "result": "subject" 45 | } 46 | }, 47 | { 48 | "collect": { 49 | "source": "subject", 50 | "from": "['re_match', 'var AppData = (.*?);\\s*']", 51 | "into": { 52 | "movie": { 53 | "title": "['xp_text', './movie/nm']", 54 | "tagline": "['xp_text', './movie/enm']", 55 | "original_available": "['xp_text', './movie/pubDate', 'strftime', '%Y-%m-%d', 'True']", 56 | "summary": "['xp_text', './movie/dra']", 57 | "certificate": "", 58 | "genre": "['xp_text', './movie/cat', 'split', ',']", 59 | "actor": "['xp_text', './movie/star', 'split', ',']", 60 | "writer": [], 61 | "director": "['xp_text', './movie/dir', 'split', ',']", 62 | "extra": { 63 | "[plugin_id]": { 64 | "rating": { 65 | "[plugin_id]": "['xp_text', './movie/sc', 'float']" 66 | }, 67 | "poster": "['xp_texts', './movie/img']", 68 | "backdrop": "['xp_texts', './movie/img']" 69 | } 70 | } 71 | }, 72 | "publish_date": "['xp_text', './movie/pubDate', 'strftime', '%Y-%m-%d', 'True']", 73 | "available_date": "{$parent[available]}" 74 | } 75 | } 76 | }, 77 | { 78 | "retval": { 79 | "source": "movie", 80 | "compare": "['publish_date', '>=', 'available_date']" 81 | } 82 | } 83 | ] 84 | } 85 | } 86 | ] 87 | } -------------------------------------------------------------------------------- /scrapeflows/maoyan_tvshow.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "tvshow", 3 | "site": "maoyan.com", 4 | "steps": [ 5 | { 6 | "doh": { 7 | "host": "i.maoyan.com" 8 | } 9 | }, 10 | { 11 | "http": { 12 | "url": "https://i.maoyan.com/apollo/ajax/search?kw={title}&cityId=1&stype=-1", 13 | "method": "GET", 14 | "headers": { 15 | "Host": "i.maoyan.com", 16 | 
"Referer": "https://i.maoyan.com/apollo/search?searchtype=movie&$from=maoyan", 17 | "User-Agent": "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Mobile Safari/537.36" 18 | }, 19 | "result": "metadata" 20 | } 21 | }, 22 | { 23 | "collect": { 24 | "source": "metadata", 25 | "into": { 26 | "ids": "['re_matches', '\"id\":(\\d+),[^{{}}]*?\"movieType\":1']" 27 | } 28 | } 29 | }, 30 | { 31 | "loop": { 32 | "source": "ids", 33 | "item": "id", 34 | "steps": [ 35 | { 36 | "http": { 37 | "url": "https://i.maoyan.com/asgard/movie/{id}?_v_=yes&channelId=1&cityId=1", 38 | "method": "GET", 39 | "headers": { 40 | "Host": "i.maoyan.com", 41 | "Referer": "https://i.maoyan.com/apollo/search?searchtype=movie&$from=maoyan", 42 | "User-Agent": "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Mobile Safari/537.36" 43 | }, 44 | "result": "subject" 45 | } 46 | }, 47 | { 48 | "collect": { 49 | "source": "subject", 50 | "from": "['re_match', 'var AppData = (.*?);\\s*']", 51 | "into": { 52 | "tvshow": { 53 | "title": "['xp_text', './movie/nm']", 54 | "original_available": "['xp_text', './movie/pubDate', 'strftime', '%Y-%m-%d', 'True']", 55 | "summary": "['xp_text', './movie/dra']", 56 | "extra": { 57 | "[plugin_id]": { 58 | "poster": "['xp_texts', './movie/img']", 59 | "backdrop": "['xp_texts', './movie/img']" 60 | } 61 | } 62 | }, 63 | "publish_date": "['xp_text', './movie/pubDate', 'strftime', '%Y-%m-%d', 'True']", 64 | "available_date": "{$parent[available]}" 65 | } 66 | } 67 | }, 68 | { 69 | "retval": { 70 | "source": "tvshow", 71 | "compare": "['publish_date', '>=', 'available_date']" 72 | } 73 | } 74 | ] 75 | } 76 | } 77 | ] 78 | } -------------------------------------------------------------------------------- /scrapeflows/maoyan_tvshow_episode.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "tvshow_episode", 3 | "site": "maoyan.com", 4 | "steps": [ 5 | { 6 | "doh": { 7 | "host": "i.maoyan.com" 8 | } 9 | }, 10 | { 11 | "http": { 12 | "url": "https://i.maoyan.com/apollo/ajax/search?kw={title}&cityId=1&stype=-1", 13 | "method": "GET", 14 | "headers": { 15 | "Host": "i.maoyan.com", 16 | "Referer": "https://i.maoyan.com/apollo/search?searchtype=movie&$from=maoyan", 17 | "User-Agent": "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Mobile Safari/537.36" 18 | }, 19 | "result": "metadata" 20 | } 21 | }, 22 | { 23 | "collect": { 24 | "source": "metadata", 25 | "into": { 26 | "ids": "['re_matches', '\"id\":(\\d+),[^{{}}]*?\"movieType\":1']" 27 | } 28 | } 29 | }, 30 | { 31 | "loop": { 32 | "source": "ids", 33 | "item": "id", 34 | "steps": [ 35 | { 36 | "http": { 37 | "url": "https://i.maoyan.com/asgard/movie/{id}?_v_=yes&channelId=1&cityId=1", 38 | "method": "GET", 39 | "headers": { 40 | "Host": "i.maoyan.com", 41 | "Referer": "https://i.maoyan.com/apollo/search?searchtype=movie&$from=maoyan", 42 | "User-Agent": "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Mobile Safari/537.36" 43 | }, 44 | "result": "subject" 45 | } 46 | }, 47 | { 48 | "collect": { 49 | "source": "subject", 50 | "from": "['re_match', 'var AppData = (.*?);\\s*']", 51 | "into": { 52 | "episode": { 53 | "title": "['xp_text', './movie/nm']", 54 | "tagline": "", 55 | "original_available": "['xp_text', './movie/pubDate', 'strftime', '%Y-%m-%d', 
'True']", 56 | "summary": "['xp_text', './movie/dra']", 57 | "certificate": "", 58 | "genre": "['xp_text', './movie/cat', 'split', ',']", 59 | "actor": "['xp_text', './movie/star', 'split', ',']", 60 | "writer": [], 61 | "director": "['xp_text', './movie/dir', 'split', ',']", 62 | "extra": { 63 | "[plugin_id]": { 64 | "tvshow": { 65 | "title": "['xp_text', './movie/nm']", 66 | "original_available": "['xp_text', './movie/pubDate', 'strftime', '%Y-%m-%d', 'True']", 67 | "summary": "['xp_text', './movie/dra']", 68 | "extra": { 69 | "[plugin_id]": { 70 | "poster": "['xp_texts', './movie/img']", 71 | "backdrop": "['xp_texts', './movie/img']" 72 | } 73 | } 74 | }, 75 | "rating": { 76 | "[plugin_id]": "['xp_text', './movie/sc', 'float']" 77 | }, 78 | "poster": "['xp_texts', './movie/img']" 79 | } 80 | } 81 | }, 82 | "publish_date": "['xp_text', './movie/pubDate', 'strftime', '%Y-%m-%d', 'True']", 83 | "available_date": "{$parent[available]}" 84 | } 85 | } 86 | }, 87 | { 88 | "collect": { 89 | "source": "$parent", 90 | "into": { 91 | "episode": { 92 | "season": "['get', 'season']", 93 | "episode": "['get', 'episode']" 94 | } 95 | } 96 | } 97 | }, 98 | { 99 | "retval": { 100 | "source": "episode", 101 | "compare": "['publish_date', '>=', 'available_date']" 102 | } 103 | } 104 | ] 105 | } 106 | } 107 | ] 108 | } -------------------------------------------------------------------------------- /scrapeflows/mtime_movie.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "movie", 3 | "site": "mtime.com", 4 | "steps": [ 5 | { 6 | "doh": { 7 | "host": "front-gateway.mtime.com" 8 | } 9 | }, 10 | { 11 | "http": { 12 | "url": "http://front-gateway.mtime.com/mtime-search/search/unionSearch2", 13 | "method": "POST", 14 | "headers": { 15 | "Content-Type": "application/x-www-form-urlencoded", 16 | "Host": "front-gateway.mtime.com", 17 | "Origin": "http://film.mtime.com", 18 | "Referer": "http://film.mtime.com/", 19 | "User-Agent": "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Mobile Safari/537.36" 20 | }, 21 | "body": { 22 | "keyword": "{title}", 23 | "searchType": 0, 24 | "pageIndex": 1, 25 | "pageSize": "{limit}", 26 | "year": "{year}" 27 | }, 28 | "result": "metadata" 29 | } 30 | }, 31 | { 32 | "collect": { 33 | "source": "metadata", 34 | "into": { 35 | "ids": "['re_matches', '\"movieId\":(\\d+),[^{{}}]*?\"movieContentType\":\"电影\"']" 36 | } 37 | } 38 | }, 39 | { 40 | "loop": { 41 | "source": "ids", 42 | "item": "id", 43 | "steps": [ 44 | { 45 | "http": { 46 | "url": "http://front-gateway.mtime.com/library/movie/detail.api?movieId={id}", 47 | "method": "GET", 48 | "headers": { 49 | "Host": "front-gateway.mtime.com", 50 | "Origin": "http://movie.mtime.com", 51 | "Referer": "http://movie.mtime.com/", 52 | "User-Agent": "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Mobile Safari/537.36" 53 | }, 54 | "result": "subject" 55 | } 56 | }, 57 | { 58 | "collect": { 59 | "source": "subject", 60 | "into": { 61 | "movie": { 62 | "title": "['xp_text', './data/basic/name']", 63 | "tagline": "['xp_text', './data/basic/nameEn']", 64 | "original_available": "['xp_text', './data/basic/releaseDate', 'reformat', '%Y%m%d', '%Y-%m-%d']", 65 | "summary": "['xp_text', './data/basic/story']", 66 | "certificate": "", 67 | "genre": "['xp_texts', './data/basic/movieGenres//name']", 68 | "actor": "['xp_texts', './data/basic/actors//name']", 69 | "writer": 
"['xp_texts', './data/basic/writers//name']", 70 | "director": "['xp_texts', './data/basic/directors//name']", 71 | "extra": { 72 | "[plugin_id]": { 73 | "rating": { 74 | "[plugin_id]": "['xp_text', './data/basic/overallRating', 'float']" 75 | }, 76 | "poster": "['xp_texts', './data/basic/img']", 77 | "backdrop": "['xp_texts', './data/basic/bigImage']" 78 | } 79 | } 80 | } 81 | } 82 | } 83 | }, 84 | { 85 | "retval": { 86 | "source": "movie" 87 | } 88 | } 89 | ] 90 | } 91 | } 92 | ] 93 | } -------------------------------------------------------------------------------- /scrapeflows/mtime_tvshow.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "tvshow", 3 | "site": "mtime.com", 4 | "steps": [ 5 | { 6 | "doh": { 7 | "host": "front-gateway.mtime.com" 8 | } 9 | }, 10 | { 11 | "http": { 12 | "url": "http://front-gateway.mtime.com/mtime-search/search/unionSearch2", 13 | "method": "POST", 14 | "headers": { 15 | "Content-Type": "application/x-www-form-urlencoded", 16 | "Host": "front-gateway.mtime.com", 17 | "Origin": "http://film.mtime.com", 18 | "Referer": "http://film.mtime.com/", 19 | "User-Agent": "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Mobile Safari/537.36" 20 | }, 21 | "body": { 22 | "keyword": "{title}", 23 | "searchType": 0, 24 | "pageIndex": 1, 25 | "pageSize": "{limit}", 26 | "year": "{year}" 27 | }, 28 | "result": "metadata" 29 | } 30 | }, 31 | { 32 | "collect": { 33 | "source": "metadata", 34 | "into": { 35 | "ids": "['re_matches', '\"movieId\":(\\d+),[^{{}}]*?\"movieContentType\":\"电视剧\"']" 36 | } 37 | } 38 | }, 39 | { 40 | "loop": { 41 | "source": "ids", 42 | "item": "id", 43 | "steps": [ 44 | { 45 | "http": { 46 | "url": "http://front-gateway.mtime.com/library/movie/detail.api?movieId={id}", 47 | "method": "GET", 48 | "headers": { 49 | "Host": "front-gateway.mtime.com", 50 | "Origin": "http://movie.mtime.com", 51 | "Referer": "http://movie.mtime.com/", 52 | "User-Agent": "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Mobile Safari/537.36" 53 | }, 54 | "result": "subject" 55 | } 56 | }, 57 | { 58 | "collect": { 59 | "source": "subject", 60 | "into": { 61 | "tvshow": { 62 | "title": "['xp_text', './data/basic/name']", 63 | "original_available": "['xp_text', './data/basic/releaseDate', 'reformat', '%Y%m%d', '%Y-%m-%d']", 64 | "summary": "['xp_text', './data/basic/story']", 65 | "extra": { 66 | "[plugin_id]": { 67 | "poster": "['xp_texts', './data/basic/img']", 68 | "backdrop": "['xp_texts', './data/basic/img']" 69 | } 70 | } 71 | } 72 | } 73 | } 74 | }, 75 | { 76 | "retval": { 77 | "source": "tvshow" 78 | } 79 | } 80 | ] 81 | } 82 | } 83 | ] 84 | } -------------------------------------------------------------------------------- /scrapeflows/mtime_tvshow_episode.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "tvshow_episode", 3 | "site": "mtime.com", 4 | "steps": [ 5 | { 6 | "doh": { 7 | "host": "front-gateway.mtime.com" 8 | } 9 | }, 10 | { 11 | "http": { 12 | "url": "http://front-gateway.mtime.com/mtime-search/search/unionSearch2", 13 | "method": "POST", 14 | "headers": { 15 | "Content-Type": "application/x-www-form-urlencoded", 16 | "Host": "front-gateway.mtime.com", 17 | "Origin": "http://film.mtime.com", 18 | "Referer": "http://film.mtime.com/", 19 | "User-Agent": "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) 
AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Mobile Safari/537.36" 20 | }, 21 | "body": { 22 | "keyword": "{title}", 23 | "searchType": 0, 24 | "pageIndex": 1, 25 | "pageSize": "{limit}", 26 | "year": "{year}" 27 | }, 28 | "result": "metadata" 29 | } 30 | }, 31 | { 32 | "collect": { 33 | "source": "metadata", 34 | "into": { 35 | "ids": "['re_matches', '\"movieId\":(\\d+),[^{{}}]*?\"movieContentType\":\"电视剧\"']" 36 | } 37 | } 38 | }, 39 | { 40 | "loop": { 41 | "source": "ids", 42 | "item": "id", 43 | "steps": [ 44 | { 45 | "http": { 46 | "url": "http://front-gateway.mtime.com/library/movie/detail.api?movieId={id}", 47 | "method": "GET", 48 | "headers": { 49 | "Host": "front-gateway.mtime.com", 50 | "Origin": "http://movie.mtime.com", 51 | "Referer": "http://movie.mtime.com/", 52 | "User-Agent": "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Mobile Safari/537.36" 53 | }, 54 | "result": "subject" 55 | } 56 | }, 57 | { 58 | "collect": { 59 | "source": "subject", 60 | "into": { 61 | "episode": { 62 | "title": "['xp_text', './data/basic/name']", 63 | "tagline": "", 64 | "original_available": "['xp_text', './data/basic/releaseDate', 'reformat', '%Y%m%d', '%Y-%m-%d']", 65 | "summary": "['xp_text', './data/basic/story']", 66 | "certificate": "", 67 | "genre": "['xp_texts', './data/basic/movieGenres//name']", 68 | "actor": "['xp_texts', './data/basic/actors//name']", 69 | "writer": "['xp_texts', './data/basic/writers//name']", 70 | "director": "['xp_texts', './data/basic/directors//name']", 71 | "extra": { 72 | "[plugin_id]": { 73 | "tvshow": { 74 | "title": "['xp_text', './data/basic/name']", 75 | "original_available": "['xp_text', './data/basic/releaseDate', 'reformat', '%Y%m%d', '%Y-%m-%d']", 76 | "summary": "['xp_text', './data/basic/story']", 77 | "extra": { 78 | "[plugin_id]": { 79 | "poster": "['xp_texts', './data/basic/img']", 80 | "backdrop": "['xp_texts', './data/basic/img']" 81 | } 82 | } 83 | }, 84 | "rating": { 85 | "[plugin_id]": "['xp_text', './data/basic/overallRating', 'float']" 86 | }, 87 | "poster": "['xp_texts', './data/basic/img']" 88 | } 89 | } 90 | } 91 | } 92 | } 93 | }, 94 | { 95 | "collect": { 96 | "source": "$parent", 97 | "into": { 98 | "episode": { 99 | "season": "['get', 'season']", 100 | "episode": "['get', 'episode']" 101 | } 102 | } 103 | } 104 | }, 105 | { 106 | "retval": { 107 | "source": "episode" 108 | } 109 | } 110 | ] 111 | } 112 | } 113 | ] 114 | } -------------------------------------------------------------------------------- /scrapeflows/tmdb_movie.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "movie", 3 | "site": "themoviedb.org", 4 | "doh_enabled": true, 5 | "config": { 6 | "apikey": { 7 | "icon": "key", 8 | "name": "API Key" 9 | } 10 | }, 11 | "steps": [ 12 | { 13 | "retval": { 14 | "ifempty": "apikey" 15 | } 16 | }, 17 | { 18 | "doh": { 19 | "host": "api.tmdb.org" 20 | } 21 | }, 22 | { 23 | "http": { 24 | "url": "https://api.tmdb.org/3/search/movie?api_key={apikey}&language={lang}&query={title}&year={year}&page=1", 25 | "method": "GET", 26 | "headers": { 27 | "Accept": "application/json" 28 | }, 29 | "timeout": 20, 30 | "result": "metadata" 31 | } 32 | }, 33 | { 34 | "collect": { 35 | "source": "metadata", 36 | "into": { 37 | "ids": "['xp_texts', './results//id']" 38 | } 39 | } 40 | }, 41 | { 42 | "loop": { 43 | "source": "ids", 44 | "item": "id", 45 | "steps": [ 46 | { 47 | "http": { 48 | "url": 
"https://api.tmdb.org/3/movie/{id}?api_key={$parent[apikey]}&language={$parent[lang]}&append_to_response=credits,release_dates", 49 | "method": "GET", 50 | "headers": { 51 | "Accept": "application/json" 52 | }, 53 | "timeout": 20, 54 | "result": "subject" 55 | } 56 | }, 57 | { 58 | "collect": { 59 | "source": "subject", 60 | "into": { 61 | "movie": { 62 | "title": "['xp_text', './title']", 63 | "tagline": "['xp_text', './tagline']", 64 | "original_available": "['xp_text', './release_date']", 65 | "summary": "['xp_text', './overview']", 66 | "certificate": "['re_match', '\"release_dates\":.*?\"US\".*?\"certification\":\"([^\"]*?)\"']", 67 | "genre": "['xp_texts', './genres//name']", 68 | "actor": "['xp_texts', './credits/cast//name']", 69 | "writer": "['re_matches', '\"name\":\"([^\"]*?)\"[^{{}}]*?\"department\":\"Writing\"']", 70 | "director": "['re_matches', '\"name\":\"([^\"]*?)\"[^{{}}]*?\"department\":\"Directing\"']", 71 | "extra": { 72 | "[plugin_id]": { 73 | "poster": [ 74 | "['xp_text', './poster_path', 'prefix', 'https://image.tmdb.org/t/p/w500']" 75 | ], 76 | "backdrop": [ 77 | "['xp_text', './backdrop_path', 'prefix', 'https://image.tmdb.org/t/p/original']" 78 | ], 79 | "reference": { 80 | "themoviedb": "['xp_text', './id', 'int']", 81 | "imdb": "['xp_text', './imdb_id']" 82 | }, 83 | "collection_id": { 84 | "themoviedb": "['xp_text', './belongs_to_collection/id', 'int']" 85 | } 86 | } 87 | } 88 | }, 89 | "rating": "['xp_text', './vote_average', 're_sub', '(\\d+\\.\\d)\\d*', '\\\\1']" 90 | } 91 | } 92 | }, 93 | { 94 | "collect": { 95 | "source": "rating", 96 | "into": { 97 | "movie": { 98 | "extra": { 99 | "[plugin_id]": { 100 | "rating": { 101 | "[plugin_id]": "['re_match', '(.*)', 'float']" 102 | } 103 | } 104 | } 105 | } 106 | } 107 | } 108 | }, 109 | { 110 | "retval": { 111 | "source": "movie" 112 | } 113 | } 114 | ] 115 | } 116 | } 117 | ] 118 | } -------------------------------------------------------------------------------- /scrapeflows/tmdb_tvshow.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "tvshow", 3 | "site": "themoviedb.org", 4 | "doh_enabled": true, 5 | "config": { 6 | "apikey": { 7 | "icon": "key", 8 | "name": "API Key" 9 | } 10 | }, 11 | "steps": [ 12 | { 13 | "retval": { 14 | "ifempty": "apikey" 15 | } 16 | }, 17 | { 18 | "doh": { 19 | "host": "api.tmdb.org" 20 | } 21 | }, 22 | { 23 | "http": { 24 | "url": "https://api.tmdb.org/3/search/tv?api_key={apikey}&language={lang}&query={title}&year={year}&page=1", 25 | "method": "GET", 26 | "headers": { 27 | "Accept": "application/json" 28 | }, 29 | "timeout": 20, 30 | "result": "metadata" 31 | } 32 | }, 33 | { 34 | "collect": { 35 | "source": "metadata", 36 | "into": { 37 | "ids": "['xp_texts', './results//id']" 38 | } 39 | } 40 | }, 41 | { 42 | "loop": { 43 | "source": "ids", 44 | "item": "id", 45 | "steps": [ 46 | { 47 | "http": { 48 | "url": "https://api.tmdb.org/3/tv/{id}?api_key={$parent[apikey]}&language={$parent[lang]}", 49 | "method": "GET", 50 | "headers": { 51 | "Accept": "application/json" 52 | }, 53 | "timeout": 20, 54 | "result": "subject" 55 | } 56 | }, 57 | { 58 | "collect": { 59 | "source": "subject", 60 | "into": { 61 | "tvshow": { 62 | "title": "['xp_text', './name']", 63 | "original_available": "['xp_text', './first_air_date']", 64 | "summary": "['xp_text', './overview']", 65 | "extra": { 66 | "[plugin_id]": { 67 | "poster": [ 68 | "['xp_text', './poster_path', 'prefix', 'https://image.tmdb.org/t/p/w500']" 69 | ], 70 | "backdrop": [ 71 | 
"['xp_text', './backdrop_path', 'prefix', 'https://image.tmdb.org/t/p/original']" 72 | ] 73 | } 74 | } 75 | } 76 | } 77 | } 78 | }, 79 | { 80 | "retval": { 81 | "source": "tvshow" 82 | } 83 | } 84 | ] 85 | } 86 | } 87 | ] 88 | } -------------------------------------------------------------------------------- /scrapeflows/tmdb_tvshow_episode.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "tvshow_episode", 3 | "site": "themoviedb.org", 4 | "doh_enabled": true, 5 | "config": { 6 | "apikey": { 7 | "icon": "key", 8 | "name": "API Key" 9 | } 10 | }, 11 | "steps": [ 12 | { 13 | "retval": { 14 | "ifempty": "apikey" 15 | } 16 | }, 17 | { 18 | "doh": { 19 | "host": "api.tmdb.org" 20 | } 21 | }, 22 | { 23 | "http": { 24 | "url": "https://api.tmdb.org/3/search/tv?api_key={apikey}&language={lang}&query={title}&year={year}&page=1", 25 | "method": "GET", 26 | "headers": { 27 | "Accept": "application/json" 28 | }, 29 | "timeout": 20, 30 | "result": "metadata" 31 | } 32 | }, 33 | { 34 | "collect": { 35 | "source": "metadata", 36 | "into": { 37 | "ids": "['xp_texts', './results//id']" 38 | } 39 | } 40 | }, 41 | { 42 | "loop": { 43 | "source": "ids", 44 | "item": "id", 45 | "steps": [ 46 | { 47 | "http": { 48 | "url": "https://api.tmdb.org/3/tv/{id}?api_key={$parent[apikey]}&language={$parent[lang]}&append_to_response=content_ratings,external_ids", 49 | "method": "GET", 50 | "headers": { 51 | "Accept": "application/json" 52 | }, 53 | "timeout": 20, 54 | "result": "subject" 55 | } 56 | }, 57 | { 58 | "collect": { 59 | "source": "subject", 60 | "into": { 61 | "episode": { 62 | "title": "['xp_text', './name']", 63 | "certificate": "['re_match', '\"content_ratings\":.*?\"US\".*?\"rating\":\"([^\"]*?)\"']", 64 | "genre": "['xp_texts', './genres//name']", 65 | "extra": { 66 | "[plugin_id]": { 67 | "tvshow": { 68 | "title": "['xp_text', './name']", 69 | "original_available": "['xp_text', './first_air_date']", 70 | "summary": "['xp_text', './overview']", 71 | "extra": { 72 | "[plugin_id]": { 73 | "poster": [ 74 | "['xp_text', './poster_path', 'prefix', 'https://image.tmdb.org/t/p/w500']" 75 | ], 76 | "backdrop": [ 77 | "['xp_text', './backdrop_path', 'prefix', 'https://image.tmdb.org/t/p/original']" 78 | ] 79 | } 80 | } 81 | }, 82 | "reference": { 83 | "themoviedb_tv": "['xp_text', './id', 'int']", 84 | "imdb": "['xp_text', './external_ids/imdb_id']" 85 | } 86 | } 87 | } 88 | } 89 | } 90 | } 91 | }, 92 | { 93 | "http": { 94 | "url": "https://api.tmdb.org/3/tv/{id}/season/{$parent[season]}/episode/{$parent[episode]}?api_key={$parent[apikey]}&language={$parent[lang]}&append_to_response=credits", 95 | "method": "GET", 96 | "headers": { 97 | "Accept": "application/json" 98 | }, 99 | "timeout": 20, 100 | "result": "subject" 101 | } 102 | }, 103 | { 104 | "collect": { 105 | "source": "subject", 106 | "into": { 107 | "episode": { 108 | "tagline": "['xp_text', './name']", 109 | "season": "['xp_text', './season_number', 'int']", 110 | "episode": "['xp_text', './episode_number', 'int']", 111 | "original_available": "['xp_text', './air_date']", 112 | "summary": "['xp_text', './overview']", 113 | "actor": "['xp_texts', './credits/cast//name']", 114 | "writer": "['re_matches', '\"department\":\"Writing\"[^{{}}]*?\"name\":\"([^\"]*?)\"']", 115 | "director": "['re_matches', '\"department\":\"Directing\"[^{{}}]*?\"name\":\"([^\"]*?)\"']", 116 | "extra": { 117 | "[plugin_id]": { 118 | "poster": [ 119 | "['xp_text', './still_path', 'prefix', 'https://image.tmdb.org/t/p/w500']" 
120 | ] 121 | } 122 | } 123 | }, 124 | "rating": "['xp_text', './vote_average', 're_sub', '(\\d+\\.\\d)\\d*', '\\\\1']" 125 | } 126 | } 127 | }, 128 | { 129 | "collect": { 130 | "source": "subject", 131 | "into": { 132 | "episode": { 133 | "actor": "['xp_texts', './credits/guest_stars//name']" 134 | } 135 | } 136 | } 137 | }, 138 | { 139 | "collect": { 140 | "source": "rating", 141 | "into": { 142 | "episode": { 143 | "extra": { 144 | "[plugin_id]": { 145 | "rating": { 146 | "[plugin_id]": "['re_match', '(.*)', 'float']" 147 | } 148 | } 149 | } 150 | } 151 | } 152 | } 153 | }, 154 | { 155 | "retval": { 156 | "source": "episode" 157 | } 158 | } 159 | ] 160 | } 161 | } 162 | ] 163 | } -------------------------------------------------------------------------------- /scraper/__init__.py: -------------------------------------------------------------------------------- 1 | """A simple web scraper used by the Synology VideoInfo plugin.""" 2 | 3 | __all__ = ["scrape"] 4 | 5 | from scraper.scraper import scrape 6 | -------------------------------------------------------------------------------- /scraper/enums.py: -------------------------------------------------------------------------------- 1 | """Enum classes for this package.""" 2 | from enum import Enum, unique 3 | 4 | 5 | @unique 6 | class VideoType(Enum): 7 | """Type of video being scraped.""" 8 | 9 | MOVIE = "movie" 10 | TVSHOW = "tvshow" 11 | TVSHOW_EPISODE = "tvshow_episode" 12 | 13 | 14 | @unique 15 | class Language(Enum): 16 | """Language and country code, ISO 639-1 and ISO 3166-1.""" 17 | 18 | CHS = "zh-CN" # 简体中文 Simplified Chinese 19 | CHT = "zh-TW" # 繁体中文 Traditional Chinese 20 | CSY = "cs-CZ" # 捷克语 Czech 21 | DAN = "da-DK" # 丹麦语 Danish 22 | ENU = "en-US" # 英语 English 23 | FRE = "fr-FR" # 法语 French 24 | GER = "de-DE" # 德语 German 25 | HUN = "hu-HU" # 匈牙利语 Hungarian 26 | ITA = "it-IT" # 意大利语 Italian 27 | JPN = "ja-JP" # 日语 Japanese 28 | KRN = "ko-KR" # 韩语 Korean 29 | NLD = "nl-NL" # 荷兰语 Dutch 30 | NOR = "no-NO" # 挪威语 Norwegian 31 | PLK = "pl-PL" # 波兰语 Polish 32 | PTB = "pt-BR" # 巴西葡萄牙语 Brazilian Portuguese 33 | PTG = "pt-PT" # 葡萄牙语 Portuguese 34 | RUS = "ru-RU" # 俄语 Russian 35 | SPN = "es-ES" # 西班牙语 Spanish 36 | SVE = "sv-SE" # 瑞典语 Swedish 37 | TRK = "tr-TR" # 土耳其语 Turkish 38 | THA = "th-TH" # 泰语 Thai 39 | 40 | 41 | def video_type(value): 42 | """Convert string to VideoType enum.""" 43 | return VideoType[value.upper()] 44 | 45 | 46 | def lang_type(value): 47 | """Convert string to Language enum.""" 48 | return Language[value.upper()] 49 | -------------------------------------------------------------------------------- /scraper/exceptions.py: -------------------------------------------------------------------------------- 1 | """Exception classes for this package.""" 2 | 3 | 4 | class ScrapeError(Exception): 5 | def __init__(self, error_code: int): 6 | self.error_code = error_code 7 | 8 | 9 | class RequestSendError(ScrapeError): 10 | def __init__(self): 11 | super().__init__(1003) 12 | 13 | 14 | class ResultParseError(ScrapeError): 15 | def __init__(self): 16 | super().__init__(1004) 17 | 18 | 19 | class StopSignal(Exception): 20 | pass 21 | -------------------------------------------------------------------------------- /scraper/fake.py: -------------------------------------------------------------------------------- 1 | """Fake result for testing.""" 2 | import json 3 | from typing import Any, List 4 | 5 | 6 | def fake_result(plugin_id: str, videotype: str) -> str: 7 | """Return fake result.""" 8 | result: List[Any] = [] 9 | if 
videotype == "movie": 10 | result.append(_movie) 11 | elif videotype == "tvshow": 12 | result.append(_tvshow) 13 | elif videotype == "tvshow_episode": 14 | result.append(_tvshow_episode) 15 | return json.dumps( 16 | {"success": True, "result": result}, ensure_ascii=False, indent=2 17 | ).replace("[plugin_id]", plugin_id) 18 | 19 | 20 | _movie = { 21 | "title": "unknown", 22 | "tagline": "unknown", 23 | "original_available": "1970-01-01", 24 | "summary": "unknown", 25 | "certificate": "unknown", 26 | "genre": ["unknown"], 27 | "actor": ["unknown"], 28 | "writer": ["unknown"], 29 | "director": ["unknown"], 30 | "extra": { 31 | "[plugin_id]": { 32 | "rating": {"[plugin_id]": 0}, 33 | "poster": ["unknown"], 34 | "backdrop": ["unknown"], 35 | } 36 | }, 37 | } 38 | 39 | _tvshow = { 40 | "title": "unknown", 41 | "original_available": "1970-01-01", 42 | "summary": "unknown", 43 | "extra": { 44 | "[plugin_id]": { 45 | "poster": ["unknown"], 46 | "backdrop": ["unknown"], 47 | } 48 | }, 49 | } 50 | 51 | _tvshow_episode = { 52 | "title": "unknown", 53 | "tagline": "unknown", 54 | "season": 1, 55 | "episode": 1, 56 | "original_available": "1970-01-01", 57 | "summary": "unknown", 58 | "certificate": "unknown", 59 | "genre": ["unknown"], 60 | "actor": ["unknown"], 61 | "writer": ["unknown"], 62 | "director": ["unknown"], 63 | "extra": { 64 | "[plugin_id]": { 65 | "tvshow": { 66 | "title": "unknown", 67 | "original_available": "1970-01-01", 68 | "summary": "unknown", 69 | "extra": { 70 | "[plugin_id]": { 71 | "poster": ["unknown"], 72 | "backdrop": ["unknown"], 73 | } 74 | }, 75 | }, 76 | "rating": {"[plugin_id]": 0}, 77 | "poster": ["unknown"], 78 | } 79 | }, 80 | } 81 | -------------------------------------------------------------------------------- /scraper/functions/__init__.py: -------------------------------------------------------------------------------- 1 | """Defines the function decorator and abstract base class for arguments.""" 2 | __all__ = ["Args", "Func", "findfunc", "functions"] 3 | 4 | import inspect 5 | import logging 6 | import pkgutil 7 | from abc import ABC, abstractmethod 8 | from functools import wraps 9 | from typing import Any, Callable, Type 10 | 11 | _logger = logging.getLogger(__name__) 12 | 13 | 14 | class Args(ABC): 15 | """Abstract base class for function arguments.""" 16 | 17 | def __call__(self, *args, **kwargs): 18 | return self.parse(*args, **kwargs) 19 | 20 | @abstractmethod 21 | def parse(self, rawargs: Any, context: dict) -> "Args": 22 | pass 23 | 24 | @staticmethod 25 | def substitute(obj: Any, context: dict) -> Any: 26 | """Recursively substitute strings in an object with given context.""" 27 | if isinstance(obj, str): 28 | return obj.format(**context) 29 | elif isinstance(obj, list): 30 | return [Args.substitute(item, context) for item in obj] 31 | elif isinstance(obj, dict): 32 | return {k: Args.substitute(v, context) for k, v in obj.items()} 33 | else: 34 | return obj 35 | 36 | 37 | class Func: 38 | """Function decorator for registering functions.""" 39 | 40 | def __init__(self, name: str, args: Type[Args]): 41 | self.name = name 42 | self.args = args 43 | 44 | def __call__(self, func): 45 | @wraps(func) 46 | def wrapped(rawargs: Any, context: dict) -> Any: 47 | return func(self.args()(rawargs, context), context) 48 | 49 | # bind function name to a special attribute 50 | wrapped._funcname = self.name 51 | return wrapped 52 | 53 | 54 | # a dictionary of all registered functions 55 | functions = {} 56 | 57 | # load all marked functions in this package 58 | for 
loader, modname, _ in pkgutil.walk_packages(__path__): 59 | module = loader.find_spec(modname).loader.load_module(modname) 60 | funcs = inspect.getmembers( 61 | module, lambda m: (inspect.isfunction(m) and hasattr(m, "_funcname")) 62 | ) 63 | _logger.info("Load %d executable functions in %s.py", len(funcs), modname) 64 | functions.update({getattr(func, "_funcname"): func for _, func in funcs}) 65 | 66 | 67 | def findfunc(funcname: str) -> Callable[[Any, dict], Any]: 68 | """Find a registered function by name.""" 69 | func = functions.get(funcname) 70 | if func is None: 71 | _logger.error('Function "%s" not found', funcname) 72 | raise KeyError(f'Function "{funcname}" not found') 73 | return func 74 | -------------------------------------------------------------------------------- /scraper/functions/collect.py: -------------------------------------------------------------------------------- 1 | """The implementation of the collect function.""" 2 | import ast 3 | import logging 4 | import re 5 | import time 6 | from typing import Any 7 | from xml.etree import ElementTree 8 | from xml.etree.ElementTree import Element 9 | 10 | from scraper.exceptions import ResultParseError 11 | from scraper.functions import Args, Func 12 | from scraper.utils import dict_update, re_sub, str_to_etree, strftime, strip 13 | 14 | _logger = logging.getLogger(__name__) 15 | 16 | # define the valid string template pattern 17 | _pattern = re.compile(r"\s*\[.*]\s*") 18 | 19 | 20 | class CollectArgs(Args): 21 | """Arguments for the collect function.""" 22 | 23 | source: Any 24 | into: dict 25 | 26 | def parse(self, rawargs: dict, context: dict) -> "CollectArgs": 27 | source = context[rawargs["source"]] 28 | from_ = rawargs.get("from") 29 | if from_ is not None: 30 | source = _render(from_, source) 31 | 32 | self.source = source 33 | self.into = self.substitute(rawargs["into"], context) 34 | return self 35 | 36 | 37 | @Func("collect", CollectArgs) 38 | def collect(args: CollectArgs, context: dict) -> None: 39 | """Collect data from a source and put it into the context.""" 40 | for ctxkey, tmpl in args.into.items(): 41 | try: 42 | result = _render(tmpl, args.source) 43 | except Exception as e: 44 | _logger.error('Failed to collect "%s" using "%s"', ctxkey, tmpl) 45 | raise ResultParseError from e 46 | 47 | target = context.get(ctxkey) 48 | if isinstance(target, list) and isinstance(result, list): 49 | target.extend(x for x in result if x not in target) 50 | elif isinstance(target, dict) and isinstance(result, dict): 51 | dict_update(target, result) 52 | else: 53 | context[ctxkey] = result 54 | _logger.info('Collected "%s" using "%s"', ctxkey, tmpl) 55 | _logger.debug("<== result: %s", context[ctxkey]) 56 | 57 | 58 | def _render(tmpl: Any, source, etree=None): 59 | """Render a template with the given source.""" 60 | if etree is None and _need_etree(tmpl): 61 | etree = str_to_etree(source) 62 | 63 | if isinstance(tmpl, list): 64 | return [_render(item, source, etree) for item in tmpl] 65 | elif isinstance(tmpl, dict): 66 | return {k: _render(v, source, etree) for k, v in tmpl.items()} 67 | elif isinstance(tmpl, str): 68 | return _render_str(tmpl, source, etree) 69 | return tmpl 70 | 71 | 72 | def _render_str(tmpl: str, source, etree): 73 | """Render a string template with the given source.""" 74 | if len(tmpl.strip()) == 0: 75 | return "" 76 | elif re.fullmatch(_pattern, tmpl) is None: 77 | return tmpl 78 | 79 | # evaluate the string template to get strategy and arguments 80 | finder, expr, *modification = 
ast.literal_eval(tmpl) 81 | 82 | # find result from source 83 | result = None 84 | if isinstance(source, str): 85 | if finder.startswith("xp_"): 86 | result = _xpath_find(finder[3:], expr, etree) 87 | elif finder.startswith("re_"): 88 | result = _regex_match(finder[3:], expr, source) 89 | elif isinstance(source, dict) and finder == "get": 90 | result = source.get(expr) 91 | 92 | # modify result if needed 93 | if result is not None and result != "" and len(modification) > 0: 94 | modifier, *args = modification 95 | result = _modify(result, modifier, args) 96 | return strip(result) 97 | 98 | 99 | def _need_etree(tmpl: Any): 100 | """Check if the template needs an etree.""" 101 | if isinstance(tmpl, list): 102 | return any(_need_etree(item) for item in tmpl) 103 | elif isinstance(tmpl, dict): 104 | return any(_need_etree(v) for v in tmpl.values()) 105 | elif isinstance(tmpl, str): 106 | return "xp_" in tmpl 107 | return False 108 | 109 | 110 | def _xpath_find(strategy: str, expr: str, etree: Element): 111 | """Find strings in an element tree using xpath.""" 112 | if strategy == "elem": 113 | elem = etree.find(expr) 114 | if elem is not None: 115 | return ElementTree.tostring(elem, encoding="unicode") 116 | elif strategy == "elems": 117 | elist = etree.findall(expr) 118 | return [ElementTree.tostring(e, encoding="unicode") for e in elist] 119 | elif strategy == "text": 120 | return etree.findtext(expr) 121 | elif strategy == "texts": 122 | return list(dict.fromkeys(e.text for e in etree.findall(expr))) 123 | elif strategy.startswith("attr_"): 124 | elem = etree.find(expr) 125 | if elem is not None: 126 | return elem.attrib[strategy[5:]]  # attribute name after "attr_" 127 | elif strategy.startswith("attrs_"): 128 | elist = etree.findall(expr) 129 | return [e.attrib[strategy[6:]] for e in elist]  # name after "attrs_" 130 | return None 131 | 132 | 133 | def _regex_match(strategy: str, expr: str, source: str): 134 | """Match strings in a source string using regex.""" 135 | pattern = re.compile(expr, re.DOTALL) 136 | if strategy == "match": 137 | matches = pattern.search(source) 138 | return matches.group(1) if matches else None 139 | elif strategy == "matches": 140 | return list(dict.fromkeys(pattern.findall(source))) 141 | return None 142 | 143 | 144 | def _modify(result: Any, strategy: str, args: list): 145 | """Modify the result using the given strategy and arguments.""" 146 | args_len = len(args) 147 | if strategy == "int": 148 | result = int(result) 149 | elif strategy == "float": 150 | result = float(result) 151 | elif strategy == "split" and args_len == 1: 152 | result = result.split(args[0]) 153 | elif strategy == "prefix" and args_len == 1: 154 | result = args[0] + result 155 | elif strategy == "suffix" and args_len == 1: 156 | result = result + args[0] 157 | elif strategy == "re_sub" and args_len == 2: 158 | pattern, repl = args 159 | result = re_sub(result, pattern, repl) 160 | elif strategy == "reformat" and args_len == 2: 161 | orig_pattern, new_pattern = args 162 | result = time.strftime(new_pattern, time.strptime(result, orig_pattern)) 163 | elif strategy == "strftime": 164 | pattern = args[0] 165 | millisecs = args[1] if args_len == 2 else False 166 | result = strftime(result, pattern, millisecs) 167 | return result 168 | -------------------------------------------------------------------------------- /scraper/functions/doh.py: -------------------------------------------------------------------------------- 1 | """The implementation of the doh function.""" 2 | import ast 3 | import base64 4 | import concurrent 5 | import concurrent.futures
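# A sketch of how this module is driven by a flow definition (the host values
# below are illustrative, mirroring the tmdb flows in scrapeflows/): a "doh"
# step only registers hostnames, and only when the flow sets "doh_enabled";
# the patched socket.getaddrinfo below then resolves those hosts through the
# resolvers listed in resolvers.conf on the next HTTP request.
#
#   { "doh": { "host": "api.tmdb.org" } }
#
# Multiple hosts can also be registered at once via the plural form:
#
#   { "doh": { "hosts": ["api.tmdb.org", "image.tmdb.org"] } }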
6 | import json 7 | import logging 8 | import socket 9 | import struct 10 | import urllib 11 | import urllib.request 12 | from pathlib import Path 13 | from typing import Dict, List, Optional 14 | 15 | from scraper.functions import Args, Func 16 | 17 | _logger = logging.getLogger(__name__) 18 | 19 | # define a global set to store registered hosts 20 | _registered_hosts = set() 21 | 22 | # define a global thread pool executor 23 | _executor = concurrent.futures.ThreadPoolExecutor() 24 | 25 | # define default DoH configuration 26 | _doh_timeout = 5 27 | _doh_cache: Dict[str, str] = {} 28 | _resolvers_conf = Path(__file__).resolve().parent / "../../resolvers.conf" 29 | with open(_resolvers_conf, "r", encoding="utf-8") as doh_reader: 30 | _doh_resolvers = ast.literal_eval(doh_reader.read()) 31 | 32 | 33 | def _patched_getaddrinfo(host, *args, **kwargs): 34 | """Patched version of socket.getaddrinfo.""" 35 | if host not in _registered_hosts: 36 | return _orig_getaddrinfo(host, *args, **kwargs) 37 | 38 | # check if the host is already resolved 39 | if host in _doh_cache: 40 | ip = _doh_cache[host] 41 | _logger.info("Resolved [%s] to [%s] (cached)", host, ip) 42 | return _orig_getaddrinfo(ip, *args, **kwargs) 43 | 44 | # resolve the host using DoH 45 | futures = [] 46 | for resolver in _doh_resolvers: 47 | futures.append(_executor.submit(_doh_query, resolver, host)) 48 | 49 | for future in concurrent.futures.as_completed(futures): 50 | ip = future.result() 51 | if ip is not None: 52 | _logger.info("Resolved [%s] to [%s]", host, ip) 53 | _doh_cache[host] = ip 54 | host = ip 55 | break 56 | 57 | return _orig_getaddrinfo(host, *args, **kwargs) 58 | 59 | 60 | # monkey patch socket.getaddrinfo 61 | _orig_getaddrinfo = socket.getaddrinfo 62 | socket.getaddrinfo = _patched_getaddrinfo 63 | 64 | 65 | def _doh_query(resolver: str, host: str) -> Optional[str]: 66 | """Query the IP address of the given host using the given DoH resolver.""" 67 | 68 | # construct DNS query message (RFC 1035) 69 | header = b"".join( 70 | [ 71 | b"\x00\x00", # ID: 0 72 | b"\x01\x00", # FLAGS: standard recursive query 73 | b"\x00\x01", # QDCOUNT: 1 74 | b"\x00\x00", # ANCOUNT: 0 75 | b"\x00\x00", # NSCOUNT: 0 76 | b"\x00\x00", # ARCOUNT: 0 77 | ] 78 | ) 79 | question = b"".join( 80 | [ 81 | b"".join( 82 | [ 83 | struct.pack("B", len(item)) + item.encode("utf-8") 84 | for item in host.split(".") 85 | ] 86 | ) 87 | + b"\x00", # QNAME: domain name sequence 88 | b"\x00\x01", # QTYPE: A 89 | b"\x00\x01", # QCLASS: IN 90 | ] 91 | ) 92 | message = header + question 93 | 94 | try: 95 | # send GET request to DoH resolver (RFC 8484, which specifies base64url) 96 | b64message = base64.urlsafe_b64encode(message).decode("utf-8").rstrip("=") 97 | url = f"https://{resolver}/dns-query?dns={b64message}" 98 | headers = {"Content-Type": "application/dns-message"} 99 | _logger.info("DoH request: %s", url) 100 | 101 | request = urllib.request.Request(url, headers=headers, method="GET") 102 | with urllib.request.urlopen(request, timeout=_doh_timeout) as response: 103 | _logger.info("Resolver(%s) response: %s", resolver, response.status) 104 | if response.status != 200: 105 | return None 106 | resp_body = response.read() 107 | 108 | # parse DNS response message (RFC 1035) 109 | # name(compressed):2 + type:2 + class:2 + ttl:4 + rdlength:2 = 12 bytes 110 | first_rdata_start = len(header) + len(question) + 12 111 | # rdata(A record) = 4 bytes 112 | first_rdata_end = first_rdata_start + 4 113 | # convert rdata to IP address 114 | return socket.inet_ntoa(resp_body[first_rdata_start:first_rdata_end])
115 | except Exception as e: 116 | _logger.error("Resolver(%s) request error: %s", resolver, e) 117 | return None 118 | 119 | 120 | def _doh_query_json(resolver: str, host: str) -> Optional[str]: 121 | """Query the IP address via the resolver's JSON API (unused alternative to _doh_query).""" 122 | url = f"https://{resolver}/dns-query?name={host}&type=A" 123 | headers = {"Accept": "application/dns-json"} 124 | _logger.info("DoH request: %s", url) 125 | try: 126 | request = urllib.request.Request(url, headers=headers, method="GET") 127 | with urllib.request.urlopen(request, timeout=_doh_timeout) as response: 128 | _logger.info("Resolver(%s) response: %s", resolver, response.status) 129 | if response.status != 200: 130 | return None 131 | response_body = response.read().decode("utf-8") 132 | _logger.debug("<== body: %s", response_body) 133 | answer = json.loads(response_body)["Answer"] 134 | return answer[0]["data"] 135 | except Exception as e: 136 | _logger.error("Resolver(%s) request error: %s", resolver, e) 137 | return None 138 | 139 | 140 | class DohArgs(Args): 141 | """Arguments for the doh function.""" 142 | 143 | hosts: List[str] 144 | 145 | def parse(self, rawargs: dict, context: dict) -> "DohArgs": 146 | doh_enabled = context["doh"] 147 | if doh_enabled: 148 | self.hosts = rawargs.get("hosts", []) 149 | if "host" in rawargs: 150 | self.hosts.append(rawargs["host"]) 151 | else: 152 | self.hosts = [] 153 | return self 154 | 155 | 156 | @Func("doh", DohArgs) 157 | def doh(args: DohArgs, _) -> None: 158 | """Put the given hosts into the registered hosts set.""" 159 | _registered_hosts.update(args.hosts) 160 | -------------------------------------------------------------------------------- /scraper/functions/loop.py: -------------------------------------------------------------------------------- 1 | """The implementation of the loop function.""" 2 | import logging 3 | from typing import Generator, List, Optional, Tuple 4 | 5 | from scraper.functions import Args, Func, functions 6 | 7 | _logger = logging.getLogger(__name__) 8 | 9 | 10 | class LoopArgs(Args): 11 | """Arguments for the loop function.""" 12 | 13 | source: list 14 | item: str 15 | steps: List[Tuple[str, dict]] 16 | iferr: Optional[str] 17 | 18 | def parse(self, rawargs: dict, context: dict) -> "LoopArgs": 19 | self.source = context[rawargs["source"]] 20 | self.item = rawargs["item"] 21 | self.steps = [s.popitem() for s in rawargs["steps"]] 22 | self.iferr = rawargs.get("iferr") 23 | return self 24 | 25 | 26 | @Func("loop", LoopArgs) 27 | def loop(args: LoopArgs, context: dict) -> Generator: 28 | """Loop over a list of items and execute steps.""" 29 | for i in range(len(args.source)): 30 | subcontext = { 31 | "$parent": context, 32 | "site": context["site"], 33 | args.item: args.source[i], 34 | } 35 | try: 36 | for funcname, rawargs in args.steps: 37 | # execute the function with subcontext 38 | result = functions[funcname](rawargs, subcontext) 39 | if result is not None: 40 | yield result 41 | args.source[i] = subcontext[args.item] 42 | except Exception as e: 43 | if args.iferr == "continue": 44 | _logger.error("Error occurred in loop", exc_info=True) 45 | continue 46 | raise e 47 | -------------------------------------------------------------------------------- /scraper/functions/request.py: -------------------------------------------------------------------------------- 1 | """The implementation of the HTTP function.""" 2 | import json 3 | import logging 4 | import shelve 5 | import time
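# A sketch of the kind of flow step this module executes (abridged from
# scrapeflows/tmdb_movie.json; the full URL carries more query parameters):
#
#   {
#     "http": {
#       "url": "https://api.tmdb.org/3/search/movie?api_key={apikey}&query={title}",
#       "method": "GET",
#       "headers": { "Accept": "application/json" },
#       "timeout": 20,
#       "result": "metadata"
#     }
#   }
#
# HttpArgs.parse() substitutes {apikey} and {title} from the flow context and
# urlencodes the URL; http() then sends the request and stores the response
# body in context["metadata"], caching it on disk for _cache_expire seconds.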
6 | import urllib 7 | import urllib.parse 8 | import urllib.request 9 | from http.cookiejar import CookieJar 10 | from pathlib import Path 11 | from typing import Any 12 | 13 | from scraper.exceptions import RequestSendError 14 | from scraper.functions import Args, Func 15 | 16 | _logger = logging.getLogger(__name__) 17 | 18 | # define default HTTP cache configuration 19 | _basedir = Path(__file__).resolve().parent 20 | _cache_prefix = ".cache_" 21 | _cache_expire = 86400 22 | 23 | # define a global opener and install it to urllib.request 24 | _cookie_processor = urllib.request.HTTPCookieProcessor(CookieJar()) 25 | _global_opener = urllib.request.build_opener(_cookie_processor) 26 | urllib.request.install_opener(_global_opener) 27 | 28 | 29 | class HttpArgs(Args): 30 | """Arguments for the HTTP function.""" 31 | 32 | url: str 33 | method: str 34 | headers: dict 35 | body: Any 36 | timeout: float 37 | result: str 38 | 39 | def parse(self, rawargs: dict, context: dict) -> "HttpArgs": 40 | # urlencode the request query string 41 | url = self.substitute(rawargs["url"], context) 42 | url = urllib.parse.quote(url, safe=":/?&=") 43 | 44 | # substitute the request headers 45 | headers = { 46 | k.lower(): self.substitute(v, context) 47 | for k, v in rawargs.get("headers", {}).items() 48 | } 49 | 50 | # process request body according to the content-type 51 | body = self.substitute(rawargs.get("body"), context) 52 | if body is not None: 53 | content_type = headers.get("content-type", "").lower() 54 | if content_type.startswith("application/json"): 55 | body = json.dumps(body, ensure_ascii=False) 56 | elif content_type.startswith("application/x-www-form-urlencoded"): 57 | body = urllib.parse.urlencode(body) 58 | 59 | # construct the arguments 60 | self.url = url 61 | self.method = rawargs["method"].upper() 62 | self.headers = headers 63 | self.body = body 64 | self.timeout = rawargs.get("timeout", 10) 65 | self.result = rawargs["result"] 66 | return self 67 | 68 | 69 | @Func("http", HttpArgs) 70 | def http(args: HttpArgs, context: dict) -> None: 71 | cache_name = _cache_prefix + context["site"] 72 | # send the HTTP request 73 | response = _http_request( 74 | args.url, args.method, args.headers, args.body, args.timeout, cache_name 75 | ) 76 | # put the response into the context 77 | context[args.result] = response 78 | 79 | 80 | def _http_request(url, method, headers, body, timeout, cache_name): 81 | """Send an HTTP request and return the response body.""" 82 | _logger.info("HTTP request: %s %s", method, url) 83 | _logger.debug("==> headers: %s", headers) 84 | _logger.debug("==> body: %s", body) 85 | 86 | # check if the cache is expired 87 | shelve_flag = "c" # create the database if it does not exist 88 | for cache_file in _basedir.glob(cache_name + "*"): 89 | modify_time = cache_file.stat().st_mtime 90 | if (time.time() - modify_time) > _cache_expire: 91 | shelve_flag = "n" # always create a new, empty database 92 | 93 | # send the request and cache the response 94 | with shelve.open(str(_basedir / cache_name), shelve_flag) as cache: 95 | cache_key = url + str(body) 96 | if cache_key in cache: 97 | response_body = cache[cache_key] 98 | _logger.info("HTTP response: cached") 99 | _logger.debug("<== body: %s", response_body) 100 | return response_body 101 | 102 | try: 103 | body = body.encode("utf-8") if body is not None else None 104 | request = urllib.request.Request(url, body, headers, method=method) 105 | with urllib.request.urlopen(request, timeout=timeout) as response: 106 | response_body = 
response.read().decode("utf-8") 107 | if 200 <= response.status < 300: 108 | cache[cache_key] = response_body 109 | _logger.info("HTTP response: %s", response.status) 110 | _logger.debug("<== headers: %s", response.headers) 111 | _logger.debug("<== body: %s", response_body) 112 | return response_body 113 | except Exception as e: 114 | _logger.error("HTTP request error: %s", e) 115 | raise RequestSendError from e 116 | -------------------------------------------------------------------------------- /scraper/functions/retval.py: -------------------------------------------------------------------------------- 1 | """The implementation of the retval function.""" 2 | import ast 3 | from datetime import datetime 4 | from typing import Any, Optional, Union 5 | 6 | from scraper.exceptions import StopSignal 7 | from scraper.functions import Args, Func 8 | 9 | 10 | class RetvalArgs(Args): 11 | """Arguments for the retval function.""" 12 | 13 | condition: bool 14 | ctxkey: Optional[str] 15 | 16 | def parse(self, rawargs: dict, context: dict) -> "RetvalArgs": 17 | condition = True 18 | 19 | ifempty = rawargs.get("ifempty") 20 | if ifempty is not None: 21 | obj = context.get(ifempty) 22 | condition &= obj is None or len(obj) == 0 23 | 24 | notempty = rawargs.get("notempty") 25 | if notempty is not None: 26 | obj = context.get(notempty) 27 | condition &= obj is not None and len(obj) > 0 28 | 29 | compare = rawargs.get("compare") 30 | if compare is not None: 31 | left_key, operator, right_key = ast.literal_eval(compare) 32 | left = context.get(left_key) 33 | right = context.get(right_key) 34 | condition &= _compare(left, operator, right) 35 | 36 | self.condition = condition 37 | self.ctxkey = rawargs.get("source") 38 | return self 39 | 40 | 41 | @Func("retval", RetvalArgs) 42 | def retval(args: RetvalArgs, context: dict) -> Any: 43 | """Return the value from context with given key.""" 44 | if args.condition: 45 | if args.ctxkey is not None: 46 | return context[args.ctxkey] 47 | else: 48 | raise StopSignal 49 | 50 | 51 | def _compare(left: Any, operator: str, right: Any) -> bool: 52 | """Compare two values with the given operator.""" 53 | if left is None or right is None: 54 | return True # ignore compare if either value is None 55 | if isinstance(left, (int, float)) and isinstance(right, (int, float)): 56 | return _compare_num(left, operator, right) 57 | if isinstance(left, str) and isinstance(right, str): 58 | return _compare_num(_timestamp(left), operator, _timestamp(right)) 59 | return False 60 | 61 | 62 | def _compare_num( 63 | left: Union[int, float], operator: str, right: Union[int, float] 64 | ) -> bool: 65 | """Compare two numbers with the given operator.""" 66 | if operator == "==": 67 | return left == right 68 | elif operator == "!=": 69 | return left != right 70 | elif operator == ">": 71 | return left > right 72 | elif operator == ">=": 73 | return left >= right 74 | elif operator == "<": 75 | return left < right 76 | elif operator == "<=": 77 | return left <= right 78 | else: 79 | return False 80 | 81 | 82 | def _timestamp(time_str: str) -> float: 83 | """Convert a time string to timestamp.""" 84 | if len(time_str) == 4: 85 | format_str = "%Y" 86 | elif len(time_str) == 7: 87 | format_str = "%Y-%m" 88 | else: 89 | format_str = "%Y-%m-%d" 90 | 91 | try: 92 | return datetime.strptime(time_str, format_str).timestamp() 93 | except ValueError: 94 | return 0 95 | -------------------------------------------------------------------------------- /scraper/scraper.py: 
-------------------------------------------------------------------------------- 1 | """Entry point for the scraper.""" 2 | import argparse 3 | import json 4 | import logging 5 | import threading 6 | import time 7 | from pathlib import Path 8 | from typing import Any, Dict, List, Optional 9 | 10 | from scraper.enums import lang_type, video_type 11 | from scraper.exceptions import ScrapeError, StopSignal 12 | from scraper.fake import fake_result 13 | from scraper.functions import findfunc 14 | 15 | _logger = logging.getLogger(__name__) 16 | 17 | # define default scraping configuration path 18 | _basedir = Path(__file__).resolve().parent 19 | _flow_path = _basedir / "../scrapeflows" 20 | _flowconf_path = _basedir / "../scrapeflows.conf" 21 | 22 | # define maximum number of results to return 23 | _maxlimit = 10 24 | _results: List[Any] = [] 25 | 26 | 27 | def scrape(plugin_id: str) -> str: 28 | """Scrape video information from given arguments.""" 29 | parser = argparse.ArgumentParser() 30 | parser.add_argument("--input", type=str, required=True) 31 | parser.add_argument("--type", type=video_type, required=True) 32 | parser.add_argument("--lang", type=lang_type, required=False) 33 | parser.add_argument("--limit", type=int, default=_maxlimit) 34 | parser.add_argument("--allowguess", action="store_true", default=False) 35 | parser.add_argument("--loglevel", type=str, default="critical") 36 | 37 | args = parser.parse_known_args()[0] 38 | videotype = args.type.value 39 | language = args.lang.value if args.lang is not None else None 40 | maxlimit = min(args.limit, _maxlimit) 41 | loglevel = args.loglevel.upper() 42 | 43 | # set basic logging configuration 44 | logformat = ( 45 | "%(asctime)s %(threadName)s %(levelname)s " 46 | "%(filename)s:%(lineno)d - %(message)s" 47 | ) 48 | logging.basicConfig(level=getattr(logging, loglevel), format=logformat) 49 | 50 | # parse --input argument as JSON 51 | jsoninput = json.loads(args.input) 52 | if jsoninput["title"] == "--install": 53 | return fake_result(plugin_id, videotype) 54 | initialval = { 55 | "title": jsoninput["title"], 56 | "season": jsoninput.get("season", 0), 57 | "episode": jsoninput.get("episode", 1), 58 | "available": jsoninput.get("original_available", None), 59 | "year": str(jsoninput.get("original_available", ""))[:4], 60 | "lang": language, 61 | "limit": maxlimit, 62 | "version": _version(plugin_id), 63 | } 64 | 65 | # load and execute scrape flows using multithreading 66 | start = time.time() 67 | taskqueue: Dict[int, List[threading.Thread]] = {} 68 | for flow in ScrapeFlow.load(_flow_path, videotype, language, initialval): 69 | task = threading.Thread(target=_start, args=(flow, maxlimit)) 70 | tasks = taskqueue.get(flow.priority, []) 71 | tasks.append(task) 72 | taskqueue[flow.priority] = tasks 73 | for tasks in dict(sorted(taskqueue.items(), key=lambda x: x[0])).values(): 74 | if len(_results) >= maxlimit: 75 | break 76 | for task in tasks: 77 | task.start() 78 | for task in tasks: 79 | task.join() 80 | end = time.time() 81 | _logger.info("Total execution time: %.3f seconds", end - start) 82 | return json.dumps( 83 | {"success": True, "result": _results}, ensure_ascii=False, indent=2 84 | ).replace("[plugin_id]", plugin_id) 85 | 86 | 87 | def _start(flow: "ScrapeFlow", limit: int): 88 | """Start a scrape flow and store results.""" 89 | try: 90 | result_gen = flow.start() 91 | while True: 92 | if len(_results) >= limit: 93 | break 94 | try: 95 | _results.append(next(result_gen)) 96 | except StopIteration: 97 | break 98 | except 
ScrapeError: 99 | _logger.error("Failed to scrape from %s", flow.site, exc_info=True) 100 | 101 | 102 | def _version(plugin_id: str) -> str: 103 | """Split the plugin ID to get the version.""" 104 | if "-" in plugin_id: 105 | version = plugin_id.split("-")[-1] 106 | if version != "plugin": 107 | return f"/{version}" 108 | return "" 109 | 110 | 111 | class ScrapeFlow: 112 | """A flow of steps to scrape video information.""" 113 | 114 | def __init__( 115 | self, 116 | site: str, 117 | steps: list, 118 | context: dict, 119 | priority: Optional[int], 120 | ): 121 | self.site = site 122 | self.steps = steps 123 | self.context = context 124 | self.priority = priority if priority is not None else 999 125 | 126 | def start(self): 127 | """Start the scrape flow and return a generator.""" 128 | for funcname, rawargs in [s.popitem() for s in self.steps]: 129 | # execute the function with context 130 | try: 131 | iterable = findfunc(funcname)(rawargs, self.context) 132 | if iterable is not None: 133 | yield from iterable 134 | except StopSignal: 135 | break 136 | 137 | @staticmethod 138 | def load(path: Path, videotype: str, language: str, initialval: dict): 139 | """Load scrape flows from given path.""" 140 | 141 | flowconf = None 142 | if _flowconf_path.exists(): 143 | with open(_flowconf_path, "r", encoding="utf-8") as conf_reader: 144 | flowconf = json.load(conf_reader) 145 | 146 | for filepath in path.glob("*.json"): 147 | with open(filepath, "r", encoding="utf-8") as def_reader: 148 | flowdef = json.load(def_reader) 149 | site = flowdef["site"] 150 | siteconf = None 151 | if flowconf is not None and site in flowconf: 152 | siteconf = flowconf[site] 153 | 154 | # filter out flows that do not match the video type 155 | if not ScrapeFlow.valid(flowdef, siteconf, videotype, language): 156 | continue 157 | 158 | # generate a flow instance from the definition 159 | steps = list(flowdef["steps"]) 160 | context = initialval.copy() 161 | context["site"] = site 162 | context["doh"] = flowdef.get("doh_enabled", False) 163 | priority = None 164 | if siteconf is not None: 165 | priority = siteconf["priority"] 166 | context.update(siteconf) 167 | yield ScrapeFlow(site, steps, context, priority) 168 | 169 | @staticmethod 170 | def valid(flowdef: Any, siteconf: Any, videotype: str, language: str): 171 | """Check if the flow definition is valid.""" 172 | 173 | if language is not None and "lang" in flowdef: 174 | if language not in flowdef["lang"]: 175 | return False 176 | 177 | if flowdef["type"] != videotype: 178 | return False 179 | 180 | if siteconf is not None: 181 | if not any(videotype.startswith(t) for t in siteconf["types"]): 182 | return False 183 | 184 | return True 185 | -------------------------------------------------------------------------------- /scraper/utils.py: -------------------------------------------------------------------------------- 1 | """Utility functions for this package.""" 2 | import json 3 | import re 4 | import time 5 | from html.parser import HTMLParser 6 | from typing import Any, List, Optional, Union 7 | from xml.etree import ElementTree 8 | 9 | from scraper.exceptions import ResultParseError 10 | 11 | 12 | def strftime( 13 | timestamp: Union[str, int, float], pattern: str, millisecs: bool = False 14 | ) -> str: 15 | """Format a timestamp with the given pattern.""" 16 | if isinstance(timestamp, str): 17 | timestamp = float(timestamp) 18 | 19 | if millisecs: 20 | timestamp /= 1000 21 | 22 | return time.strftime(pattern, time.localtime(timestamp)) 23 | 24 | 25 | def 
dict_update(d1: dict, d2: dict) -> dict: 26 | """Recursively update a dictionary.""" 27 | for k, v2 in d2.items(): 28 | v1 = d1.get(k, None) 29 | if isinstance(v1, dict) and isinstance(v2, dict): 30 | d1[k] = dict_update(d1[k], v2) 31 | elif isinstance(v1, list) and isinstance(v2, list): 32 | d1[k].extend(x for x in v2 if x not in v1) 33 | else: 34 | d1[k] = v2 35 | 36 | return d1 37 | 38 | 39 | def strip(obj: Any) -> Any: 40 | """Recursively strip a string, list, or dict.""" 41 | if isinstance(obj, list): 42 | return list(filter(lambda x: x is not None, [strip(i) for i in obj])) 43 | elif isinstance(obj, dict): 44 | return {k: strip(v) for k, v in obj.items()} 45 | elif isinstance(obj, str): 46 | obj = obj.strip() 47 | return obj if obj != "" else None 48 | return obj 49 | 50 | 51 | def re_sub(obj: Any, pattern: str, repl: str) -> Any: 52 | """Recursively replace a pattern in a string, list, or dict.""" 53 | if isinstance(obj, list): 54 | return [re_sub(item, pattern, repl) for item in obj] 55 | elif isinstance(obj, dict): 56 | return {k: re_sub(v, pattern, repl) for k, v in obj.items()} 57 | elif isinstance(obj, str): 58 | return re.sub(pattern, repl, obj) 59 | return obj 60 | 61 | 62 | def str_to_etree(string: str) -> Optional[ElementTree.Element]: 63 | """Convert a string to an ElementTree.""" 64 | string = string.strip() 65 | if string.startswith("{") or string.startswith("["): 66 | return json_to_etree(json.loads(string, strict=False)) 67 | elif string.startswith("<"): 68 | return html_to_etree(string) 69 | return None 70 | 71 | 72 | def json_to_etree(json_obj: Any, tag: str = "root"): 73 | """Convert a JSON object to an ElementTree.""" 74 | element = ElementTree.Element(tag) 75 | if isinstance(json_obj, list): 76 | for i, item in enumerate(json_obj): 77 | element.append(json_to_etree(item, f"i{str(i)}")) 78 | elif isinstance(json_obj, dict): 79 | for k, v in json_obj.items(): 80 | element.append(json_to_etree(v, k)) 81 | elif json_obj is not None: 82 | element.text = str(json_obj) 83 | return element 84 | 85 | 86 | def html_to_etree(html_text: str): 87 | """Convert an HTML text to an ElementTree.""" 88 | return EtreeHTMLParser().parse(html_text) 89 | 90 | 91 | class EtreeHTMLParser(HTMLParser): 92 | """Simple HTML parser that converts HTML to an ElementTree.""" 93 | 94 | tag_stack: List[ElementTree.Element] 95 | cur_tag: Optional[ElementTree.Element] 96 | after_end: bool 97 | 98 | def __init__(self): 99 | super().__init__() 100 | self.tag_stack = [] 101 | self.cur_tag = None 102 | self.after_end = False 103 | 104 | def handle_starttag(self, tag, attrs): 105 | self.after_end = False 106 | self.cur_tag = ElementTree.Element(tag, {k: v or "" for k, v in attrs}) 107 | if len(self.tag_stack) > 0: 108 | self.tag_stack[-1].append(self.cur_tag) 109 | self.tag_stack.append(self.cur_tag) 110 | 111 | def handle_endtag(self, tag): 112 | while any(item.tag == tag for item in self.tag_stack): 113 | self.after_end = True 114 | self.cur_tag = self.tag_stack.pop() 115 | if self.cur_tag.tag == tag: 116 | break 117 | 118 | def handle_data(self, data): 119 | if self.cur_tag is not None: 120 | if self.after_end: 121 | self.cur_tag.tail = data.strip() 122 | else: 123 | self.cur_tag.text = data.strip() 124 | 125 | def error(self, message): 126 | raise ResultParseError 127 | 128 | def parse(self, html): 129 | self.feed(html) 130 | self.close() 131 | return self.cur_tag 132 | -------------------------------------------------------------------------------- /setup.py: 
-------------------------------------------------------------------------------- 1 | """Package script for this plugin.""" 2 | import string 3 | from pathlib import Path 4 | 5 | from setuptools import setup 6 | 7 | from version import version 8 | 9 | # get the root directory of this plugin 10 | ROOT_DIR = Path(__file__).resolve().parent 11 | 12 | # use the name of the root directory as the plugin id 13 | PLUGIN_ID = ROOT_DIR.name 14 | 15 | # write the INFO file for this plugin 16 | INFO_TMPL = """ 17 | { 18 | "id": "${plugin_id}-${version}", 19 | "entry_file": "run.sh", 20 | "type": ["movie", "tvshow"], 21 | "language": ["chs"], 22 | "test_example": { 23 | "movie": { 24 | "title": "--install" 25 | }, 26 | "tvshow": { 27 | "title": "--install" 28 | }, 29 | "tvshow_episode": { 30 | "title": "--install", 31 | "season": 1, 32 | "episode": 1 33 | } 34 | } 35 | } 36 | """ 37 | with open(ROOT_DIR / "INFO", "w", encoding="utf-8") as writer: 38 | template = string.Template(INFO_TMPL) 39 | writer.write(template.substitute(plugin_id=PLUGIN_ID, version=version())) 40 | 41 | # use 'python setup.py sdist --formats=zip' command to create the zip file 42 | setup( 43 | name=PLUGIN_ID, 44 | version=version(), 45 | packages=[ 46 | "", 47 | "scraper", 48 | "scraper.functions", 49 | "scrapeflows", 50 | "configserver" 51 | ], 52 | package_data={ 53 | "": ["run.sh", "resolvers.conf", "INFO"], 54 | "scrapeflows": ["*.json"], 55 | "configserver": ["templates/*.html"], 56 | }, 57 | python_requires=">=3.6", 58 | ) 59 | -------------------------------------------------------------------------------- /version.py: -------------------------------------------------------------------------------- 1 | """Version number management.""" 2 | import subprocess 3 | 4 | __all__ = ["version"] 5 | 6 | 7 | def version(): 8 | """Extract the version number from git describe command.""" 9 | cmd = "git describe --tags --match v[0-9]*".split() 10 | tag_describe = subprocess.check_output(cmd).decode().strip() 11 | tag_version = tag_describe[1:] 12 | if "-" in tag_version: 13 | tag_version = tag_version.split("-", 1)[0] 14 | return tag_version 15 | --------------------------------------------------------------------------------
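A quick worked example of how version() turns git metadata into the package version (a sketch: the tag and commit hash below are illustrative, not taken from this repository's history):

# git describe --tags --match v[0-9]*  ->  "v1.2.3-4-gabc1234"
# tag_describe[1:]                     ->  "1.2.3-4-gabc1234"
# tag_version.split("-", 1)[0]         ->  "1.2.3"
from version import version

print(version())  # -> "1.2.3", consumed by setup.py for both the INFO file and the sdist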