├── .github └── workflows │ └── release.yml ├── .gitignore ├── LICENSE ├── README.md ├── README.zh-CN.md ├── configserver ├── __init__.py ├── server.py └── templates │ ├── config.html │ ├── index.html │ └── source.html ├── main.py ├── preview.png ├── resolvers.conf ├── run.sh ├── scrapeflows ├── bangumi_movie.json ├── bangumi_tvshow.json ├── bangumi_tvshow_episode.json ├── douban_movie.json ├── douban_tvshow.json ├── douban_tvshow_episode.json ├── maoyan_movie.json ├── maoyan_tvshow.json ├── maoyan_tvshow_episode.json ├── mtime_movie.json ├── mtime_tvshow.json ├── mtime_tvshow_episode.json ├── tmdb_movie.json ├── tmdb_tvshow.json └── tmdb_tvshow_episode.json ├── scraper ├── __init__.py ├── enums.py ├── exceptions.py ├── fake.py ├── functions │ ├── __init__.py │ ├── collect.py │ ├── doh.py │ ├── loop.py │ ├── request.py │ └── retval.py ├── scraper.py └── utils.py ├── setup.py └── version.py /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Release Python Package 2 | 3 | on: 4 | push: 5 | tags: 6 | - v[0-9]* 7 | 8 | permissions: 9 | contents: write 10 | 11 | jobs: 12 | release: 13 | runs-on: ubuntu-latest 14 | steps: 15 | - name: Checkout Repository 16 | uses: actions/checkout@v3 17 | - name: Setup Python 18 | uses: actions/setup-python@v4 19 | with: 20 | python-version: '3.8' 21 | - name: Build Package 22 | run: python setup.py sdist --formats=zip 23 | - name: Create Release 24 | uses: ncipollo/release-action@v1 25 | with: 26 | artifacts: "dist/*.zip" 27 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ### VisualStudioCode template 2 | .vscode/* 3 | 4 | # Local History for Visual Studio Code 5 | .history/ 6 | 7 | # Built Visual Studio Code Extensions 8 | *.vsix 9 | 10 | ### JetBrains template 11 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider 12 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 13 | 14 | # User-specific stuff 15 | .idea/**/workspace.xml 16 | .idea/**/tasks.xml 17 | .idea/**/usage.statistics.xml 18 | .idea/**/dictionaries 19 | .idea/**/shelf 20 | 21 | # AWS User-specific 22 | .idea/**/aws.xml 23 | 24 | # Generated files 25 | .idea/**/contentModel.xml 26 | 27 | # Sensitive or high-churn files 28 | .idea/**/dataSources/ 29 | .idea/**/dataSources.ids 30 | .idea/**/dataSources.local.xml 31 | .idea/**/sqlDataSources.xml 32 | .idea/**/dynamic.xml 33 | .idea/**/uiDesigner.xml 34 | .idea/**/dbnavigator.xml 35 | 36 | # Gradle 37 | .idea/**/gradle.xml 38 | .idea/**/libraries 39 | 40 | # Gradle and Maven with auto-import 41 | # When using Gradle or Maven with auto-import, you should exclude module files, 42 | # since they will be recreated, and may cause churn. Uncomment if using 43 | # auto-import. 
44 | # .idea/artifacts 45 | # .idea/compiler.xml 46 | # .idea/jarRepositories.xml 47 | # .idea/modules.xml 48 | # .idea/*.iml 49 | # .idea/modules 50 | # *.iml 51 | # *.ipr 52 | 53 | # CMake 54 | cmake-build-*/ 55 | 56 | # Mongo Explorer plugin 57 | .idea/**/mongoSettings.xml 58 | 59 | # File-based project format 60 | *.iws 61 | 62 | # IntelliJ 63 | out/ 64 | 65 | # mpeltonen/sbt-idea plugin 66 | .idea_modules/ 67 | 68 | # JIRA plugin 69 | atlassian-ide-plugin.xml 70 | 71 | # Cursive Clojure plugin 72 | .idea/replstate.xml 73 | 74 | # SonarLint plugin 75 | .idea/sonarlint/ 76 | 77 | # Crashlytics plugin (for Android Studio and IntelliJ) 78 | com_crashlytics_export_strings.xml 79 | crashlytics.properties 80 | crashlytics-build.properties 81 | fabric.properties 82 | 83 | # Editor-based Rest Client 84 | .idea/httpRequests 85 | 86 | # Android studio 3.1+ serialized cache file 87 | .idea/caches/build_file_checksums.ser 88 | 89 | ### VirtualEnv template 90 | # Virtualenv 91 | # http://iamzed.com/2009/05/07/a-primer-on-virtualenv/ 92 | .Python 93 | [Bb]in 94 | [Ii]nclude 95 | [Ll]ib 96 | [Ll]ib64 97 | [Ll]ocal 98 | [Ss]cripts 99 | pyvenv.cfg 100 | .venv 101 | pip-selfcheck.json 102 | 103 | ### Eclipse template 104 | .metadata 105 | bin/ 106 | tmp/ 107 | *.tmp 108 | *.bak 109 | *.swp 110 | *~.nib 111 | local.properties 112 | .settings/ 113 | .loadpath 114 | .recommenders 115 | 116 | # External tool builders 117 | .externalToolBuilders/ 118 | 119 | # Locally stored "Eclipse launch configurations" 120 | *.launch 121 | 122 | # PyDev specific (Python IDE for Eclipse) 123 | *.pydevproject 124 | 125 | # CDT-specific (C/C++ Development Tooling) 126 | .cproject 127 | 128 | # CDT- autotools 129 | .autotools 130 | 131 | # Java annotation processor (APT) 132 | .factorypath 133 | 134 | # PDT-specific (PHP Development Tools) 135 | .buildpath 136 | 137 | # sbteclipse plugin 138 | .target 139 | 140 | # Tern plugin 141 | .tern-project 142 | 143 | # TeXlipse plugin 144 | .texlipse 145 | 146 | # STS (Spring Tool Suite) 147 | .springBeans 148 | 149 | # Code Recommenders 150 | .recommenders/ 151 | 152 | # Annotation Processing 153 | .apt_generated/ 154 | .apt_generated_test/ 155 | 156 | # Scala IDE specific (Scala & Java development for Eclipse) 157 | .cache-main 158 | .scala_dependencies 159 | .worksheet 160 | 161 | # Uncomment this line if you wish to ignore the project description file. 
162 | # Typically, this file would be tracked if it contains build/dependency configurations: 163 | #.project 164 | 165 | ### Windows template 166 | # Windows thumbnail cache files 167 | Thumbs.db 168 | Thumbs.db:encryptable 169 | ehthumbs.db 170 | ehthumbs_vista.db 171 | 172 | # Dump file 173 | *.stackdump 174 | 175 | # Folder config file 176 | [Dd]esktop.ini 177 | 178 | # Recycle Bin used on file shares 179 | $RECYCLE.BIN/ 180 | 181 | # Windows Installer files 182 | *.cab 183 | *.msi 184 | *.msix 185 | *.msm 186 | *.msp 187 | 188 | # Windows shortcuts 189 | *.lnk 190 | 191 | ### macOS template 192 | # General 193 | .DS_Store 194 | .AppleDouble 195 | .LSOverride 196 | 197 | # Icon must end with two \r 198 | Icon 199 | 200 | # Thumbnails 201 | ._* 202 | 203 | # Files that might appear in the root of a volume 204 | .DocumentRevisions-V100 205 | .fseventsd 206 | .Spotlight-V100 207 | .TemporaryItems 208 | .Trashes 209 | .VolumeIcon.icns 210 | .com.apple.timemachine.donotpresent 211 | 212 | # Directories potentially created on remote AFP share 213 | .AppleDB 214 | .AppleDesktop 215 | Network Trash Folder 216 | Temporary Items 217 | .apdisk 218 | 219 | ### Python template 220 | # Byte-compiled / optimized / DLL files 221 | __pycache__/ 222 | *.py[cod] 223 | *$py.class 224 | 225 | # C extensions 226 | *.so 227 | 228 | # Distribution / packaging 229 | .Python 230 | build/ 231 | develop-eggs/ 232 | dist/ 233 | downloads/ 234 | eggs/ 235 | .eggs/ 236 | lib/ 237 | lib64/ 238 | parts/ 239 | sdist/ 240 | var/ 241 | wheels/ 242 | share/python-wheels/ 243 | *.egg-info/ 244 | .installed.cfg 245 | *.egg 246 | MANIFEST 247 | 248 | # PyInstaller 249 | # Usually these files are written by a python script from a template 250 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 251 | *.manifest 252 | *.spec 253 | 254 | # Installer logs 255 | pip-log.txt 256 | pip-delete-this-directory.txt 257 | 258 | # Unit test / coverage reports 259 | htmlcov/ 260 | .tox/ 261 | .nox/ 262 | .coverage 263 | .coverage.* 264 | .cache 265 | nosetests.xml 266 | coverage.xml 267 | *.cover 268 | *.py,cover 269 | .hypothesis/ 270 | .pytest_cache/ 271 | cover/ 272 | 273 | # Translations 274 | *.mo 275 | *.pot 276 | 277 | # Django stuff: 278 | *.log 279 | local_settings.py 280 | db.sqlite3 281 | db.sqlite3-journal 282 | 283 | # Flask stuff: 284 | instance/ 285 | .webassets-cache 286 | 287 | # Scrapy stuff: 288 | .scrapy 289 | 290 | # Sphinx documentation 291 | docs/_build/ 292 | 293 | # PyBuilder 294 | .pybuilder/ 295 | target/ 296 | 297 | # Jupyter Notebook 298 | .ipynb_checkpoints 299 | 300 | # IPython 301 | profile_default/ 302 | ipython_config.py 303 | 304 | # pyenv 305 | # For a library or package, you might want to ignore these files since the code is 306 | # intended to run in multiple environments; otherwise, check them in: 307 | # .python-version 308 | 309 | # pipenv 310 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 311 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 312 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 313 | # install all needed dependencies. 314 | #Pipfile.lock 315 | 316 | # poetry 317 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 318 | # This is especially recommended for binary packages to ensure reproducibility, and is more 319 | # commonly ignored for libraries. 
320 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 321 | #poetry.lock 322 | 323 | # pdm 324 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 325 | #pdm.lock 326 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 327 | # in version control. 328 | # https://pdm.fming.dev/#use-with-ide 329 | .pdm.toml 330 | 331 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 332 | __pypackages__/ 333 | 334 | # Celery stuff 335 | celerybeat-schedule 336 | celerybeat.pid 337 | 338 | # SageMath parsed files 339 | *.sage.py 340 | 341 | # Environments 342 | .env 343 | .venv 344 | env/ 345 | venv/ 346 | ENV/ 347 | env.bak/ 348 | venv.bak/ 349 | 350 | # Spyder project settings 351 | .spyderproject 352 | .spyproject 353 | 354 | # Rope project settings 355 | .ropeproject 356 | 357 | # mkdocs documentation 358 | /site 359 | 360 | # mypy 361 | .mypy_cache/ 362 | .dmypy.json 363 | dmypy.json 364 | 365 | # Pyre type checker 366 | .pyre/ 367 | 368 | # pytype static type analyzer 369 | .pytype/ 370 | 371 | # Cython debug symbols 372 | cython_debug/ 373 | 374 | # PyCharm 375 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 376 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 377 | # and can be added to the global gitignore or merged into this file. For a more nuclear 378 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 379 | .idea/ 380 | 381 | # syno-videoinfo-plugin 382 | INFO 383 | .cache_* 384 | scrapeflows.conf 385 | configserver/authorization -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 
34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # *Syno*logy Video Info Plugin 2 | 3 | [![GitHub Release](https://img.shields.io/github/v/release/C5H12O5/syno-videoinfo-plugin?logo=github&style=flat&color=blue)](https://github.com/C5H12O5/syno-videoinfo-plugin/releases) 4 | ![GitHub Stars](https://img.shields.io/github/stars/C5H12O5/syno-videoinfo-plugin?logo=github&style=flat&color=yellow) 5 | ![GitHub Downloads](https://img.shields.io/github/downloads/C5H12O5/syno-videoinfo-plugin/total?logo=github&style=flat&color=green) 6 | ![Python Support](https://img.shields.io/badge/Python-3.6+-green?logo=python&style=flat&color=steelblue) 7 | [![GitHub License](https://img.shields.io/github/license/C5H12O5/syno-videoinfo-plugin?logo=apache&style=flat&color=lightslategray)](LICENSE) 8 | 9 | ###### 📖 English / 📖 [简体中文](README.zh-CN.md) 10 | 11 | This project is a video information plugin for Synology **Video Station**. It provides a way to fetch metadata from websites 12 | other than the default ones. 13 | 14 | * Implemented in Python without any third-party dependencies. 15 | * Supports multiple sources, and can be easily extended to support more. 16 | * Has a simple configuration page where you can customize your plugin. 17 | 18 | ![preview](preview.png) 19 | 20 | ## Usage 21 | 22 | Install the plugin: 23 | 24 | 1. Download the latest release from [***here***](https://github.com/C5H12O5/syno-videoinfo-plugin/releases). 25 | 2. Open your **Video Station**, go to ***Settings*** > ***Video Info Plugin***. 26 | 3. Click **[Add]**, select the downloaded file, and click **[OK]**. 27 | 28 | Configure the plugin: 29 | 30 | 1. Open your browser, go to `http://[NAS_IP]:5125` (replace `[NAS_IP]` with your NAS IP address) page. 31 | 2. Change the configuration as you want, and click **[ :floppy_disk: ]** button in the upper right corner. 32 | 3. Go back to your **Video Station**, the configuration should be applied automatically. 33 | > [!NOTE] 34 | > If you upgrade the plugin but the configuration page is not updated, you can restart the configuration service by following steps: 35 | > 1. Open the configuration page, click **[Exit]** button in the upper right corner to close the service. 36 | > 2. Go back to **Video Station**, go to ***Settings*** > ***Video Info Plugin***, and click **[Test Connection]** button to restart the service. 37 | 38 | ## Requirements 39 | 40 | * Python 3.6+ 41 | * Video Station 2.5.0+ for DSM 6.0 42 | * Video Station 3.0.0+ for DSM 7.0 43 | 44 | ## References 45 | 46 | * [The Video Station Metadata](https://kb.synology.com/en-id/DSM/help/VideoStation/metadata?version=7) 47 | * [The Video Station API documentation](https://download.synology.com/download/Document/Software/DeveloperGuide/Package/VideoStation/All/enu/Synology_Video_Station_API_enu.pdf) 48 | 49 | > Tips for naming video files: 50 | > 51 | > Movie: 52 | > 53 | > * Naming format: Movie_Name (Release_Year).ext 54 | > * Example: Avatar (2009).avi 55 | > 56 | > TV Show: 57 | > * Naming format: TV_Show_Name.SXX.EYY.ext (***S*** as a shorthand for ***Season*** and ***E*** for ***Episode***) 58 | > * Example: Gossip Girl.S03.E04.avi 59 | 60 | ## Development 61 | 62 | You can develop your own plugin based on this project easily. Here are the steps: 63 | 64 | 1. 
Clone this repository to your local machine: 65 | 66 | ```shell 67 | git clone https://github.com/C5H12O5/syno-videoinfo-plugin 68 | ``` 69 | 70 | 2. Modify the code as you want, and test it like this: 71 | 72 | ```shell 73 | python main.py --type movie --input "{\"title\":\"{movie_title}\"}" --limit 1 --loglevel debug 74 | ``` 75 | 76 | 3. Package the plugin using the following command: 77 | 78 | ```shell 79 | python setup.py sdist --formats=zip 80 | ``` 81 | 82 | ## License 83 | 84 | [Apache-2.0 license](LICENSE) -------------------------------------------------------------------------------- /README.zh-CN.md: -------------------------------------------------------------------------------- 1 | # *Syno*logy Video Info Plugin 2 | 3 | [![GitHub Release](https://img.shields.io/github/v/release/C5H12O5/syno-videoinfo-plugin?logo=github&style=flat&color=blue)](https://github.com/C5H12O5/syno-videoinfo-plugin/releases) 4 | ![GitHub Stars](https://img.shields.io/github/stars/C5H12O5/syno-videoinfo-plugin?logo=github&style=flat&color=yellow) 5 | ![GitHub Downloads](https://img.shields.io/github/downloads/C5H12O5/syno-videoinfo-plugin/total?logo=github&style=flat&color=green) 6 | ![Python Support](https://img.shields.io/badge/Python-3.6+-green?logo=python&style=flat&color=steelblue) 7 | [![GitHub License](https://img.shields.io/github/license/C5H12O5/syno-videoinfo-plugin?logo=apache&style=flat&color=lightslategray)](LICENSE) 8 | 9 | ###### 📖 [English](README.md) / 📖 简体中文 10 | 11 | 本项目是群晖 **Video Station** 的第三方视频信息插件,它提供了一种从各大影视数据库平台获取视频元数据的方法。 12 | 13 | * 使用Python标准库实现,无需安装任何依赖。 14 | * 支持多个数据来源,并且可以轻松扩展。 15 | * 有简单的配置页面,可以自定义你的插件。 16 | 17 | ![preview](preview.png) 18 | 19 | ## 使用说明 20 | 21 | 安装插件: 22 | 23 | 1. 从[***此处***](https://github.com/C5H12O5/syno-videoinfo-plugin/releases)下载最新版本。 24 | 2. 打开 **Video Station**,进入 ***设置*** > ***视频信息插件***。 25 | 3. 点击 **[新增]**,选择第一步下载的压缩包,然后点击 **[确定]**。 26 | 27 | 配置插件: 28 | 29 | 1. 打开你的浏览器,输入`http://[NAS_IP]:5125`(将`[NAS_IP]`替换为你的NAS的IP地址)打开配置页面。 30 | 2. 根据你的需要修改配置,然后点击右上角的 **[ :floppy_disk: ]** 按钮。 31 | 3. 返回你的 **Video Station**,保存的配置将会自动生效。 32 | > [!NOTE] 33 | > 如果升级了插件但是配置页面没有更新,可以通过以下步骤来重启配置服务: 34 | > 1. 打开配置页面,点击右上角的 **[Exit]** 按钮关闭配置服务。 35 | > 2. 返回 **Video Station**,进入 ***设置*** > ***视频信息插件***,点击 **[测试连接]** 按钮即可重新启动配置服务。 36 | 37 | ## 版本要求 38 | 39 | * Python 3.6+ 40 | * Video Station 2.5.0+(DSM 6.0) 41 | * Video Station 3.0.0+(DSM 7.0) 42 | 43 | ## 参考文献 44 | 45 | * [视频元数据](https://kb.synology.cn/zh-cn/DSM/help/VideoStation/metadata?version=7) 46 | * [Video Station API 文档](https://download.synology.com/download/Document/Software/DeveloperGuide/Package/VideoStation/All/enu/Synology_Video_Station_API_enu.pdf) 47 | 48 | > 视频文件命名提示: 49 | > 50 | > 电影: 51 | > 52 | > * 命名格式:电影名称 (发行年份).ext 53 | > * 例如:Avatar (2009).avi 54 | > 55 | > 电视节目: 56 | > * 命名格式:电视节目名称.SXX.EYY.ext(***S*** 是 ***季数*** 的缩写,***E*** 是 ***集数*** 的缩写) 57 | > * 例如:Gossip Girl.S03.E04.avi 58 | 59 | ## 如何开发 60 | 61 | 您可以基于本项目并按以下步骤来开发自己的插件: 62 | 63 | 1. 将本项目克隆到本地: 64 | 65 | ```shell 66 | git clone https://github.com/C5H12O5/syno-videoinfo-plugin 67 | ``` 68 | 69 | 2. 根据需要修改代码,并可以使用以下命令进行测试: 70 | 71 | ```shell 72 | python main.py --type movie --input "{\"title\":\"{movie_title}\"}" --limit 1 --loglevel debug 73 | ``` 74 | 75 | 3. 
然后可以使用以下命令进行打包并上传使用: 76 | 77 | ```shell 78 | python setup.py sdist --formats=zip 79 | ``` 80 | 81 | ## 使用许可 82 | 83 | [Apache-2.0 license](LICENSE) -------------------------------------------------------------------------------- /configserver/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/C5H12O5/syno-videoinfo-plugin/27cc22b763343e85ea64fc5f5e34d078b8a7ab68/configserver/__init__.py -------------------------------------------------------------------------------- /configserver/server.py: -------------------------------------------------------------------------------- 1 | """A simple HTTP server for configuration.""" 2 | import ast 3 | import http 4 | import json 5 | import string 6 | import sys 7 | from http.server import HTTPServer 8 | from pathlib import Path 9 | 10 | HOST = "0.0.0.0" 11 | PORT = 5125 12 | 13 | # define the base directory 14 | _basedir = Path(__file__).resolve().parent 15 | 16 | # define the configuration files 17 | _resolvers_conf = _basedir / "../resolvers.conf" 18 | _flows_conf = _basedir / "../scrapeflows.conf" 19 | _auth_conf = _basedir / "authorization" 20 | 21 | # initialize the templates 22 | with open(_basedir / "templates/config.html", "r", encoding="utf-8") as html: 23 | _config_tmpl = string.Template(html.read()) 24 | with open(_basedir / "templates/source.html", "r", encoding="utf-8") as html: 25 | _source_tmpl = string.Template(html.read()) 26 | with open(_basedir / "templates/index.html", "r", encoding="utf-8") as html: 27 | _index_tmpl = string.Template(html.read()) 28 | 29 | 30 | def render_index(saved=None): 31 | """Render the index page.""" 32 | source_html = "" 33 | sites = load_sites() 34 | for site, site_conf in sites.items(): 35 | saved_conf = saved.get(site) if saved is not None else None 36 | config_html = render_config(site, site_conf, saved_conf) 37 | types = site_conf["types"] 38 | doh_enabled = site_conf["doh_enabled"] 39 | source = { 40 | "site": site, 41 | "movie": "selected" if "movie" in types else "disabled", 42 | "tvshow": "selected" if "tvshow" in types else "disabled", 43 | "doh_enabled": "selected" if doh_enabled else "", 44 | "doh_disabled": "selected" if not doh_enabled else "", 45 | "priority": len(sites), 46 | "config": config_html, 47 | } 48 | if saved_conf is not None: 49 | saved_types = saved_conf["types"] 50 | saved_doh = saved_conf["doh"] 51 | source["movie"] = "selected" if "movie" in saved_types else "" 52 | source["tvshow"] = "selected" if "tvshow" in saved_types else "" 53 | source["doh_enabled"] = "selected" if saved_doh else "" 54 | source["doh_disabled"] = "selected" if not saved_doh else "" 55 | source["priority"] = saved_conf["priority"] 56 | source_html += _source_tmpl.substitute(source) 57 | 58 | return _index_tmpl.substitute( 59 | sources=source_html, resolvers=load_resolvers(), version=load_version() 60 | ) 61 | 62 | 63 | def render_config(site, site_conf, saved_conf): 64 | """Render the configuration for a site.""" 65 | config_html = "" 66 | config = site_conf.get("config") 67 | if config is not None: 68 | for key, option in config.items(): 69 | value = saved_conf.get(key, "") if saved_conf is not None else "" 70 | mapping = {"site": site, "key": key, "value": value} 71 | mapping.update(option) 72 | config_html += _config_tmpl.substitute(mapping) 73 | return config_html 74 | 75 | 76 | def load_sites(): 77 | """Load the list of sites and types from flow definitions.""" 78 | sites = {} 79 | for filepath in (_basedir / 
"../scrapeflows").glob("*.json"): 80 | with open(filepath, "r", encoding="utf-8") as def_reader: 81 | flowdef = json.load(def_reader) 82 | site = flowdef["site"] 83 | site_conf = sites.get(site, {}) 84 | site_conf["doh_enabled"] = flowdef.get("doh_enabled", False) 85 | 86 | # aggregate types 87 | type_ = flowdef["type"].split("_", 1)[0] 88 | types = site_conf.get("types", []) 89 | if type_ not in types: 90 | types.append(type_) 91 | site_conf["types"] = types 92 | 93 | # aggregate config 94 | if "config" in flowdef: 95 | config = site_conf.get("config", {}) 96 | config.update(flowdef["config"]) 97 | site_conf["config"] = config 98 | 99 | sites[site] = site_conf 100 | 101 | return dict(sorted(sites.items(), key=lambda x: x[0])) 102 | 103 | 104 | def load_resolvers(): 105 | """Load the list of DoH resolvers.""" 106 | with open(_resolvers_conf, "r", encoding="utf-8") as doh_reader: 107 | return ast.literal_eval(doh_reader.read()) 108 | 109 | 110 | def load_version(): 111 | """Load the plugin version from the directory name.""" 112 | dir_name = _basedir.parent.name 113 | if "-" in dir_name: 114 | version = dir_name.split("-")[-1] 115 | if version != "plugin": 116 | return f"v{version}" 117 | return "" 118 | 119 | 120 | # initialize the index page 121 | _index_html = render_index() 122 | 123 | 124 | class RequestHandler(http.server.SimpleHTTPRequestHandler): 125 | """Request handler for the HTTP server.""" 126 | 127 | def do_AUTH(self): 128 | if not _auth_conf.exists(): 129 | return True 130 | 131 | with open(_auth_conf, "r", encoding="utf-8") as auth_reader: 132 | saved_auth = auth_reader.read() 133 | 134 | if self.headers.get("Authorization") is not None: 135 | auth_header = self.headers.get("Authorization") 136 | if auth_header.split("Basic ")[1] == saved_auth: 137 | return True 138 | 139 | self.send_response(401) 140 | self.send_header("WWW-Authenticate", 'Basic realm="Login Required"') 141 | self.send_header("Content-type", "text/html") 142 | self.end_headers() 143 | self.wfile.write(b"Unauthorized") 144 | return False 145 | 146 | def do_GET(self): 147 | if not self.do_AUTH(): 148 | return 149 | 150 | self.send_response(200) 151 | self.send_header("Content-type", "text/html") 152 | self.end_headers() 153 | 154 | if self.path == "/": 155 | # index page 156 | if _flows_conf.exists(): 157 | with open(_flows_conf, "r", encoding="utf-8") as conf_reader: 158 | saved_conf = json.load(conf_reader) 159 | self.wfile.write(render_index(saved_conf).encode("utf-8")) 160 | else: 161 | self.wfile.write(_index_html.encode("utf-8")) 162 | 163 | elif self.path == "/exit": 164 | # close the server 165 | self.server.server_close() 166 | sys.exit() 167 | 168 | def do_POST(self): 169 | if not self.do_AUTH(): 170 | return 171 | 172 | self.send_response(200) 173 | self.end_headers() 174 | content_length = int(self.headers["Content-Length"]) 175 | request_body = self.rfile.read(content_length) 176 | 177 | if self.path == "/save": 178 | # save the configuration 179 | conf = json.loads(request_body.decode("utf-8")) 180 | with open(_flows_conf, "w", encoding="utf-8") as conf_writer: 181 | conf_writer.write(json.dumps( 182 | conf["flows"], ensure_ascii=False, indent=2 183 | )) 184 | with open(_resolvers_conf, "w", encoding="utf-8") as doh_writer: 185 | doh_writer.write(json.dumps( 186 | conf["resolvers"], ensure_ascii=False, indent=2 187 | )) 188 | 189 | elif self.path == "/auth": 190 | # save the authorization 191 | with open(_auth_conf, "w", encoding="utf-8") as auth_writer: 192 | 
auth_writer.write(request_body.decode("utf-8")) 193 | 194 | 195 | if __name__ == "__main__": 196 | httpd = HTTPServer((HOST, PORT), RequestHandler) 197 | httpd.serve_forever() 198 | -------------------------------------------------------------------------------- /configserver/templates/config.html: -------------------------------------------------------------------------------- 1 |
[config.html: template markup was stripped in this dump; only the ${icon} placeholder survives. Per configserver/server.py, the template is filled with ${site}, ${key}, ${value} plus the option fields from a flow's "config" block.]
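With the markup gone, a minimal sketch of how configserver/server.py consumes this template may help. The HTML string, the "apikey" key, and the "icon" option field below are illustrative stand-ins, not the original template:

```python
from string import Template

# Stand-in for the stripped config.html (only ${icon} is known to survive).
# server.py builds the mapping as {"site": ..., "key": ..., "value": ...}
# and then merges in the option fields from a flow's "config" block.
config_tmpl = Template(
    '<label><i class="icon">${icon}</i> ${key}</label>'
    '<input id="${site}_${key}" value="${value}">'
)

mapping = {"site": "tmdb.org", "key": "apikey", "value": ""}  # hypothetical
mapping.update({"icon": "key"})  # hypothetical option field
print(config_tmpl.substitute(mapping))
```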
-------------------------------------------------------------------------------- /configserver/templates/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Synology Video Info Plugin 6 | 7 | 8 | 9 | 10 | 11 | 22 | 23 | 24 | 25 | 50 | 51 |
[index.html: template markup was stripped in this dump; the surviving text nodes are the "Metadata Sources:" and "DNS-over-HTTPS Resolvers:" section labels and the ${version} placeholder. Per configserver/server.py, the page is filled with ${sources}, ${resolvers} and ${version}.]
--------------------------------------------------------------------------------
/configserver/templates/source.html:
--------------------------------------------------------------------------------
[source.html: template markup was stripped in this dump; the surviving text nodes are the ${site} and ${config} placeholders and the icon names edit_square, stacks, foggy and swap_vert. Per configserver/server.py, each source row is also filled with ${movie}, ${tvshow}, ${doh_enabled}, ${doh_disabled} and ${priority}.]
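main.py, which follows, is the entry point Video Station invokes; for local development the README's test command can also be scripted. A hedged sketch, assuming python3 on PATH, the repository root as working directory, and the README's sample title:

```python
import json
import subprocess

# Run the plugin with the CLI flags documented in the README.
search_input = json.dumps({"title": "Avatar"})
result = subprocess.run(
    ["python3", "main.py",
     "--type", "movie",
     "--input", search_input,
     "--limit", "1",
     "--loglevel", "debug"],
    capture_output=True,
    text=True,
    check=True,
)
print(result.stdout)  # metadata JSON printed by scraper.scrape()
```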
  • -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | """Entry point for this plugin.""" 2 | from pathlib import Path 3 | 4 | import scraper 5 | 6 | if __name__ == "__main__": 7 | # Prints the output of the scraper to the console. 8 | root_dir = Path(__file__).resolve().parent 9 | plugin_id = root_dir.name 10 | print(scraper.scrape(plugin_id)) 11 | -------------------------------------------------------------------------------- /preview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/C5H12O5/syno-videoinfo-plugin/27cc22b763343e85ea64fc5f5e34d078b8a7ab68/preview.png -------------------------------------------------------------------------------- /resolvers.conf: -------------------------------------------------------------------------------- 1 | [ 2 | # https://developers.cloudflare.com/1.1.1.1/encryption/dns-over-https 3 | "1.0.0.1", 4 | "1.1.1.1", 5 | 6 | # https://support.quad9.net/hc/en-us 7 | "9.9.9.9", 8 | "149.112.112.112", 9 | 10 | # https://support.opendns.com/hc/en-us 11 | "208.67.220.220", 12 | "208.67.222.222", 13 | 14 | # https://developers.google.com/speed/public-dns/docs/doh 15 | "dns.google", 16 | 17 | # https://adguard-dns.io/public-dns.html 18 | "dns.adguard-dns.com", 19 | ] -------------------------------------------------------------------------------- /run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | BASEDIR=$(dirname $0) 4 | ARGV="" 5 | PORT=5125 6 | 7 | escape() 8 | { 9 | local ARG=$(echo -E "$@" | sed "s/'/'\\\\''/g") 10 | echo \'$ARG\' 11 | } 12 | 13 | i=1 14 | while [ $i -le $# ]; do 15 | eval ARG=\$\(escape \${$i}\) 16 | ARGV="$ARGV $ARG" 17 | i=`expr $i + 1` 18 | done 19 | 20 | if ! 
netstat -tuln | grep ":$PORT" >/dev/null; then 21 | nohup /usr/bin/env python3 "$BASEDIR"/configserver/server.py > /dev/null 2>&1 & 22 | fi 23 | 24 | eval "/usr/bin/env python3 $BASEDIR/main.py $ARGV" -------------------------------------------------------------------------------- /scrapeflows/bangumi_movie.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "movie", 3 | "site": "bangumi.tv", 4 | "steps": [ 5 | { 6 | "doh": { 7 | "host": "api.bgm.tv" 8 | } 9 | }, 10 | { 11 | "http": { 12 | "url": "https://api.bgm.tv/search/subject/{title}?type=2&start=0&max_results={limit}", 13 | "method": "GET", 14 | "headers": { 15 | "User-Agent": "C5H12O5/syno-videoinfo-plugin{version} (https://github.com/C5H12O5/syno-videoinfo-plugin)" 16 | }, 17 | "result": "metadata" 18 | } 19 | }, 20 | { 21 | "collect": { 22 | "source": "metadata", 23 | "into": { 24 | "ids": "['xp_texts', './list//id']" 25 | } 26 | } 27 | }, 28 | { 29 | "loop": { 30 | "source": "ids", 31 | "item": "id", 32 | "steps": [ 33 | { 34 | "http": { 35 | "url": "https://api.bgm.tv/v0/subjects/{id}", 36 | "method": "GET", 37 | "headers": { 38 | "User-Agent": "C5H12O5/syno-videoinfo-plugin{$parent[version]} (https://github.com/C5H12O5/syno-videoinfo-plugin)" 39 | }, 40 | "result": "subject" 41 | } 42 | }, 43 | { 44 | "collect": { 45 | "source": "subject", 46 | "into": { 47 | "movie": { 48 | "title": "['xp_text', './name_cn']", 49 | "tagline": "['xp_text', './name']", 50 | "original_available": "['xp_text', './date']", 51 | "summary": "['xp_text', './summary']", 52 | "certificate": "", 53 | "genre": "['xp_texts', './tags//name']", 54 | "actor": [], 55 | "writer": "['re_matches', '\"key\":\"原作\",\"value\":\"([^\"]*?)\"']", 56 | "director": "['re_matches', '\"key\":\"导演\",\"value\":\"([^\"]*?)\"']", 57 | "extra": { 58 | "[plugin_id]": { 59 | "rating": { 60 | "[plugin_id]": "['xp_text', './rating//score', 'float']" 61 | }, 62 | "poster": [ 63 | "['xp_text', './images//large']" 64 | ], 65 | "backdrop": [ 66 | "['xp_text', './images//large']" 67 | ] 68 | } 69 | } 70 | }, 71 | "publish_date": "['xp_text', './date']", 72 | "available_date": "{$parent[available]}" 73 | } 74 | } 75 | }, 76 | { 77 | "http": { 78 | "url": "https://api.bgm.tv/v0/subjects/{id}/characters", 79 | "method": "GET", 80 | "headers": { 81 | "User-Agent": "C5H12O5/syno-videoinfo-plugin{$parent[version]} (https://github.com/C5H12O5/syno-videoinfo-plugin)" 82 | }, 83 | "result": "characters" 84 | } 85 | }, 86 | { 87 | "collect": { 88 | "source": "characters", 89 | "into": { 90 | "movie": { 91 | "actor": "['xp_texts', './/actors//name']" 92 | } 93 | } 94 | } 95 | }, 96 | { 97 | "retval": { 98 | "source": "movie", 99 | "compare": "['publish_date', '>=', 'available_date']" 100 | } 101 | } 102 | ] 103 | } 104 | } 105 | ] 106 | } -------------------------------------------------------------------------------- /scrapeflows/bangumi_tvshow.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "tvshow", 3 | "site": "bangumi.tv", 4 | "steps": [ 5 | { 6 | "doh": { 7 | "host": "api.bgm.tv" 8 | } 9 | }, 10 | { 11 | "http": { 12 | "url": "https://api.bgm.tv/search/subject/{title}?type=2&start=0&max_results={limit}", 13 | "method": "GET", 14 | "headers": { 15 | "User-Agent": "C5H12O5/syno-videoinfo-plugin{version} (https://github.com/C5H12O5/syno-videoinfo-plugin)" 16 | }, 17 | "result": "metadata" 18 | } 19 | }, 20 | { 21 | "collect": { 22 | "source": "metadata", 23 | "into": { 24 | "ids": 
"['xp_texts', './list//id']" 25 | } 26 | } 27 | }, 28 | { 29 | "loop": { 30 | "source": "ids", 31 | "item": "id", 32 | "steps": [ 33 | { 34 | "http": { 35 | "url": "https://api.bgm.tv/v0/subjects/{id}", 36 | "method": "GET", 37 | "headers": { 38 | "User-Agent": "C5H12O5/syno-videoinfo-plugin{$parent[version]} (https://github.com/C5H12O5/syno-videoinfo-plugin)" 39 | }, 40 | "result": "subject" 41 | } 42 | }, 43 | { 44 | "collect": { 45 | "source": "subject", 46 | "into": { 47 | "tvshow": { 48 | "title": "['xp_text', './name_cn']", 49 | "original_available": "['xp_text', './date']", 50 | "summary": "['xp_text', './summary']", 51 | "extra": { 52 | "[plugin_id]": { 53 | "poster": [ 54 | "['xp_text', './images//large']" 55 | ], 56 | "backdrop": [ 57 | "['xp_text', './images//large']" 58 | ] 59 | } 60 | } 61 | }, 62 | "publish_date": "['xp_text', './date']", 63 | "available_date": "{$parent[available]}" 64 | } 65 | } 66 | }, 67 | { 68 | "retval": { 69 | "source": "tvshow", 70 | "compare": "['publish_date', '>=', 'available_date']" 71 | } 72 | } 73 | ] 74 | } 75 | } 76 | ] 77 | } -------------------------------------------------------------------------------- /scrapeflows/bangumi_tvshow_episode.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "tvshow_episode", 3 | "site": "bangumi.tv", 4 | "steps": [ 5 | { 6 | "doh": { 7 | "host": "api.bgm.tv" 8 | } 9 | }, 10 | { 11 | "http": { 12 | "url": "https://api.bgm.tv/search/subject/{title}?type=2&start=0&max_results={limit}", 13 | "method": "GET", 14 | "headers": { 15 | "User-Agent": "C5H12O5/syno-videoinfo-plugin{version} (https://github.com/C5H12O5/syno-videoinfo-plugin)" 16 | }, 17 | "result": "metadata" 18 | } 19 | }, 20 | { 21 | "collect": { 22 | "source": "metadata", 23 | "into": { 24 | "ids": "['xp_texts', './list//id']" 25 | } 26 | } 27 | }, 28 | { 29 | "loop": { 30 | "source": "ids", 31 | "item": "id", 32 | "steps": [ 33 | { 34 | "http": { 35 | "url": "https://api.bgm.tv/v0/episodes?subject_id={id}&type=0&limit=100&offset=0", 36 | "method": "GET", 37 | "headers": { 38 | "User-Agent": "C5H12O5/syno-videoinfo-plugin{$parent[version]} (https://github.com/C5H12O5/syno-videoinfo-plugin)" 39 | }, 40 | "result": "episodes" 41 | } 42 | }, 43 | { 44 | "collect": { 45 | "source": "episodes", 46 | "into": { 47 | "ep": "['re_match', '\"ep\":{$parent[episode]},[^{{}}]*?\"id\":(\\d*?),']" 48 | } 49 | } 50 | }, 51 | { 52 | "retval": { 53 | "ifempty": "ep" 54 | } 55 | }, 56 | { 57 | "http": { 58 | "url": "https://api.bgm.tv/v0/subjects/{id}", 59 | "method": "GET", 60 | "headers": { 61 | "User-Agent": "C5H12O5/syno-videoinfo-plugin{$parent[version]} (https://github.com/C5H12O5/syno-videoinfo-plugin)" 62 | }, 63 | "result": "subject" 64 | } 65 | }, 66 | { 67 | "collect": { 68 | "source": "subject", 69 | "into": { 70 | "episode": { 71 | "title": "['xp_text', './name_cn']", 72 | "tagline": "['xp_text', './name']", 73 | "original_available": "['xp_text', './date']", 74 | "summary": "['xp_text', './summary']", 75 | "certificate": "", 76 | "genre": "['xp_texts', './tags//name']", 77 | "actor": [], 78 | "writer": "['re_matches', '\"key\":\"原作\",\"value\":\"([^\"]*?)\"']", 79 | "director": "['re_matches', '\"key\":\"导演\",\"value\":\"([^\"]*?)\"']", 80 | "extra": { 81 | "[plugin_id]": { 82 | "tvshow": { 83 | "title": "['xp_text', './name_cn']", 84 | "original_available": "['xp_text', './date']", 85 | "summary": "['xp_text', './summary']", 86 | "extra": { 87 | "[plugin_id]": { 88 | "poster": [ 89 | "['xp_text', 
'./images//large']" 90 | ], 91 | "backdrop": [ 92 | "['xp_text', './images//large']" 93 | ] 94 | } 95 | } 96 | }, 97 | "rating": { 98 | "[plugin_id]": "['xp_text', './rating//score', 'float']" 99 | }, 100 | "poster": [ 101 | "['xp_text', './images//large']" 102 | ] 103 | } 104 | } 105 | }, 106 | "publish_date": "['xp_text', './date']", 107 | "available_date": "{$parent[available]}" 108 | } 109 | } 110 | }, 111 | { 112 | "collect": { 113 | "source": "$parent", 114 | "into": { 115 | "episode": { 116 | "season": "['get', 'season']", 117 | "episode": "['get', 'episode']" 118 | } 119 | } 120 | } 121 | }, 122 | { 123 | "http": { 124 | "url": "https://api.bgm.tv/v0/subjects/{id}/characters", 125 | "method": "GET", 126 | "headers": { 127 | "User-Agent": "C5H12O5/syno-videoinfo-plugin{$parent[version]} (https://github.com/C5H12O5/syno-videoinfo-plugin)" 128 | }, 129 | "result": "characters" 130 | } 131 | }, 132 | { 133 | "collect": { 134 | "source": "characters", 135 | "into": { 136 | "episode": { 137 | "actor": "['xp_texts', './/actors//name']" 138 | } 139 | } 140 | } 141 | }, 142 | { 143 | "http": { 144 | "url": "https://api.bgm.tv/v0/episodes/{ep}", 145 | "method": "GET", 146 | "headers": { 147 | "User-Agent": "C5H12O5/syno-videoinfo-plugin{$parent[version]} (https://github.com/C5H12O5/syno-videoinfo-plugin)" 148 | }, 149 | "result": "subject" 150 | } 151 | }, 152 | { 153 | "collect": { 154 | "source": "subject", 155 | "into": { 156 | "episode": { 157 | "tagline": "['xp_text', './name_cn']", 158 | "original_available": "['xp_text', './airdate']", 159 | "summary": "['xp_text', './desc']" 160 | } 161 | } 162 | } 163 | }, 164 | { 165 | "retval": { 166 | "source": "episode", 167 | "compare": "['publish_date', '>=', 'available_date']" 168 | } 169 | } 170 | ] 171 | } 172 | } 173 | ] 174 | } -------------------------------------------------------------------------------- /scrapeflows/douban_movie.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "movie", 3 | "site": "douban.com", 4 | "steps": [ 5 | { 6 | "doh": { 7 | "hosts": [ 8 | "www.douban.com", 9 | "movie.douban.com" 10 | ] 11 | } 12 | }, 13 | { 14 | "http": { 15 | "url": "https://www.douban.com/search?cat=1002&q={title}", 16 | "method": "GET", 17 | "headers": { 18 | "Referer": "https://www.douban.com/", 19 | "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36" 20 | }, 21 | "result": "metadata" 22 | } 23 | }, 24 | { 25 | "collect": { 26 | "source": "metadata", 27 | "into": { 28 | "ids": "['re_matches', '电影].*?sid:\\s*(\\d+)\\s*,']" 29 | } 30 | } 31 | }, 32 | { 33 | "loop": { 34 | "source": "ids", 35 | "item": "id", 36 | "steps": [ 37 | { 38 | "http": { 39 | "url": "https://movie.douban.com/subject/{id}/", 40 | "method": "GET", 41 | "headers": { 42 | "Host": "movie.douban.com", 43 | "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36" 44 | }, 45 | "result": "subject" 46 | } 47 | }, 48 | { 49 | "collect": { 50 | "source": "subject", 51 | "from": "['xp_text', './/script[@type=\"application/ld+json\"]']", 52 | "into": { 53 | "movie": { 54 | "title": "['xp_text', './name']", 55 | "tagline": "", 56 | "original_available": "['xp_text', './datePublished']", 57 | "certificate": "", 58 | "genre": "['xp_texts', './genre/*']", 59 | "actor": "['xp_texts', './actor//name']", 60 | "writer": "['xp_texts', './author//name']", 61 | "director": 
"['xp_texts', './director//name']", 62 | "extra": { 63 | "[plugin_id]": { 64 | "rating": { 65 | "[plugin_id]": "['xp_text', './/ratingValue', 'float']" 66 | }, 67 | "poster": "['xp_texts', './image', 're_sub', '(.+/photo)/s_ratio_poster/(public/.+)', '\\\\1/m/\\\\2']", 68 | "backdrop": "['xp_texts', './image', 're_sub', '(.+/photo)/s_ratio_poster/(public/.+)', '\\\\1/l/\\\\2']" 69 | } 70 | } 71 | }, 72 | "publish_date": "['xp_text', './datePublished']", 73 | "available_date": "{$parent[available]}" 74 | } 75 | } 76 | }, 77 | { 78 | "collect": { 79 | "source": "subject", 80 | "into": { 81 | "movie": { 82 | "summary": "['xp_text', './/span[@property=\"v:summary\"]']", 83 | "extra": { 84 | "[plugin_id]": { 85 | "reference": { 86 | "imdb": "['re_match', 'IMDb:\\s*(.*?)\\s*
    ']" 87 | } 88 | } 89 | } 90 | } 91 | } 92 | } 93 | }, 94 | { 95 | "retval": { 96 | "source": "movie", 97 | "compare": "['publish_date', '>=', 'available_date']" 98 | } 99 | } 100 | ] 101 | } 102 | } 103 | ] 104 | } -------------------------------------------------------------------------------- /scrapeflows/douban_tvshow.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "tvshow", 3 | "site": "douban.com", 4 | "steps": [ 5 | { 6 | "doh": { 7 | "hosts": [ 8 | "www.douban.com", 9 | "movie.douban.com" 10 | ] 11 | } 12 | }, 13 | { 14 | "http": { 15 | "url": "https://www.douban.com/search?cat=1002&q={title}", 16 | "method": "GET", 17 | "headers": { 18 | "Referer": "https://www.douban.com/", 19 | "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36" 20 | }, 21 | "result": "metadata" 22 | } 23 | }, 24 | { 25 | "collect": { 26 | "source": "metadata", 27 | "into": { 28 | "ids": "['re_matches', '电视剧].*?sid:\\s*(\\d+)\\s*,']" 29 | } 30 | } 31 | }, 32 | { 33 | "loop": { 34 | "source": "ids", 35 | "item": "id", 36 | "steps": [ 37 | { 38 | "http": { 39 | "url": "https://movie.douban.com/subject/{id}/", 40 | "method": "GET", 41 | "headers": { 42 | "Host": "movie.douban.com", 43 | "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36" 44 | }, 45 | "result": "subject" 46 | } 47 | }, 48 | { 49 | "collect": { 50 | "source": "subject", 51 | "from": "['xp_text', './/script[@type=\"application/ld+json\"]']", 52 | "into": { 53 | "tvshow": { 54 | "title": "['xp_text', './name']", 55 | "original_available": "['xp_text', './datePublished']", 56 | "extra": { 57 | "[plugin_id]": { 58 | "poster": "['xp_texts', './image', 're_sub', '(.+/photo)/s_ratio_poster/(public/.+)', '\\\\1/m/\\\\2']", 59 | "backdrop": "['xp_texts', './image', 're_sub', '(.+/photo)/s_ratio_poster/(public/.+)', '\\\\1/l/\\\\2']" 60 | } 61 | } 62 | }, 63 | "publish_date": "['xp_text', './datePublished']", 64 | "available_date": "{$parent[available]}" 65 | } 66 | } 67 | }, 68 | { 69 | "collect": { 70 | "source": "subject", 71 | "into": { 72 | "tvshow": { 73 | "summary": "['xp_text', './/span[@property=\"v:summary\"]']" 74 | } 75 | } 76 | } 77 | }, 78 | { 79 | "retval": { 80 | "source": "tvshow", 81 | "compare": "['publish_date', '>=', 'available_date']" 82 | } 83 | } 84 | ] 85 | } 86 | } 87 | ] 88 | } -------------------------------------------------------------------------------- /scrapeflows/douban_tvshow_episode.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "tvshow_episode", 3 | "site": "douban.com", 4 | "steps": [ 5 | { 6 | "doh": { 7 | "hosts": [ 8 | "www.douban.com", 9 | "movie.douban.com" 10 | ] 11 | } 12 | }, 13 | { 14 | "http": { 15 | "url": "https://www.douban.com/search?cat=1002&q={title}", 16 | "method": "GET", 17 | "headers": { 18 | "Referer": "https://www.douban.com/", 19 | "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36" 20 | }, 21 | "result": "metadata" 22 | } 23 | }, 24 | { 25 | "collect": { 26 | "source": "metadata", 27 | "into": { 28 | "ids": "['re_matches', '电视剧].*?sid:\\s*(\\d+)\\s*,']" 29 | } 30 | } 31 | }, 32 | { 33 | "loop": { 34 | "source": "ids", 35 | "item": "id", 36 | "steps": [ 37 | { 38 | "http": { 39 | "url": "https://movie.douban.com/subject/{id}/", 40 | "method": 
"GET", 41 | "headers": { 42 | "Host": "movie.douban.com", 43 | "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36" 44 | }, 45 | "result": "subject" 46 | } 47 | }, 48 | { 49 | "collect": { 50 | "source": "subject", 51 | "from": "['xp_text', './/script[@type=\"application/ld+json\"]']", 52 | "into": { 53 | "episode": { 54 | "title": "['xp_text', './name']", 55 | "tagline": "", 56 | "original_available": "['xp_text', './datePublished']", 57 | "certificate": "", 58 | "genre": "['xp_texts', './genre/*']", 59 | "actor": "['xp_texts', './actor//name']", 60 | "writer": "['xp_texts', './author//name']", 61 | "director": "['xp_texts', './director//name']", 62 | "extra": { 63 | "[plugin_id]": { 64 | "tvshow": { 65 | "title": "['xp_text', './name']", 66 | "original_available": "['xp_text', './datePublished']", 67 | "extra": { 68 | "[plugin_id]": { 69 | "poster": "['xp_texts', './image', 're_sub', '(.+/photo)/s_ratio_poster/(public/.+)', '\\\\1/m/\\\\2']", 70 | "backdrop": "['xp_texts', './image', 're_sub', '(.+/photo)/s_ratio_poster/(public/.+)', '\\\\1/l/\\\\2']" 71 | } 72 | } 73 | }, 74 | "rating": { 75 | "[plugin_id]": "['xp_text', './/ratingValue', 'float']" 76 | }, 77 | "poster": "['xp_texts', './image', 're_sub', '(.+/photo)/s_ratio_poster/(public/.+)', '\\\\1/m/\\\\2']" 78 | } 79 | } 80 | }, 81 | "publish_date": "['xp_text', './datePublished']", 82 | "available_date": "{$parent[available]}" 83 | } 84 | } 85 | }, 86 | { 87 | "collect": { 88 | "source": "subject", 89 | "into": { 90 | "episode": { 91 | "summary": "['xp_text', './/span[@property=\"v:summary\"]']", 92 | "extra": { 93 | "[plugin_id]": { 94 | "tvshow": { 95 | "summary": "['xp_text', './/span[@property=\"v:summary\"]']" 96 | }, 97 | "reference": { 98 | "imdb": "['re_match', 'IMDb:\\s*(.*?)\\s*
    ']" 99 | } 100 | } 101 | } 102 | } 103 | } 104 | } 105 | }, 106 | { 107 | "collect": { 108 | "source": "$parent", 109 | "into": { 110 | "episode": { 111 | "season": "['get', 'season']", 112 | "episode": "['get', 'episode']" 113 | } 114 | } 115 | } 116 | }, 117 | { 118 | "retval": { 119 | "source": "episode", 120 | "compare": "['publish_date', '>=', 'available_date']" 121 | } 122 | } 123 | ] 124 | } 125 | } 126 | ] 127 | } -------------------------------------------------------------------------------- /scrapeflows/maoyan_movie.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "movie", 3 | "site": "maoyan.com", 4 | "steps": [ 5 | { 6 | "doh": { 7 | "host": "i.maoyan.com" 8 | } 9 | }, 10 | { 11 | "http": { 12 | "url": "https://i.maoyan.com/apollo/ajax/search?kw={title}&cityId=1&stype=-1", 13 | "method": "GET", 14 | "headers": { 15 | "Host": "i.maoyan.com", 16 | "Referer": "https://i.maoyan.com/apollo/search?searchtype=movie&$from=maoyan", 17 | "User-Agent": "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Mobile Safari/537.36" 18 | }, 19 | "result": "metadata" 20 | } 21 | }, 22 | { 23 | "collect": { 24 | "source": "metadata", 25 | "into": { 26 | "ids": "['re_matches', '\"id\":(\\d+),[^{{}}]*?\"movieType\":0']" 27 | } 28 | } 29 | }, 30 | { 31 | "loop": { 32 | "source": "ids", 33 | "item": "id", 34 | "steps": [ 35 | { 36 | "http": { 37 | "url": "https://i.maoyan.com/asgard/movie/{id}?_v_=yes&channelId=1&cityId=1", 38 | "method": "GET", 39 | "headers": { 40 | "Host": "i.maoyan.com", 41 | "Referer": "https://i.maoyan.com/apollo/search?searchtype=movie&$from=maoyan", 42 | "User-Agent": "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Mobile Safari/537.36" 43 | }, 44 | "result": "subject" 45 | } 46 | }, 47 | { 48 | "collect": { 49 | "source": "subject", 50 | "from": "['re_match', 'var AppData = (.*?);\\s*']", 51 | "into": { 52 | "movie": { 53 | "title": "['xp_text', './movie/nm']", 54 | "tagline": "['xp_text', './movie/enm']", 55 | "original_available": "['xp_text', './movie/pubDate', 'strftime', '%Y-%m-%d', 'True']", 56 | "summary": "['xp_text', './movie/dra']", 57 | "certificate": "", 58 | "genre": "['xp_text', './movie/cat', 'split', ',']", 59 | "actor": "['xp_text', './movie/star', 'split', ',']", 60 | "writer": [], 61 | "director": "['xp_text', './movie/dir', 'split', ',']", 62 | "extra": { 63 | "[plugin_id]": { 64 | "rating": { 65 | "[plugin_id]": "['xp_text', './movie/sc', 'float']" 66 | }, 67 | "poster": "['xp_texts', './movie/img']", 68 | "backdrop": "['xp_texts', './movie/img']" 69 | } 70 | } 71 | }, 72 | "publish_date": "['xp_text', './movie/pubDate', 'strftime', '%Y-%m-%d', 'True']", 73 | "available_date": "{$parent[available]}" 74 | } 75 | } 76 | }, 77 | { 78 | "retval": { 79 | "source": "movie", 80 | "compare": "['publish_date', '>=', 'available_date']" 81 | } 82 | } 83 | ] 84 | } 85 | } 86 | ] 87 | } -------------------------------------------------------------------------------- /scrapeflows/maoyan_tvshow.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "tvshow", 3 | "site": "maoyan.com", 4 | "steps": [ 5 | { 6 | "doh": { 7 | "host": "i.maoyan.com" 8 | } 9 | }, 10 | { 11 | "http": { 12 | "url": "https://i.maoyan.com/apollo/ajax/search?kw={title}&cityId=1&stype=-1", 13 | "method": "GET", 14 | "headers": { 15 | "Host": "i.maoyan.com", 16 | 
"Referer": "https://i.maoyan.com/apollo/search?searchtype=movie&$from=maoyan", 17 | "User-Agent": "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Mobile Safari/537.36" 18 | }, 19 | "result": "metadata" 20 | } 21 | }, 22 | { 23 | "collect": { 24 | "source": "metadata", 25 | "into": { 26 | "ids": "['re_matches', '\"id\":(\\d+),[^{{}}]*?\"movieType\":1']" 27 | } 28 | } 29 | }, 30 | { 31 | "loop": { 32 | "source": "ids", 33 | "item": "id", 34 | "steps": [ 35 | { 36 | "http": { 37 | "url": "https://i.maoyan.com/asgard/movie/{id}?_v_=yes&channelId=1&cityId=1", 38 | "method": "GET", 39 | "headers": { 40 | "Host": "i.maoyan.com", 41 | "Referer": "https://i.maoyan.com/apollo/search?searchtype=movie&$from=maoyan", 42 | "User-Agent": "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Mobile Safari/537.36" 43 | }, 44 | "result": "subject" 45 | } 46 | }, 47 | { 48 | "collect": { 49 | "source": "subject", 50 | "from": "['re_match', 'var AppData = (.*?);\\s*']", 51 | "into": { 52 | "tvshow": { 53 | "title": "['xp_text', './movie/nm']", 54 | "original_available": "['xp_text', './movie/pubDate', 'strftime', '%Y-%m-%d', 'True']", 55 | "summary": "['xp_text', './movie/dra']", 56 | "extra": { 57 | "[plugin_id]": { 58 | "poster": "['xp_texts', './movie/img']", 59 | "backdrop": "['xp_texts', './movie/img']" 60 | } 61 | } 62 | }, 63 | "publish_date": "['xp_text', './movie/pubDate', 'strftime', '%Y-%m-%d', 'True']", 64 | "available_date": "{$parent[available]}" 65 | } 66 | } 67 | }, 68 | { 69 | "retval": { 70 | "source": "tvshow", 71 | "compare": "['publish_date', '>=', 'available_date']" 72 | } 73 | } 74 | ] 75 | } 76 | } 77 | ] 78 | } -------------------------------------------------------------------------------- /scrapeflows/maoyan_tvshow_episode.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "tvshow_episode", 3 | "site": "maoyan.com", 4 | "steps": [ 5 | { 6 | "doh": { 7 | "host": "i.maoyan.com" 8 | } 9 | }, 10 | { 11 | "http": { 12 | "url": "https://i.maoyan.com/apollo/ajax/search?kw={title}&cityId=1&stype=-1", 13 | "method": "GET", 14 | "headers": { 15 | "Host": "i.maoyan.com", 16 | "Referer": "https://i.maoyan.com/apollo/search?searchtype=movie&$from=maoyan", 17 | "User-Agent": "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Mobile Safari/537.36" 18 | }, 19 | "result": "metadata" 20 | } 21 | }, 22 | { 23 | "collect": { 24 | "source": "metadata", 25 | "into": { 26 | "ids": "['re_matches', '\"id\":(\\d+),[^{{}}]*?\"movieType\":1']" 27 | } 28 | } 29 | }, 30 | { 31 | "loop": { 32 | "source": "ids", 33 | "item": "id", 34 | "steps": [ 35 | { 36 | "http": { 37 | "url": "https://i.maoyan.com/asgard/movie/{id}?_v_=yes&channelId=1&cityId=1", 38 | "method": "GET", 39 | "headers": { 40 | "Host": "i.maoyan.com", 41 | "Referer": "https://i.maoyan.com/apollo/search?searchtype=movie&$from=maoyan", 42 | "User-Agent": "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Mobile Safari/537.36" 43 | }, 44 | "result": "subject" 45 | } 46 | }, 47 | { 48 | "collect": { 49 | "source": "subject", 50 | "from": "['re_match', 'var AppData = (.*?);\\s*']", 51 | "into": { 52 | "episode": { 53 | "title": "['xp_text', './movie/nm']", 54 | "tagline": "", 55 | "original_available": "['xp_text', './movie/pubDate', 'strftime', '%Y-%m-%d', 
'True']", 56 | "summary": "['xp_text', './movie/dra']", 57 | "certificate": "", 58 | "genre": "['xp_text', './movie/cat', 'split', ',']", 59 | "actor": "['xp_text', './movie/star', 'split', ',']", 60 | "writer": [], 61 | "director": "['xp_text', './movie/dir', 'split', ',']", 62 | "extra": { 63 | "[plugin_id]": { 64 | "tvshow": { 65 | "title": "['xp_text', './movie/nm']", 66 | "original_available": "['xp_text', './movie/pubDate', 'strftime', '%Y-%m-%d', 'True']", 67 | "summary": "['xp_text', './movie/dra']", 68 | "extra": { 69 | "[plugin_id]": { 70 | "poster": "['xp_texts', './movie/img']", 71 | "backdrop": "['xp_texts', './movie/img']" 72 | } 73 | } 74 | }, 75 | "rating": { 76 | "[plugin_id]": "['xp_text', './movie/sc', 'float']" 77 | }, 78 | "poster": "['xp_texts', './movie/img']" 79 | } 80 | } 81 | }, 82 | "publish_date": "['xp_text', './movie/pubDate', 'strftime', '%Y-%m-%d', 'True']", 83 | "available_date": "{$parent[available]}" 84 | } 85 | } 86 | }, 87 | { 88 | "collect": { 89 | "source": "$parent", 90 | "into": { 91 | "episode": { 92 | "season": "['get', 'season']", 93 | "episode": "['get', 'episode']" 94 | } 95 | } 96 | } 97 | }, 98 | { 99 | "retval": { 100 | "source": "episode", 101 | "compare": "['publish_date', '>=', 'available_date']" 102 | } 103 | } 104 | ] 105 | } 106 | } 107 | ] 108 | } -------------------------------------------------------------------------------- /scrapeflows/mtime_movie.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "movie", 3 | "site": "mtime.com", 4 | "steps": [ 5 | { 6 | "doh": { 7 | "host": "front-gateway.mtime.com" 8 | } 9 | }, 10 | { 11 | "http": { 12 | "url": "http://front-gateway.mtime.com/mtime-search/search/unionSearch2", 13 | "method": "POST", 14 | "headers": { 15 | "Content-Type": "application/x-www-form-urlencoded", 16 | "Host": "front-gateway.mtime.com", 17 | "Origin": "http://film.mtime.com", 18 | "Referer": "http://film.mtime.com/", 19 | "User-Agent": "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Mobile Safari/537.36" 20 | }, 21 | "body": { 22 | "keyword": "{title}", 23 | "searchType": 0, 24 | "pageIndex": 1, 25 | "pageSize": "{limit}", 26 | "year": "{year}" 27 | }, 28 | "result": "metadata" 29 | } 30 | }, 31 | { 32 | "collect": { 33 | "source": "metadata", 34 | "into": { 35 | "ids": "['re_matches', '\"movieId\":(\\d+),[^{{}}]*?\"movieContentType\":\"电影\"']" 36 | } 37 | } 38 | }, 39 | { 40 | "loop": { 41 | "source": "ids", 42 | "item": "id", 43 | "steps": [ 44 | { 45 | "http": { 46 | "url": "http://front-gateway.mtime.com/library/movie/detail.api?movieId={id}", 47 | "method": "GET", 48 | "headers": { 49 | "Host": "front-gateway.mtime.com", 50 | "Origin": "http://movie.mtime.com", 51 | "Referer": "http://movie.mtime.com/", 52 | "User-Agent": "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Mobile Safari/537.36" 53 | }, 54 | "result": "subject" 55 | } 56 | }, 57 | { 58 | "collect": { 59 | "source": "subject", 60 | "into": { 61 | "movie": { 62 | "title": "['xp_text', './data/basic/name']", 63 | "tagline": "['xp_text', './data/basic/nameEn']", 64 | "original_available": "['xp_text', './data/basic/releaseDate', 'reformat', '%Y%m%d', '%Y-%m-%d']", 65 | "summary": "['xp_text', './data/basic/story']", 66 | "certificate": "", 67 | "genre": "['xp_texts', './data/basic/movieGenres//name']", 68 | "actor": "['xp_texts', './data/basic/actors//name']", 69 | "writer": 
"['xp_texts', './data/basic/writers//name']", 70 | "director": "['xp_texts', './data/basic/directors//name']", 71 | "extra": { 72 | "[plugin_id]": { 73 | "rating": { 74 | "[plugin_id]": "['xp_text', './data/basic/overallRating', 'float']" 75 | }, 76 | "poster": "['xp_texts', './data/basic/img']", 77 | "backdrop": "['xp_texts', './data/basic/bigImage']" 78 | } 79 | } 80 | } 81 | } 82 | } 83 | }, 84 | { 85 | "retval": { 86 | "source": "movie" 87 | } 88 | } 89 | ] 90 | } 91 | } 92 | ] 93 | } -------------------------------------------------------------------------------- /scrapeflows/mtime_tvshow.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "tvshow", 3 | "site": "mtime.com", 4 | "steps": [ 5 | { 6 | "doh": { 7 | "host": "front-gateway.mtime.com" 8 | } 9 | }, 10 | { 11 | "http": { 12 | "url": "http://front-gateway.mtime.com/mtime-search/search/unionSearch2", 13 | "method": "POST", 14 | "headers": { 15 | "Content-Type": "application/x-www-form-urlencoded", 16 | "Host": "front-gateway.mtime.com", 17 | "Origin": "http://film.mtime.com", 18 | "Referer": "http://film.mtime.com/", 19 | "User-Agent": "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Mobile Safari/537.36" 20 | }, 21 | "body": { 22 | "keyword": "{title}", 23 | "searchType": 0, 24 | "pageIndex": 1, 25 | "pageSize": "{limit}", 26 | "year": "{year}" 27 | }, 28 | "result": "metadata" 29 | } 30 | }, 31 | { 32 | "collect": { 33 | "source": "metadata", 34 | "into": { 35 | "ids": "['re_matches', '\"movieId\":(\\d+),[^{{}}]*?\"movieContentType\":\"电视剧\"']" 36 | } 37 | } 38 | }, 39 | { 40 | "loop": { 41 | "source": "ids", 42 | "item": "id", 43 | "steps": [ 44 | { 45 | "http": { 46 | "url": "http://front-gateway.mtime.com/library/movie/detail.api?movieId={id}", 47 | "method": "GET", 48 | "headers": { 49 | "Host": "front-gateway.mtime.com", 50 | "Origin": "http://movie.mtime.com", 51 | "Referer": "http://movie.mtime.com/", 52 | "User-Agent": "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Mobile Safari/537.36" 53 | }, 54 | "result": "subject" 55 | } 56 | }, 57 | { 58 | "collect": { 59 | "source": "subject", 60 | "into": { 61 | "tvshow": { 62 | "title": "['xp_text', './data/basic/name']", 63 | "original_available": "['xp_text', './data/basic/releaseDate', 'reformat', '%Y%m%d', '%Y-%m-%d']", 64 | "summary": "['xp_text', './data/basic/story']", 65 | "extra": { 66 | "[plugin_id]": { 67 | "poster": "['xp_texts', './data/basic/img']", 68 | "backdrop": "['xp_texts', './data/basic/img']" 69 | } 70 | } 71 | } 72 | } 73 | } 74 | }, 75 | { 76 | "retval": { 77 | "source": "tvshow" 78 | } 79 | } 80 | ] 81 | } 82 | } 83 | ] 84 | } -------------------------------------------------------------------------------- /scrapeflows/mtime_tvshow_episode.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "tvshow_episode", 3 | "site": "mtime.com", 4 | "steps": [ 5 | { 6 | "doh": { 7 | "host": "front-gateway.mtime.com" 8 | } 9 | }, 10 | { 11 | "http": { 12 | "url": "http://front-gateway.mtime.com/mtime-search/search/unionSearch2", 13 | "method": "POST", 14 | "headers": { 15 | "Content-Type": "application/x-www-form-urlencoded", 16 | "Host": "front-gateway.mtime.com", 17 | "Origin": "http://film.mtime.com", 18 | "Referer": "http://film.mtime.com/", 19 | "User-Agent": "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) 
AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Mobile Safari/537.36" 20 | }, 21 | "body": { 22 | "keyword": "{title}", 23 | "searchType": 0, 24 | "pageIndex": 1, 25 | "pageSize": "{limit}", 26 | "year": "{year}" 27 | }, 28 | "result": "metadata" 29 | } 30 | }, 31 | { 32 | "collect": { 33 | "source": "metadata", 34 | "into": { 35 | "ids": "['re_matches', '\"movieId\":(\\d+),[^{{}}]*?\"movieContentType\":\"电视剧\"']" 36 | } 37 | } 38 | }, 39 | { 40 | "loop": { 41 | "source": "ids", 42 | "item": "id", 43 | "steps": [ 44 | { 45 | "http": { 46 | "url": "http://front-gateway.mtime.com/library/movie/detail.api?movieId={id}", 47 | "method": "GET", 48 | "headers": { 49 | "Host": "front-gateway.mtime.com", 50 | "Origin": "http://movie.mtime.com", 51 | "Referer": "http://movie.mtime.com/", 52 | "User-Agent": "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Mobile Safari/537.36" 53 | }, 54 | "result": "subject" 55 | } 56 | }, 57 | { 58 | "collect": { 59 | "source": "subject", 60 | "into": { 61 | "episode": { 62 | "title": "['xp_text', './data/basic/name']", 63 | "tagline": "", 64 | "original_available": "['xp_text', './data/basic/releaseDate', 'reformat', '%Y%m%d', '%Y-%m-%d']", 65 | "summary": "['xp_text', './data/basic/story']", 66 | "certificate": "", 67 | "genre": "['xp_texts', './data/basic/movieGenres//name']", 68 | "actor": "['xp_texts', './data/basic/actors//name']", 69 | "writer": "['xp_texts', './data/basic/writers//name']", 70 | "director": "['xp_texts', './data/basic/directors//name']", 71 | "extra": { 72 | "[plugin_id]": { 73 | "tvshow": { 74 | "title": "['xp_text', './data/basic/name']", 75 | "original_available": "['xp_text', './data/basic/releaseDate', 'reformat', '%Y%m%d', '%Y-%m-%d']", 76 | "summary": "['xp_text', './data/basic/story']", 77 | "extra": { 78 | "[plugin_id]": { 79 | "poster": "['xp_texts', './data/basic/img']", 80 | "backdrop": "['xp_texts', './data/basic/img']" 81 | } 82 | } 83 | }, 84 | "rating": { 85 | "[plugin_id]": "['xp_text', './data/basic/overallRating', 'float']" 86 | }, 87 | "poster": "['xp_texts', './data/basic/img']" 88 | } 89 | } 90 | } 91 | } 92 | } 93 | }, 94 | { 95 | "collect": { 96 | "source": "$parent", 97 | "into": { 98 | "episode": { 99 | "season": "['get', 'season']", 100 | "episode": "['get', 'episode']" 101 | } 102 | } 103 | } 104 | }, 105 | { 106 | "retval": { 107 | "source": "episode" 108 | } 109 | } 110 | ] 111 | } 112 | } 113 | ] 114 | } -------------------------------------------------------------------------------- /scrapeflows/tmdb_movie.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "movie", 3 | "site": "themoviedb.org", 4 | "doh_enabled": true, 5 | "config": { 6 | "apikey": { 7 | "icon": "key", 8 | "name": "API Key" 9 | } 10 | }, 11 | "steps": [ 12 | { 13 | "retval": { 14 | "ifempty": "apikey" 15 | } 16 | }, 17 | { 18 | "doh": { 19 | "host": "api.tmdb.org" 20 | } 21 | }, 22 | { 23 | "http": { 24 | "url": "https://api.tmdb.org/3/search/movie?api_key={apikey}&language={lang}&query={title}&year={year}&page=1", 25 | "method": "GET", 26 | "headers": { 27 | "Accept": "application/json" 28 | }, 29 | "timeout": 20, 30 | "result": "metadata" 31 | } 32 | }, 33 | { 34 | "collect": { 35 | "source": "metadata", 36 | "into": { 37 | "ids": "['xp_texts', './results//id']" 38 | } 39 | } 40 | }, 41 | { 42 | "loop": { 43 | "source": "ids", 44 | "item": "id", 45 | "steps": [ 46 | { 47 | "http": { 48 | "url": 
"https://api.tmdb.org/3/movie/{id}?api_key={$parent[apikey]}&language={$parent[lang]}&append_to_response=credits,release_dates", 49 | "method": "GET", 50 | "headers": { 51 | "Accept": "application/json" 52 | }, 53 | "timeout": 20, 54 | "result": "subject" 55 | } 56 | }, 57 | { 58 | "collect": { 59 | "source": "subject", 60 | "into": { 61 | "movie": { 62 | "title": "['xp_text', './title']", 63 | "tagline": "['xp_text', './tagline']", 64 | "original_available": "['xp_text', './release_date']", 65 | "summary": "['xp_text', './overview']", 66 | "certificate": "['re_match', '\"release_dates\":.*?\"US\".*?\"certification\":\"([^\"]*?)\"']", 67 | "genre": "['xp_texts', './genres//name']", 68 | "actor": "['xp_texts', './credits/cast//name']", 69 | "writer": "['re_matches', '\"name\":\"([^\"]*?)\"[^{{}}]*?\"department\":\"Writing\"']", 70 | "director": "['re_matches', '\"name\":\"([^\"]*?)\"[^{{}}]*?\"department\":\"Directing\"']", 71 | "extra": { 72 | "[plugin_id]": { 73 | "poster": [ 74 | "['xp_text', './poster_path', 'prefix', 'https://image.tmdb.org/t/p/w500']" 75 | ], 76 | "backdrop": [ 77 | "['xp_text', './backdrop_path', 'prefix', 'https://image.tmdb.org/t/p/original']" 78 | ], 79 | "reference": { 80 | "themoviedb": "['xp_text', './id', 'int']", 81 | "imdb": "['xp_text', './imdb_id']" 82 | }, 83 | "collection_id": { 84 | "themoviedb": "['xp_text', './belongs_to_collection/id', 'int']" 85 | } 86 | } 87 | } 88 | }, 89 | "rating": "['xp_text', './vote_average', 're_sub', '(\\d+\\.\\d)\\d*', '\\\\1']" 90 | } 91 | } 92 | }, 93 | { 94 | "collect": { 95 | "source": "rating", 96 | "into": { 97 | "movie": { 98 | "extra": { 99 | "[plugin_id]": { 100 | "rating": { 101 | "[plugin_id]": "['re_match', '(.*)', 'float']" 102 | } 103 | } 104 | } 105 | } 106 | } 107 | } 108 | }, 109 | { 110 | "retval": { 111 | "source": "movie" 112 | } 113 | } 114 | ] 115 | } 116 | } 117 | ] 118 | } -------------------------------------------------------------------------------- /scrapeflows/tmdb_tvshow.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "tvshow", 3 | "site": "themoviedb.org", 4 | "doh_enabled": true, 5 | "config": { 6 | "apikey": { 7 | "icon": "key", 8 | "name": "API Key" 9 | } 10 | }, 11 | "steps": [ 12 | { 13 | "retval": { 14 | "ifempty": "apikey" 15 | } 16 | }, 17 | { 18 | "doh": { 19 | "host": "api.tmdb.org" 20 | } 21 | }, 22 | { 23 | "http": { 24 | "url": "https://api.tmdb.org/3/search/tv?api_key={apikey}&language={lang}&query={title}&year={year}&page=1", 25 | "method": "GET", 26 | "headers": { 27 | "Accept": "application/json" 28 | }, 29 | "timeout": 20, 30 | "result": "metadata" 31 | } 32 | }, 33 | { 34 | "collect": { 35 | "source": "metadata", 36 | "into": { 37 | "ids": "['xp_texts', './results//id']" 38 | } 39 | } 40 | }, 41 | { 42 | "loop": { 43 | "source": "ids", 44 | "item": "id", 45 | "steps": [ 46 | { 47 | "http": { 48 | "url": "https://api.tmdb.org/3/tv/{id}?api_key={$parent[apikey]}&language={$parent[lang]}", 49 | "method": "GET", 50 | "headers": { 51 | "Accept": "application/json" 52 | }, 53 | "timeout": 20, 54 | "result": "subject" 55 | } 56 | }, 57 | { 58 | "collect": { 59 | "source": "subject", 60 | "into": { 61 | "tvshow": { 62 | "title": "['xp_text', './name']", 63 | "original_available": "['xp_text', './first_air_date']", 64 | "summary": "['xp_text', './overview']", 65 | "extra": { 66 | "[plugin_id]": { 67 | "poster": [ 68 | "['xp_text', './poster_path', 'prefix', 'https://image.tmdb.org/t/p/w500']" 69 | ], 70 | "backdrop": [ 71 | 
"['xp_text', './backdrop_path', 'prefix', 'https://image.tmdb.org/t/p/original']" 72 | ] 73 | } 74 | } 75 | } 76 | } 77 | } 78 | }, 79 | { 80 | "retval": { 81 | "source": "tvshow" 82 | } 83 | } 84 | ] 85 | } 86 | } 87 | ] 88 | } -------------------------------------------------------------------------------- /scrapeflows/tmdb_tvshow_episode.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "tvshow_episode", 3 | "site": "themoviedb.org", 4 | "doh_enabled": true, 5 | "config": { 6 | "apikey": { 7 | "icon": "key", 8 | "name": "API Key" 9 | } 10 | }, 11 | "steps": [ 12 | { 13 | "retval": { 14 | "ifempty": "apikey" 15 | } 16 | }, 17 | { 18 | "doh": { 19 | "host": "api.tmdb.org" 20 | } 21 | }, 22 | { 23 | "http": { 24 | "url": "https://api.tmdb.org/3/search/tv?api_key={apikey}&language={lang}&query={title}&year={year}&page=1", 25 | "method": "GET", 26 | "headers": { 27 | "Accept": "application/json" 28 | }, 29 | "timeout": 20, 30 | "result": "metadata" 31 | } 32 | }, 33 | { 34 | "collect": { 35 | "source": "metadata", 36 | "into": { 37 | "ids": "['xp_texts', './results//id']" 38 | } 39 | } 40 | }, 41 | { 42 | "loop": { 43 | "source": "ids", 44 | "item": "id", 45 | "steps": [ 46 | { 47 | "http": { 48 | "url": "https://api.tmdb.org/3/tv/{id}?api_key={$parent[apikey]}&language={$parent[lang]}&append_to_response=content_ratings,external_ids", 49 | "method": "GET", 50 | "headers": { 51 | "Accept": "application/json" 52 | }, 53 | "timeout": 20, 54 | "result": "subject" 55 | } 56 | }, 57 | { 58 | "collect": { 59 | "source": "subject", 60 | "into": { 61 | "episode": { 62 | "title": "['xp_text', './name']", 63 | "certificate": "['re_match', '\"content_ratings\":.*?\"US\".*?\"rating\":\"([^\"]*?)\"']", 64 | "genre": "['xp_texts', './genres//name']", 65 | "extra": { 66 | "[plugin_id]": { 67 | "tvshow": { 68 | "title": "['xp_text', './name']", 69 | "original_available": "['xp_text', './first_air_date']", 70 | "summary": "['xp_text', './overview']", 71 | "extra": { 72 | "[plugin_id]": { 73 | "poster": [ 74 | "['xp_text', './poster_path', 'prefix', 'https://image.tmdb.org/t/p/w500']" 75 | ], 76 | "backdrop": [ 77 | "['xp_text', './backdrop_path', 'prefix', 'https://image.tmdb.org/t/p/original']" 78 | ] 79 | } 80 | } 81 | }, 82 | "reference": { 83 | "themoviedb_tv": "['xp_text', './id', 'int']", 84 | "imdb": "['xp_text', './external_ids/imdb_id']" 85 | } 86 | } 87 | } 88 | } 89 | } 90 | } 91 | }, 92 | { 93 | "http": { 94 | "url": "https://api.tmdb.org/3/tv/{id}/season/{$parent[season]}/episode/{$parent[episode]}?api_key={$parent[apikey]}&language={$parent[lang]}&append_to_response=credits", 95 | "method": "GET", 96 | "headers": { 97 | "Accept": "application/json" 98 | }, 99 | "timeout": 20, 100 | "result": "subject" 101 | } 102 | }, 103 | { 104 | "collect": { 105 | "source": "subject", 106 | "into": { 107 | "episode": { 108 | "tagline": "['xp_text', './name']", 109 | "season": "['xp_text', './season_number', 'int']", 110 | "episode": "['xp_text', './episode_number', 'int']", 111 | "original_available": "['xp_text', './air_date']", 112 | "summary": "['xp_text', './overview']", 113 | "actor": "['xp_texts', './credits/cast//name']", 114 | "writer": "['re_matches', '\"department\":\"Writing\"[^{{}}]*?\"name\":\"([^\"]*?)\"']", 115 | "director": "['re_matches', '\"department\":\"Directing\"[^{{}}]*?\"name\":\"([^\"]*?)\"']", 116 | "extra": { 117 | "[plugin_id]": { 118 | "poster": [ 119 | "['xp_text', './still_path', 'prefix', 'https://image.tmdb.org/t/p/w500']" 
120 | ] 121 | } 122 | } 123 | }, 124 | "rating": "['xp_text', './vote_average', 're_sub', '(\\d+\\.\\d)\\d*', '\\\\1']" 125 | } 126 | } 127 | }, 128 | { 129 | "collect": { 130 | "source": "subject", 131 | "into": { 132 | "episode": { 133 | "actor": "['xp_texts', './credits/guest_stars//name']" 134 | } 135 | } 136 | } 137 | }, 138 | { 139 | "collect": { 140 | "source": "rating", 141 | "into": { 142 | "episode": { 143 | "extra": { 144 | "[plugin_id]": { 145 | "rating": { 146 | "[plugin_id]": "['re_match', '(.*)', 'float']" 147 | } 148 | } 149 | } 150 | } 151 | } 152 | } 153 | }, 154 | { 155 | "retval": { 156 | "source": "episode" 157 | } 158 | } 159 | ] 160 | } 161 | } 162 | ] 163 | } -------------------------------------------------------------------------------- /scraper/__init__.py: -------------------------------------------------------------------------------- 1 | """A simple web scraper used by the Synology VideoInfo plugin.""" 2 | 3 | __all__ = ["scrape"] 4 | 5 | from scraper.scraper import scrape 6 | -------------------------------------------------------------------------------- /scraper/enums.py: -------------------------------------------------------------------------------- 1 | """Enum classes for this package.""" 2 | from enum import Enum, unique 3 | 4 | 5 | @unique 6 | class VideoType(Enum): 7 | """Type of video being scraped.""" 8 | 9 | MOVIE = "movie" 10 | TVSHOW = "tvshow" 11 | TVSHOW_EPISODE = "tvshow_episode" 12 | 13 | 14 | @unique 15 | class Language(Enum): 16 | """Language and country code, ISO 639-1 and ISO 3166-1.""" 17 | 18 | CHS = "zh-CN" # 简体中文 Simplified Chinese 19 | CHT = "zh-TW" # 繁体中文 Traditional Chinese 20 | CSY = "cs-CZ" # 捷克语 Czech 21 | DAN = "da-DK" # 丹麦语 Danish 22 | ENU = "en-US" # 英语 English 23 | FRE = "fr-FR" # 法语 French 24 | GER = "de-DE" # 德语 German 25 | HUN = "hu-HU" # 匈牙利语 Hungarian 26 | ITA = "it-IT" # 意大利语 Italian 27 | JPN = "ja-JP" # 日语 Japanese 28 | KRN = "ko-KR" # 韩语 Korean 29 | NLD = "nl-NL" # 荷兰语 Dutch 30 | NOR = "no-NO" # 挪威语 Norwegian 31 | PLK = "pl-PL" # 波兰语 Polish 32 | PTB = "pt-BR" # 巴西葡萄牙语 Brazilian Portuguese 33 | PTG = "pt-PT" # 葡萄牙语 Portuguese 34 | RUS = "ru-RU" # 俄语 Russian 35 | SPN = "es-ES" # 西班牙语 Spanish 36 | SVE = "sv-SE" # 瑞典语 Swedish 37 | TRK = "tr-TR" # 土耳其语 Turkish 38 | THA = "th-TH" # 泰语 Thai 39 | 40 | 41 | def video_type(value): 42 | """Convert string to VideoType enum.""" 43 | return VideoType[value.upper()] 44 | 45 | 46 | def lang_type(value): 47 | """Convert string to Language enum.""" 48 | return Language[value.upper()] 49 | -------------------------------------------------------------------------------- /scraper/exceptions.py: -------------------------------------------------------------------------------- 1 | """Exception classes for this package.""" 2 | 3 | 4 | class ScrapeError(Exception): 5 | def __init__(self, error_code: int): 6 | self.error_code = error_code 7 | 8 | 9 | class RequestSendError(ScrapeError): 10 | def __init__(self): 11 | super().__init__(1003) 12 | 13 | 14 | class ResultParseError(ScrapeError): 15 | def __init__(self): 16 | super().__init__(1004) 17 | 18 | 19 | class StopSignal(Exception): 20 | pass 21 | -------------------------------------------------------------------------------- /scraper/fake.py: -------------------------------------------------------------------------------- 1 | """Fake result for testing.""" 2 | import json 3 | from typing import Any, List 4 | 5 | 6 | def fake_result(plugin_id: str, videotype: str) -> str: 7 | """Return fake result.""" 8 | result: List[Any] = [] 9 | if 
videotype == "movie": 10 | result.append(_movie) 11 | elif videotype == "tvshow": 12 | result.append(_tvshow) 13 | elif videotype == "tvshow_episode": 14 | result.append(_tvshow_episode) 15 | return json.dumps( 16 | {"success": True, "result": result}, ensure_ascii=False, indent=2 17 | ).replace("[plugin_id]", plugin_id) 18 | 19 | 20 | _movie = { 21 | "title": "unknown", 22 | "tagline": "unknown", 23 | "original_available": "1970-01-01", 24 | "summary": "unknown", 25 | "certificate": "unknown", 26 | "genre": ["unknown"], 27 | "actor": ["unknown"], 28 | "writer": ["unknown"], 29 | "director": ["unknown"], 30 | "extra": { 31 | "[plugin_id]": { 32 | "rating": {"[plugin_id]": 0}, 33 | "poster": ["unknown"], 34 | "backdrop": ["unknown"], 35 | } 36 | }, 37 | } 38 | 39 | _tvshow = { 40 | "title": "unknown", 41 | "original_available": "1970-01-01", 42 | "summary": "unknown", 43 | "extra": { 44 | "[plugin_id]": { 45 | "poster": ["unknown"], 46 | "backdrop": ["unknown"], 47 | } 48 | }, 49 | } 50 | 51 | _tvshow_episode = { 52 | "title": "unknown", 53 | "tagline": "unknown", 54 | "season": 1, 55 | "episode": 1, 56 | "original_available": "1970-01-01", 57 | "summary": "unknown", 58 | "certificate": "unknown", 59 | "genre": ["unknown"], 60 | "actor": ["unknown"], 61 | "writer": ["unknown"], 62 | "director": ["unknown"], 63 | "extra": { 64 | "[plugin_id]": { 65 | "tvshow": { 66 | "title": "unknown", 67 | "original_available": "1970-01-01", 68 | "summary": "unknown", 69 | "extra": { 70 | "[plugin_id]": { 71 | "poster": ["unknown"], 72 | "backdrop": ["unknown"], 73 | } 74 | }, 75 | }, 76 | "rating": {"[plugin_id]": 0}, 77 | "poster": ["unknown"], 78 | } 79 | }, 80 | } 81 | -------------------------------------------------------------------------------- /scraper/functions/__init__.py: -------------------------------------------------------------------------------- 1 | """Defines the function decorator and abstract base class for arguments.""" 2 | __all__ = ["Args", "Func", "findfunc", "functions"] 3 | 4 | import inspect 5 | import logging 6 | import pkgutil 7 | from abc import ABC, abstractmethod 8 | from functools import wraps 9 | from typing import Any, Callable, Type 10 | 11 | _logger = logging.getLogger(__name__) 12 | 13 | 14 | class Args(ABC): 15 | """Abstract base class for function arguments.""" 16 | 17 | def __call__(self, *args, **kwargs): 18 | return self.parse(*args, **kwargs) 19 | 20 | @abstractmethod 21 | def parse(self, rawargs: Any, context: dict) -> "Args": 22 | pass 23 | 24 | @staticmethod 25 | def substitute(obj: Any, context: dict) -> Any: 26 | """Recursively substitute strings in an object with given context.""" 27 | if isinstance(obj, str): 28 | return obj.format(**context) 29 | elif isinstance(obj, list): 30 | return [Args.substitute(item, context) for item in obj] 31 | elif isinstance(obj, dict): 32 | return {k: Args.substitute(v, context) for k, v in obj.items()} 33 | else: 34 | return obj 35 | 36 | 37 | class Func: 38 | """Function decorator for registering functions.""" 39 | 40 | def __init__(self, name: str, args: Type[Args]): 41 | self.name = name 42 | self.args = args 43 | 44 | def __call__(self, func): 45 | @wraps(func) 46 | def wrapped(rawargs: Any, context: dict) -> Any: 47 | return func(self.args()(rawargs, context), context) 48 | 49 | # bind function name to a special attribute 50 | wrapped._funcname = self.name 51 | return wrapped 52 | 53 | 54 | # a dictionary of all registered functions 55 | functions = {} 56 | 57 | # load all marked functions in this package 58 | for 
loader, modname, _ in pkgutil.walk_packages(__path__): 59 | module = loader.find_spec(modname).loader.load_module(modname) 60 | funcs = inspect.getmembers( 61 | module, lambda m: (inspect.isfunction(m) and hasattr(m, "_funcname")) 62 | ) 63 | _logger.info("Load %d executable functions in %s.py", len(funcs), modname) 64 | functions.update({getattr(func, "_funcname"): func for _, func in funcs}) 65 | 66 | 67 | def findfunc(funcname: str) -> Callable[[Any, dict], Any]: 68 | """Find a registered function by name.""" 69 | func = functions.get(funcname) 70 | if func is None: 71 | _logger.error('Function "%s" not found', funcname) 72 | raise KeyError(f'Function "{funcname}" not found') 73 | return func 74 | -------------------------------------------------------------------------------- /scraper/functions/collect.py: -------------------------------------------------------------------------------- 1 | """The implementation of the collect function.""" 2 | import ast 3 | import logging 4 | import re 5 | import time 6 | from typing import Any 7 | from xml.etree import ElementTree 8 | from xml.etree.ElementTree import Element 9 | 10 | from scraper.exceptions import ResultParseError 11 | from scraper.functions import Args, Func 12 | from scraper.utils import dict_update, re_sub, str_to_etree, strftime, strip 13 | 14 | _logger = logging.getLogger(__name__) 15 | 16 | # define the valid string template pattern 17 | _pattern = re.compile(r"\s*\[.*]\s*") 18 | 19 | 20 | class CollectArgs(Args): 21 | """Arguments for the collect function.""" 22 | 23 | source: Any 24 | into: dict 25 | 26 | def parse(self, rawargs: dict, context: dict) -> "CollectArgs": 27 | source = context[rawargs["source"]] 28 | from_ = rawargs.get("from") 29 | if from_ is not None: 30 | source = _render(from_, source) 31 | 32 | self.source = source 33 | self.into = self.substitute(rawargs["into"], context) 34 | return self 35 | 36 | 37 | @Func("collect", CollectArgs) 38 | def collect(args: CollectArgs, context: dict) -> None: 39 | """Collect data from a source and put it into the context.""" 40 | for ctxkey, tmpl in args.into.items(): 41 | try: 42 | result = _render(tmpl, args.source) 43 | except Exception as e: 44 | _logger.error('Failed to collect "%s" using "%s"', ctxkey, tmpl) 45 | raise ResultParseError from e 46 | 47 | target = context.get(ctxkey) 48 | if isinstance(target, list) and isinstance(result, list): 49 | target.extend(x for x in result if x not in target) 50 | elif isinstance(target, dict) and isinstance(result, dict): 51 | dict_update(target, result) 52 | else: 53 | context[ctxkey] = result 54 | _logger.info('Collected "%s" using "%s"', ctxkey, tmpl) 55 | _logger.debug("<== result: %s", context[ctxkey]) 56 | 57 | 58 | def _render(tmpl: Any, source, etree=None): 59 | """Render a template with the given source.""" 60 | if etree is None and _need_etree(tmpl): 61 | etree = str_to_etree(source) 62 | 63 | if isinstance(tmpl, list): 64 | return [_render(item, source, etree) for item in tmpl] 65 | elif isinstance(tmpl, dict): 66 | return {k: _render(v, source, etree) for k, v in tmpl.items()} 67 | elif isinstance(tmpl, str): 68 | return _render_str(tmpl, source, etree) 69 | return tmpl 70 | 71 | 72 | def _render_str(tmpl: str, source, etree): 73 | """Render a string template with the given source.""" 74 | if len(tmpl.strip()) == 0: 75 | return "" 76 | elif re.fullmatch(_pattern, tmpl) is None: 77 | return tmpl 78 | 79 | # evaluate the string template to get strategy and arguments 80 | finder, expr, *modification = 
ast.literal_eval(tmpl) 81 | 82 | # find result from source 83 | result = None 84 | if isinstance(source, str): 85 | if finder.startswith("xp_"): 86 | result = _xpath_find(finder[3:], expr, etree) 87 | elif finder.startswith("re_"): 88 | result = _regex_match(finder[3:], expr, source) 89 | elif isinstance(source, dict) and finder == "get": 90 | result = source.get(expr) 91 | 92 | # modify result if needed 93 | if result is not None and result != "" and len(modification) > 0: 94 | modifier, *args = modification 95 | result = _modify(result, modifier, args) 96 | return strip(result) 97 | 98 | 99 | def _need_etree(tmpl: Any): 100 | """Check if the template needs an etree.""" 101 | if isinstance(tmpl, list): 102 | return any(_need_etree(item) for item in tmpl) 103 | elif isinstance(tmpl, dict): 104 | return any(_need_etree(v) for v in tmpl.values()) 105 | elif isinstance(tmpl, str): 106 | return "xp_" in tmpl 107 | return False 108 | 109 | 110 | def _xpath_find(strategy: str, expr: str, etree: Element): 111 | """Find strings in an element tree using xpath.""" 112 | if strategy == "elem": 113 | elem = etree.find(expr) 114 | if elem is not None: 115 | return ElementTree.tostring(elem, encoding="unicode") 116 | elif strategy == "elems": 117 | elist = etree.findall(expr) 118 | return [ElementTree.tostring(e, encoding="unicode") for e in elist] 119 | elif strategy == "text": 120 | return etree.findtext(expr) 121 | elif strategy == "texts": 122 | return list(dict.fromkeys(e.text for e in etree.findall(expr))) 123 | elif strategy.startswith("attr_"): 124 | elem = etree.find(expr) 125 | if elem is not None: 126 | return elem.attrib[strategy[5:]]  # attribute name after "attr_" 127 | elif strategy.startswith("attrs_"): 128 | elist = etree.findall(expr) 129 | return [e.attrib[strategy[6:]] for e in elist]  # name after "attrs_" 130 | return None 131 | 132 | 133 | def _regex_match(strategy: str, expr: str, source: str): 134 | """Match strings in a source string using regex.""" 135 | pattern = re.compile(expr, re.DOTALL) 136 | if strategy == "match": 137 | matches = pattern.search(source) 138 | return matches.group(1) if matches else None 139 | elif strategy == "matches": 140 | return list(dict.fromkeys(pattern.findall(source))) 141 | return None 142 | 143 | 144 | def _modify(result: Any, strategy: str, args: list): 145 | """Modify the result using the given strategy and arguments.""" 146 | args_len = len(args) 147 | if strategy == "int": 148 | result = int(result) 149 | elif strategy == "float": 150 | result = float(result) 151 | elif strategy == "split" and args_len == 1: 152 | result = result.split(args[0]) 153 | elif strategy == "prefix" and args_len == 1: 154 | result = args[0] + result 155 | elif strategy == "suffix" and args_len == 1: 156 | result = result + args[0] 157 | elif strategy == "re_sub" and args_len == 2: 158 | pattern, repl = args 159 | result = re_sub(result, pattern, repl) 160 | elif strategy == "reformat" and args_len == 2: 161 | orig_pattern, new_pattern = args 162 | result = time.strftime(new_pattern, time.strptime(result, orig_pattern)) 163 | elif strategy == "strftime": 164 | pattern = args[0] 165 | millisecs = args[1] if args_len == 2 else False 166 | result = strftime(result, pattern, millisecs) 167 | return result 168 | -------------------------------------------------------------------------------- /scraper/functions/doh.py: -------------------------------------------------------------------------------- 1 | """The implementation of the doh function.""" 2 | import ast 3 | import base64 4 | import concurrent 5 | import concurrent.futures
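# A sketch of how this module is driven by a flow definition (the host values
# below are illustrative, mirroring the tmdb flows in scrapeflows/): a "doh"
# step only registers hostnames, and only when the flow sets "doh_enabled";
# the patched socket.getaddrinfo below then resolves those hosts through the
# resolvers listed in resolvers.conf on the next HTTP request.
#
#   { "doh": { "host": "api.tmdb.org" } }
#
# Multiple hosts can also be registered at once via the plural form:
#
#   { "doh": { "hosts": ["api.tmdb.org", "image.tmdb.org"] } }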
6 | import json 7 | import logging 8 | import socket 9 | import struct 10 | import urllib 11 | import urllib.request 12 | from pathlib import Path 13 | from typing import Dict, List, Optional 14 | 15 | from scraper.functions import Args, Func 16 | 17 | _logger = logging.getLogger(__name__) 18 | 19 | # define a global set to store registered hosts 20 | _registered_hosts = set() 21 | 22 | # define a global thread pool executor 23 | _executor = concurrent.futures.ThreadPoolExecutor() 24 | 25 | # define default DoH configuration 26 | _doh_timeout = 5 27 | _doh_cache: Dict[str, str] = {} 28 | _resolvers_conf = Path(__file__).resolve().parent / "../../resolvers.conf" 29 | with open(_resolvers_conf, "r", encoding="utf-8") as doh_reader: 30 | _doh_resolvers = ast.literal_eval(doh_reader.read()) 31 | 32 | 33 | def _patched_getaddrinfo(host, *args, **kwargs): 34 | """Patched version of socket.getaddrinfo.""" 35 | if host not in _registered_hosts: 36 | return _orig_getaddrinfo(host, *args, **kwargs) 37 | 38 | # check if the host is already resolved 39 | if host in _doh_cache: 40 | ip = _doh_cache[host] 41 | _logger.info("Resolved [%s] to [%s] (cached)", host, ip) 42 | return _orig_getaddrinfo(ip, *args, **kwargs) 43 | 44 | # resolve the host using DoH 45 | futures = [] 46 | for resolver in _doh_resolvers: 47 | futures.append(_executor.submit(_doh_query, resolver, host)) 48 | 49 | for future in concurrent.futures.as_completed(futures): 50 | ip = future.result() 51 | if ip is not None: 52 | _logger.info("Resolved [%s] to [%s]", host, ip) 53 | _doh_cache[host] = ip 54 | host = ip 55 | break 56 | 57 | return _orig_getaddrinfo(host, *args, **kwargs) 58 | 59 | 60 | # monkey patch socket.getaddrinfo 61 | _orig_getaddrinfo = socket.getaddrinfo 62 | socket.getaddrinfo = _patched_getaddrinfo 63 | 64 | 65 | def _doh_query(resolver: str, host: str) -> Optional[str]: 66 | """Query the IP address of the given host using the given DoH resolver.""" 67 | 68 | # construct DNS query message (RFC 1035) 69 | header = b"".join( 70 | [ 71 | b"\x00\x00", # ID: 0 72 | b"\x01\x00", # FLAGS: standard recursive query 73 | b"\x00\x01", # QDCOUNT: 1 74 | b"\x00\x00", # ANCOUNT: 0 75 | b"\x00\x00", # NSCOUNT: 0 76 | b"\x00\x00", # ARCOUNT: 0 77 | ] 78 | ) 79 | question = b"".join( 80 | [ 81 | b"".join( 82 | [ 83 | struct.pack("B", len(item)) + item.encode("utf-8") 84 | for item in host.split(".") 85 | ] 86 | ) 87 | + b"\x00", # QNAME: domain name sequence 88 | b"\x00\x01", # QTYPE: A 89 | b"\x00\x01", # QCLASS: IN 90 | ] 91 | ) 92 | message = header + question 93 | 94 | try: 95 | # send GET request to DoH resolver (RFC 8484, which specifies base64url) 96 | b64message = base64.urlsafe_b64encode(message).decode("utf-8").rstrip("=") 97 | url = f"https://{resolver}/dns-query?dns={b64message}" 98 | headers = {"Content-Type": "application/dns-message"} 99 | _logger.info("DoH request: %s", url) 100 | 101 | request = urllib.request.Request(url, headers=headers, method="GET") 102 | with urllib.request.urlopen(request, timeout=_doh_timeout) as response: 103 | _logger.info("Resolver(%s) response: %s", resolver, response.status) 104 | if response.status != 200: 105 | return None 106 | resp_body = response.read() 107 | 108 | # parse DNS response message (RFC 1035) 109 | # name(compressed):2 + type:2 + class:2 + ttl:4 + rdlength:2 = 12 bytes 110 | first_rdata_start = len(header) + len(question) + 12 111 | # rdata(A record) = 4 bytes 112 | first_rdata_end = first_rdata_start + 4 113 | # convert rdata to IP address 114 | return socket.inet_ntoa(resp_body[first_rdata_start:first_rdata_end])
115 | except Exception as e: 116 | _logger.error("Resolver(%s) request error: %s", resolver, e) 117 | return None 118 | 119 | 120 | def _doh_query_json(resolver: str, host: str) -> Optional[str]: 121 | """Query the IP address via the resolver's JSON API (unused alternative to _doh_query).""" 122 | url = f"https://{resolver}/dns-query?name={host}&type=A" 123 | headers = {"Accept": "application/dns-json"} 124 | _logger.info("DoH request: %s", url) 125 | try: 126 | request = urllib.request.Request(url, headers=headers, method="GET") 127 | with urllib.request.urlopen(request, timeout=_doh_timeout) as response: 128 | _logger.info("Resolver(%s) response: %s", resolver, response.status) 129 | if response.status != 200: 130 | return None 131 | response_body = response.read().decode("utf-8") 132 | _logger.debug("<== body: %s", response_body) 133 | answer = json.loads(response_body)["Answer"] 134 | return answer[0]["data"] 135 | except Exception as e: 136 | _logger.error("Resolver(%s) request error: %s", resolver, e) 137 | return None 138 | 139 | 140 | class DohArgs(Args): 141 | """Arguments for the doh function.""" 142 | 143 | hosts: List[str] 144 | 145 | def parse(self, rawargs: dict, context: dict) -> "DohArgs": 146 | doh_enabled = context["doh"] 147 | if doh_enabled: 148 | self.hosts = rawargs.get("hosts", []) 149 | if "host" in rawargs: 150 | self.hosts.append(rawargs["host"]) 151 | else: 152 | self.hosts = [] 153 | return self 154 | 155 | 156 | @Func("doh", DohArgs) 157 | def doh(args: DohArgs, _) -> None: 158 | """Put the given hosts into the registered hosts set.""" 159 | _registered_hosts.update(args.hosts) 160 | -------------------------------------------------------------------------------- /scraper/functions/loop.py: -------------------------------------------------------------------------------- 1 | """The implementation of the loop function.""" 2 | import logging 3 | from typing import Generator, List, Optional, Tuple 4 | 5 | from scraper.functions import Args, Func, functions 6 | 7 | _logger = logging.getLogger(__name__) 8 | 9 | 10 | class LoopArgs(Args): 11 | """Arguments for the loop function.""" 12 | 13 | source: list 14 | item: str 15 | steps: List[Tuple[str, dict]] 16 | iferr: Optional[str] 17 | 18 | def parse(self, rawargs: dict, context: dict) -> "LoopArgs": 19 | self.source = context[rawargs["source"]] 20 | self.item = rawargs["item"] 21 | self.steps = [s.popitem() for s in rawargs["steps"]] 22 | self.iferr = rawargs.get("iferr") 23 | return self 24 | 25 | 26 | @Func("loop", LoopArgs) 27 | def loop(args: LoopArgs, context: dict) -> Generator: 28 | """Loop over a list of items and execute steps.""" 29 | for i in range(len(args.source)): 30 | subcontext = { 31 | "$parent": context, 32 | "site": context["site"], 33 | args.item: args.source[i], 34 | } 35 | try: 36 | for funcname, rawargs in args.steps: 37 | # execute the function with subcontext 38 | result = functions[funcname](rawargs, subcontext) 39 | if result is not None: 40 | yield result 41 | args.source[i] = subcontext[args.item] 42 | except Exception as e: 43 | if args.iferr == "continue": 44 | _logger.error("Error occurred in loop", exc_info=True) 45 | continue 46 | raise e 47 | -------------------------------------------------------------------------------- /scraper/functions/request.py: -------------------------------------------------------------------------------- 1 | """The implementation of the HTTP function.""" 2 | import json 3 | import logging 4 | import shelve 5 | import time
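# A sketch of the kind of flow step this module executes (abridged from
# scrapeflows/tmdb_movie.json; the full URL carries more query parameters):
#
#   {
#     "http": {
#       "url": "https://api.tmdb.org/3/search/movie?api_key={apikey}&query={title}",
#       "method": "GET",
#       "headers": { "Accept": "application/json" },
#       "timeout": 20,
#       "result": "metadata"
#     }
#   }
#
# HttpArgs.parse() substitutes {apikey} and {title} from the flow context and
# urlencodes the URL; http() then sends the request and stores the response
# body in context["metadata"], caching it on disk for _cache_expire seconds.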
6 | import urllib 7 | import urllib.parse 8 | import urllib.request 9 | from http.cookiejar import CookieJar 10 | from pathlib import Path 11 | from typing import Any 12 | 13 | from scraper.exceptions import RequestSendError 14 | from scraper.functions import Args, Func 15 | 16 | _logger = logging.getLogger(__name__) 17 | 18 | # define default HTTP cache configuration 19 | _basedir = Path(__file__).resolve().parent 20 | _cache_prefix = ".cache_" 21 | _cache_expire = 86400 22 | 23 | # define a global opener and install it to urllib.request 24 | _cookie_processor = urllib.request.HTTPCookieProcessor(CookieJar()) 25 | _global_opener = urllib.request.build_opener(_cookie_processor) 26 | urllib.request.install_opener(_global_opener) 27 | 28 | 29 | class HttpArgs(Args): 30 | """Arguments for the HTTP function.""" 31 | 32 | url: str 33 | method: str 34 | headers: dict 35 | body: Any 36 | timeout: float 37 | result: str 38 | 39 | def parse(self, rawargs: dict, context: dict) -> "HttpArgs": 40 | # urlencode the request query string 41 | url = self.substitute(rawargs["url"], context) 42 | url = urllib.parse.quote(url, safe=":/?&=") 43 | 44 | # substitute the request headers 45 | headers = { 46 | k.lower(): self.substitute(v, context) 47 | for k, v in rawargs.get("headers", {}).items() 48 | } 49 | 50 | # process request body according to the content-type 51 | body = self.substitute(rawargs.get("body"), context) 52 | if body is not None: 53 | content_type = headers.get("content-type", "").lower() 54 | if content_type.startswith("application/json"): 55 | body = json.dumps(body, ensure_ascii=False) 56 | elif content_type.startswith("application/x-www-form-urlencoded"): 57 | body = urllib.parse.urlencode(body) 58 | 59 | # construct the arguments 60 | self.url = url 61 | self.method = rawargs["method"].upper() 62 | self.headers = headers 63 | self.body = body 64 | self.timeout = rawargs.get("timeout", 10) 65 | self.result = rawargs["result"] 66 | return self 67 | 68 | 69 | @Func("http", HttpArgs) 70 | def http(args: HttpArgs, context: dict) -> None: 71 | cache_name = _cache_prefix + context["site"] 72 | # send the HTTP request 73 | response = _http_request( 74 | args.url, args.method, args.headers, args.body, args.timeout, cache_name 75 | ) 76 | # put the response into the context 77 | context[args.result] = response 78 | 79 | 80 | def _http_request(url, method, headers, body, timeout, cache_name): 81 | """Send an HTTP request and return the response body.""" 82 | _logger.info("HTTP request: %s %s", method, url) 83 | _logger.debug("==> headers: %s", headers) 84 | _logger.debug("==> body: %s", body) 85 | 86 | # check if the cache is expired 87 | shelve_flag = "c" # create the database if it does not exist 88 | for cache_file in _basedir.glob(cache_name + "*"): 89 | modify_time = cache_file.stat().st_mtime 90 | if (time.time() - modify_time) > _cache_expire: 91 | shelve_flag = "n" # always create a new, empty database 92 | 93 | # send the request and cache the response 94 | with shelve.open(str(_basedir / cache_name), shelve_flag) as cache: 95 | cache_key = url + str(body) 96 | if cache_key in cache: 97 | response_body = cache[cache_key] 98 | _logger.info("HTTP response: cached") 99 | _logger.debug("<== body: %s", response_body) 100 | return response_body 101 | 102 | try: 103 | body = body.encode("utf-8") if body is not None else None 104 | request = urllib.request.Request(url, body, headers, method=method) 105 | with urllib.request.urlopen(request, timeout=timeout) as response: 106 | response_body = 
response.read().decode("utf-8") 107 | if 200 <= response.status < 300: 108 | cache[cache_key] = response_body 109 | _logger.info("HTTP response: %s", response.status) 110 | _logger.debug("<== headers: %s", response.headers) 111 | _logger.debug("<== body: %s", response_body) 112 | return response_body 113 | except Exception as e: 114 | _logger.error("HTTP request error: %s", e) 115 | raise RequestSendError from e 116 | -------------------------------------------------------------------------------- /scraper/functions/retval.py: -------------------------------------------------------------------------------- 1 | """The implementation of the retval function.""" 2 | import ast 3 | from datetime import datetime 4 | from typing import Any, Optional, Union 5 | 6 | from scraper.exceptions import StopSignal 7 | from scraper.functions import Args, Func 8 | 9 | 10 | class RetvalArgs(Args): 11 | """Arguments for the retval function.""" 12 | 13 | condition: bool 14 | ctxkey: Optional[str] 15 | 16 | def parse(self, rawargs: dict, context: dict) -> "RetvalArgs": 17 | condition = True 18 | 19 | ifempty = rawargs.get("ifempty") 20 | if ifempty is not None: 21 | obj = context.get(ifempty) 22 | condition &= obj is None or len(obj) == 0 23 | 24 | notempty = rawargs.get("notempty") 25 | if notempty is not None: 26 | obj = context.get(notempty) 27 | condition &= obj is not None and len(obj) > 0 28 | 29 | compare = rawargs.get("compare") 30 | if compare is not None: 31 | left_key, operator, right_key = ast.literal_eval(compare) 32 | left = context.get(left_key) 33 | right = context.get(right_key) 34 | condition &= _compare(left, operator, right) 35 | 36 | self.condition = condition 37 | self.ctxkey = rawargs.get("source") 38 | return self 39 | 40 | 41 | @Func("retval", RetvalArgs) 42 | def retval(args: RetvalArgs, context: dict) -> Any: 43 | """Return the value from context with given key.""" 44 | if args.condition: 45 | if args.ctxkey is not None: 46 | return context[args.ctxkey] 47 | else: 48 | raise StopSignal 49 | 50 | 51 | def _compare(left: Any, operator: str, right: Any) -> bool: 52 | """Compare two values with the given operator.""" 53 | if left is None or right is None: 54 | return True # ignore compare if either value is None 55 | if isinstance(left, (int, float)) and isinstance(right, (int, float)): 56 | return _compare_num(left, operator, right) 57 | if isinstance(left, str) and isinstance(right, str): 58 | return _compare_num(_timestamp(left), operator, _timestamp(right)) 59 | return False 60 | 61 | 62 | def _compare_num( 63 | left: Union[int, float], operator: str, right: Union[int, float] 64 | ) -> bool: 65 | """Compare two numbers with the given operator.""" 66 | if operator == "==": 67 | return left == right 68 | elif operator == "!=": 69 | return left != right 70 | elif operator == ">": 71 | return left > right 72 | elif operator == ">=": 73 | return left >= right 74 | elif operator == "<": 75 | return left < right 76 | elif operator == "<=": 77 | return left <= right 78 | else: 79 | return False 80 | 81 | 82 | def _timestamp(time_str: str) -> float: 83 | """Convert a time string to timestamp.""" 84 | if len(time_str) == 4: 85 | format_str = "%Y" 86 | elif len(time_str) == 7: 87 | format_str = "%Y-%m" 88 | else: 89 | format_str = "%Y-%m-%d" 90 | 91 | try: 92 | return datetime.strptime(time_str, format_str).timestamp() 93 | except ValueError: 94 | return 0 95 | -------------------------------------------------------------------------------- /scraper/scraper.py: 
-------------------------------------------------------------------------------- 1 | """Entry point for the scraper.""" 2 | import argparse 3 | import json 4 | import logging 5 | import threading 6 | import time 7 | from pathlib import Path 8 | from typing import Any, Dict, List, Optional 9 | 10 | from scraper.enums import lang_type, video_type 11 | from scraper.exceptions import ScrapeError, StopSignal 12 | from scraper.fake import fake_result 13 | from scraper.functions import findfunc 14 | 15 | _logger = logging.getLogger(__name__) 16 | 17 | # define default scraping configuration path 18 | _basedir = Path(__file__).resolve().parent 19 | _flow_path = _basedir / "../scrapeflows" 20 | _flowconf_path = _basedir / "../scrapeflows.conf" 21 | 22 | # define maximum number of results to return 23 | _maxlimit = 10 24 | _results: List[Any] = [] 25 | 26 | 27 | def scrape(plugin_id: str) -> str: 28 | """Scrape video information from given arguments.""" 29 | parser = argparse.ArgumentParser() 30 | parser.add_argument("--input", type=str, required=True) 31 | parser.add_argument("--type", type=video_type, required=True) 32 | parser.add_argument("--lang", type=lang_type, required=False) 33 | parser.add_argument("--limit", type=int, default=_maxlimit) 34 | parser.add_argument("--allowguess", action="store_true", default=False) 35 | parser.add_argument("--loglevel", type=str, default="critical") 36 | 37 | args = parser.parse_known_args()[0] 38 | videotype = args.type.value 39 | language = args.lang.value if args.lang is not None else None 40 | maxlimit = min(args.limit, _maxlimit) 41 | loglevel = args.loglevel.upper() 42 | 43 | # set basic logging configuration 44 | logformat = ( 45 | "%(asctime)s %(threadName)s %(levelname)s " 46 | "%(filename)s:%(lineno)d - %(message)s" 47 | ) 48 | logging.basicConfig(level=getattr(logging, loglevel), format=logformat) 49 | 50 | # parse --input argument as JSON 51 | jsoninput = json.loads(args.input) 52 | if jsoninput["title"] == "--install": 53 | return fake_result(plugin_id, videotype) 54 | initialval = { 55 | "title": jsoninput["title"], 56 | "season": jsoninput.get("season", 0), 57 | "episode": jsoninput.get("episode", 1), 58 | "available": jsoninput.get("original_available", None), 59 | "year": str(jsoninput.get("original_available", ""))[:4], 60 | "lang": language, 61 | "limit": maxlimit, 62 | "version": _version(plugin_id), 63 | } 64 | 65 | # load and execute scrape flows using multithreading 66 | start = time.time() 67 | taskqueue: Dict[int, List[threading.Thread]] = {} 68 | for flow in ScrapeFlow.load(_flow_path, videotype, language, initialval): 69 | task = threading.Thread(target=_start, args=(flow, maxlimit)) 70 | tasks = taskqueue.get(flow.priority, []) 71 | tasks.append(task) 72 | taskqueue[flow.priority] = tasks 73 | for tasks in dict(sorted(taskqueue.items(), key=lambda x: x[0])).values(): 74 | if len(_results) >= maxlimit: 75 | break 76 | for task in tasks: 77 | task.start() 78 | for task in tasks: 79 | task.join() 80 | end = time.time() 81 | _logger.info("Total execution time: %.3f seconds", end - start) 82 | return json.dumps( 83 | {"success": True, "result": _results}, ensure_ascii=False, indent=2 84 | ).replace("[plugin_id]", plugin_id) 85 | 86 | 87 | def _start(flow: "ScrapeFlow", limit: int): 88 | """Start a scrape flow and store results.""" 89 | try: 90 | result_gen = flow.start() 91 | while True: 92 | if len(_results) >= limit: 93 | break 94 | try: 95 | _results.append(next(result_gen)) 96 | except StopIteration: 97 | break 98 | except 
ScrapeError: 99 | _logger.error("Failed to scrape from %s", flow.site, exc_info=True) 100 | 101 | 102 | def _version(plugin_id: str) -> str: 103 | """Split the plugin ID to get the version.""" 104 | if "-" in plugin_id: 105 | version = plugin_id.split("-")[-1] 106 | if version != "plugin": 107 | return f"/{version}" 108 | return "" 109 | 110 | 111 | class ScrapeFlow: 112 | """A flow of steps to scrape video information.""" 113 | 114 | def __init__( 115 | self, 116 | site: str, 117 | steps: list, 118 | context: dict, 119 | priority: Optional[int], 120 | ): 121 | self.site = site 122 | self.steps = steps 123 | self.context = context 124 | self.priority = priority if priority is not None else 999 125 | 126 | def start(self): 127 | """Start the scrape flow and return a generator.""" 128 | for funcname, rawargs in [s.popitem() for s in self.steps]: 129 | # execute the function with context 130 | try: 131 | iterable = findfunc(funcname)(rawargs, self.context) 132 | if iterable is not None: 133 | yield from iterable 134 | except StopSignal: 135 | break 136 | 137 | @staticmethod 138 | def load(path: Path, videotype: str, language: str, initialval: dict): 139 | """Load scrape flows from given path.""" 140 | 141 | flowconf = None 142 | if _flowconf_path.exists(): 143 | with open(_flowconf_path, "r", encoding="utf-8") as conf_reader: 144 | flowconf = json.load(conf_reader) 145 | 146 | for filepath in path.glob("*.json"): 147 | with open(filepath, "r", encoding="utf-8") as def_reader: 148 | flowdef = json.load(def_reader) 149 | site = flowdef["site"] 150 | siteconf = None 151 | if flowconf is not None and site in flowconf: 152 | siteconf = flowconf[site] 153 | 154 | # filter out flows that do not match the video type 155 | if not ScrapeFlow.valid(flowdef, siteconf, videotype, language): 156 | continue 157 | 158 | # generate a flow instance from the definition 159 | steps = list(flowdef["steps"]) 160 | context = initialval.copy() 161 | context["site"] = site 162 | context["doh"] = flowdef.get("doh_enabled", False) 163 | priority = None 164 | if siteconf is not None: 165 | priority = siteconf["priority"] 166 | context.update(siteconf) 167 | yield ScrapeFlow(site, steps, context, priority) 168 | 169 | @staticmethod 170 | def valid(flowdef: Any, siteconf: Any, videotype: str, language: str): 171 | """Check if the flow definition is valid.""" 172 | 173 | if language is not None and "lang" in flowdef: 174 | if language not in flowdef["lang"]: 175 | return False 176 | 177 | if flowdef["type"] != videotype: 178 | return False 179 | 180 | if siteconf is not None: 181 | if not any(videotype.startswith(t) for t in siteconf["types"]): 182 | return False 183 | 184 | return True 185 | -------------------------------------------------------------------------------- /scraper/utils.py: -------------------------------------------------------------------------------- 1 | """Utility functions for this package.""" 2 | import json 3 | import re 4 | import time 5 | from html.parser import HTMLParser 6 | from typing import Any, List, Optional, Union 7 | from xml.etree import ElementTree 8 | 9 | from scraper.exceptions import ResultParseError 10 | 11 | 12 | def strftime( 13 | timestamp: Union[str, int, float], pattern: str, millisecs: bool = False 14 | ) -> str: 15 | """Format a timestamp with the given pattern.""" 16 | if isinstance(timestamp, str): 17 | timestamp = float(timestamp) 18 | 19 | if millisecs: 20 | timestamp /= 1000 21 | 22 | return time.strftime(pattern, time.localtime(timestamp)) 23 | 24 | 25 | def 
dict_update(d1: dict, d2: dict) -> dict: 26 | """Recursively update a dictionary.""" 27 | for k, v2 in d2.items(): 28 | v1 = d1.get(k, None) 29 | if isinstance(v1, dict) and isinstance(v2, dict): 30 | d1[k] = dict_update(d1[k], v2) 31 | elif isinstance(v1, list) and isinstance(v2, list): 32 | d1[k].extend(x for x in v2 if x not in v1) 33 | else: 34 | d1[k] = v2 35 | 36 | return d1 37 | 38 | 39 | def strip(obj: Any) -> Any: 40 | """Recursively strip a string, list, or dict.""" 41 | if isinstance(obj, list): 42 | return list(filter(lambda x: x is not None, [strip(i) for i in obj])) 43 | elif isinstance(obj, dict): 44 | return {k: strip(v) for k, v in obj.items()} 45 | elif isinstance(obj, str): 46 | obj = obj.strip() 47 | return obj if obj != "" else None 48 | return obj 49 | 50 | 51 | def re_sub(obj: Any, pattern: str, repl: str) -> Any: 52 | """Recursively replace a pattern in a string, list, or dict.""" 53 | if isinstance(obj, list): 54 | return [re_sub(item, pattern, repl) for item in obj] 55 | elif isinstance(obj, dict): 56 | return {k: re_sub(v, pattern, repl) for k, v in obj.items()} 57 | elif isinstance(obj, str): 58 | return re.sub(pattern, repl, obj) 59 | return obj 60 | 61 | 62 | def str_to_etree(string: str) -> Optional[ElementTree.Element]: 63 | """Convert a string to an ElementTree.""" 64 | string = string.strip() 65 | if string.startswith("{") or string.startswith("["): 66 | return json_to_etree(json.loads(string, strict=False)) 67 | elif string.startswith("<"): 68 | return html_to_etree(string) 69 | return None 70 | 71 | 72 | def json_to_etree(json_obj: Any, tag: str = "root"): 73 | """Convert a JSON object to an ElementTree.""" 74 | element = ElementTree.Element(tag) 75 | if isinstance(json_obj, list): 76 | for i, item in enumerate(json_obj): 77 | element.append(json_to_etree(item, f"i{str(i)}")) 78 | elif isinstance(json_obj, dict): 79 | for k, v in json_obj.items(): 80 | element.append(json_to_etree(v, k)) 81 | elif json_obj is not None: 82 | element.text = str(json_obj) 83 | return element 84 | 85 | 86 | def html_to_etree(html_text: str): 87 | """Convert an HTML text to an ElementTree.""" 88 | return EtreeHTMLParser().parse(html_text) 89 | 90 | 91 | class EtreeHTMLParser(HTMLParser): 92 | """Simple HTML parser that converts HTML to an ElementTree.""" 93 | 94 | tag_stack: List[ElementTree.Element] 95 | cur_tag: Optional[ElementTree.Element] 96 | after_end: bool 97 | 98 | def __init__(self): 99 | super().__init__() 100 | self.tag_stack = [] 101 | self.cur_tag = None 102 | self.after_end = False 103 | 104 | def handle_starttag(self, tag, attrs): 105 | self.after_end = False 106 | self.cur_tag = ElementTree.Element(tag, {k: v or "" for k, v in attrs}) 107 | if len(self.tag_stack) > 0: 108 | self.tag_stack[-1].append(self.cur_tag) 109 | self.tag_stack.append(self.cur_tag) 110 | 111 | def handle_endtag(self, tag): 112 | while any(item.tag == tag for item in self.tag_stack): 113 | self.after_end = True 114 | self.cur_tag = self.tag_stack.pop() 115 | if self.cur_tag.tag == tag: 116 | break 117 | 118 | def handle_data(self, data): 119 | if self.cur_tag is not None: 120 | if self.after_end: 121 | self.cur_tag.tail = data.strip() 122 | else: 123 | self.cur_tag.text = data.strip() 124 | 125 | def error(self, message): 126 | raise ResultParseError 127 | 128 | def parse(self, html): 129 | self.feed(html) 130 | self.close() 131 | return self.cur_tag 132 | -------------------------------------------------------------------------------- /setup.py: 
-------------------------------------------------------------------------------- 1 | """Package script for this plugin.""" 2 | import string 3 | from pathlib import Path 4 | 5 | from setuptools import setup 6 | 7 | from version import version 8 | 9 | # get the root directory of this plugin 10 | ROOT_DIR = Path(__file__).resolve().parent 11 | 12 | # use the name of the root directory as the plugin id 13 | PLUGIN_ID = ROOT_DIR.name 14 | 15 | # write the INFO file for this plugin 16 | INFO_TMPL = """ 17 | { 18 | "id": "${plugin_id}-${version}", 19 | "entry_file": "run.sh", 20 | "type": ["movie", "tvshow"], 21 | "language": ["chs"], 22 | "test_example": { 23 | "movie": { 24 | "title": "--install" 25 | }, 26 | "tvshow": { 27 | "title": "--install" 28 | }, 29 | "tvshow_episode": { 30 | "title": "--install", 31 | "season": 1, 32 | "episode": 1 33 | } 34 | } 35 | } 36 | """ 37 | with open(ROOT_DIR / "INFO", "w", encoding="utf-8") as writer: 38 | template = string.Template(INFO_TMPL) 39 | writer.write(template.substitute(plugin_id=PLUGIN_ID, version=version())) 40 | 41 | # use 'python setup.py sdist --formats=zip' command to create the zip file 42 | setup( 43 | name=PLUGIN_ID, 44 | version=version(), 45 | packages=[ 46 | "", 47 | "scraper", 48 | "scraper.functions", 49 | "scrapeflows", 50 | "configserver" 51 | ], 52 | package_data={ 53 | "": ["run.sh", "resolvers.conf", "INFO"], 54 | "scrapeflows": ["*.json"], 55 | "configserver": ["templates/*.html"], 56 | }, 57 | python_requires=">=3.6", 58 | ) 59 | -------------------------------------------------------------------------------- /version.py: -------------------------------------------------------------------------------- 1 | """Version number management.""" 2 | import subprocess 3 | 4 | __all__ = ["version"] 5 | 6 | 7 | def version(): 8 | """Extract the version number from git describe command.""" 9 | cmd = "git describe --tags --match v[0-9]*".split() 10 | tag_describe = subprocess.check_output(cmd).decode().strip() 11 | tag_version = tag_describe[1:] 12 | if "-" in tag_version: 13 | tag_version = tag_version.split("-", 1)[0] 14 | return tag_version 15 | --------------------------------------------------------------------------------
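A quick worked example of how version() turns git metadata into the package version (a sketch: the tag and commit hash below are illustrative, not taken from this repository's history):

# git describe --tags --match v[0-9]*  ->  "v1.2.3-4-gabc1234"
# tag_describe[1:]                     ->  "1.2.3-4-gabc1234"
# tag_version.split("-", 1)[0]         ->  "1.2.3"
from version import version

print(version())  # -> "1.2.3", consumed by setup.py for both the INFO file and the sdist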