├── .gitattributes
├── .github
├── dependabot.yml
└── workflows
│ ├── integration-tests.yml
│ └── lint.yml
├── .gitignore
├── LICENSE
├── README.md
├── example_html_viewer.html
├── test
├── discord-bot-test-expect.sh
├── discord-bot-test.sh
├── test1.ans
├── test1.in
└── test1.sh
└── vdator
├── .env.EXAMPLE
├── api.py
├── checker.py
├── checks
├── __init__.py
├── audio_track_conversions.py
├── audio_track_people.py
├── audio_track_spellcheck.py
├── chapter_language.py
├── chapter_padding.py
├── check.py
├── filename.py
├── flac_audio_tracks.py
├── has_chapters.py
├── metadata_default_flag.py
├── metadata_ids.py
├── mixins
│ ├── __init__.py
│ ├── is_commentary_track.py
│ ├── is_movie.py
│ ├── print_header.py
│ └── section_id.py
├── mkvmerge.py
├── movie_name_format.py
├── muxing_mode.py
├── print_audio_track_names.py
├── print_chapters.py
├── print_text_tracks.py
├── remove_until_first_codec.py
├── text_default_flag.py
├── text_order.py
├── tracks_have_language.py
├── video_language_matches_first_audio_language.py
└── video_track.py
├── data
├── codecs.json
└── urls.json
├── helpers.py
├── main.py
├── nltk_people.py
├── parsers
├── __init__.py
├── bdinfo_parser.py
├── codecs_parser.py
├── match_bdinfo_audio_to_mediainfo.py
├── media_info_parser.py
├── paste_parser.py
└── url_parser.py
├── reporter.py
├── requirements.txt
└── source_detector.py
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Binary files that should not be normalized or diffed
2 | *.png binary
3 | *.jpg binary
4 | *.gif binary
5 | *.ico binary
6 |
7 | # Catch all for anything we forgot. Add rules if you get CRLF -> LF warnings.
8 | * eol=lf
9 |
--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
# To get started with Dependabot version updates, you'll need to specify which
# package ecosystems to update and where the package manifests are located.
# Please see the documentation for all configuration options:
# https://help.github.com/github/administering-a-repository/configuration-options-for-dependency-updates

version: 2
updates:
  # Python dependencies listed in vdator/requirements.txt.
  - package-ecosystem: "pip"
    directory: "/vdator"
    schedule:
      interval: "daily"

  # Actions referenced from .github/workflows. For this ecosystem the
  # directory must be "/" — Dependabot looks in .github/workflows itself.
  - package-ecosystem: "github-actions"
    directory: "/"
    schedule:
      interval: "weekly"
12 |
--------------------------------------------------------------------------------
/.github/workflows/integration-tests.yml:
--------------------------------------------------------------------------------
# Integration tests: dependency installation, the REST API, and (on main only)
# a live Discord login.
name: integration tests

on: [push, pull_request]

# Every `run` step executes inside ./vdator unless the step overrides it.
defaults:
  run:
    working-directory: ./vdator

jobs:
  # Checks that system and Python dependencies install on each Python version.
  dependencies:
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        python-version: ["3.8", "3.9", "3.10"]

    steps:
      - uses: actions/checkout@v4
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install packages
        # Refresh the package index first; a stale index on the runner image
        # makes `apt-get install` fail with 404s.
        run: |
          sudo apt-get update
          sudo apt-get install -y expect libhunspell-dev
      - name: Install python dependencies
        run: |
          python -m pip install --upgrade pip
          if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
      - name: Setup .env
        run: cp .env.EXAMPLE .env

  # Starts the REST API and compares its reply for test1.in with test1.ans.
  api:
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        python-version: ["3.8", "3.9", "3.10"]

    steps:
      - uses: actions/checkout@v4
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install packages
        run: |
          sudo apt-get update
          sudo apt-get install -y libhunspell-dev
      - name: Install python dependencies
        run: |
          python -m pip install --upgrade pip
          if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
      - name: Setup .env
        run: cp .env.EXAMPLE .env
      - name: Run API
        env:
          # Pin the mkvmerge version so the run does not depend on upstream.
          MKVMERGE_VERSION: 'Version 57.0.0 "Till The End" 2021-05-22'
        run: python3 api.py &
      - name: Wait for API to accept connections
        # The server was started in the background, so the next step could
        # otherwise race it. Any HTTP response (even 405 for this GET) means
        # the socket is up; curl only fails while the connection is refused.
        run: |
          for attempt in $(seq 1 30); do
            if curl -s -o /dev/null http://127.0.0.1:5000/text; then
              exit 0
            fi
            sleep 1
          done
          echo "API did not start within 30 seconds" >&2
          exit 1
      - name: Test blank input to API
        run: ./test1.sh
        working-directory: ./test

  # Logs the bot in to Discord; needs the secret, so it only runs on main.
  discord-bot:
    if: ${{ github.ref == 'refs/heads/main' }}

    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v4
      - name: Set up Python 3.10
        uses: actions/setup-python@v5
        with:
          python-version: "3.10"
      - name: Install packages
        run: |
          sudo apt-get update
          sudo apt-get install -y expect libhunspell-dev
      - name: Install python dependencies
        run: |
          python -m pip install --upgrade pip
          if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
      - name: Setup .env
        run: cp .env.EXAMPLE .env
      - name: Discord bot can join server
        env:
          DISCORD_BOT_SECRET: ${{ secrets.DISCORD_BOT_SECRET }}
        run: ./discord-bot-test-expect.sh
        working-directory: ./test
88 |
--------------------------------------------------------------------------------
/.github/workflows/lint.yml:
--------------------------------------------------------------------------------
# Formatting check: fails when `black` would reformat any file under ./vdator.
name: lint

on: [push, pull_request]

# Every `run` step executes inside ./vdator unless the step overrides it.
defaults:
  run:
    working-directory: ./vdator

jobs:
  build:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.8", "3.9", "3.10"]

    steps:
      - uses: actions/checkout@v4
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install PyHunSpell
        # `-y` keeps apt non-interactive (there is no TTY to answer a prompt);
        # refreshing the index first avoids 404s from a stale runner image.
        run: |
          sudo apt-get update
          sudo apt-get install -y libhunspell-dev
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
      - name: Setup .env
        run: cp .env.EXAMPLE .env
      - name: Lint with black
        run: black . --check
33 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 | MANIFEST
27 |
28 | # PyInstaller
29 | # Usually these files are written by a python script from a template
30 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
31 | *.manifest
32 | *.spec
33 |
34 | # Installer logs
35 | pip-log.txt
36 | pip-delete-this-directory.txt
37 |
38 | # Unit test / coverage reports
39 | htmlcov/
40 | .tox/
41 | .coverage
42 | .coverage.*
43 | .cache
44 | nosetests.xml
45 | coverage.xml
46 | *.cover
47 | .hypothesis/
48 | .pytest_cache/
49 |
50 | # Translations
51 | *.mo
52 | *.pot
53 |
54 | # Django stuff:
55 | *.log
56 | local_settings.py
57 | db.sqlite3
58 |
59 | # Flask stuff:
60 | instance/
61 | .webassets-cache
62 |
63 | # Scrapy stuff:
64 | .scrapy
65 |
66 | # Sphinx documentation
67 | docs/_build/
68 |
69 | # PyBuilder
70 | target/
71 |
72 | # Jupyter Notebook
73 | .ipynb_checkpoints
74 |
75 | # pyenv
76 | .python-version
77 |
78 | # celery beat schedule file
79 | celerybeat-schedule
80 |
81 | # SageMath parsed files
82 | *.sage.py
83 |
84 | # Environments
85 | .env
86 | .venv
87 | env/
88 | venv/
89 | ENV/
90 | env.bak/
91 | venv.bak/
92 |
93 | vdator/bin/
94 | vdator/include
95 | vdator/lib64
96 | vdator/pyvenv.cfg
97 | vdator/.env
98 |
99 | # Spyder project settings
100 | .spyderproject
101 | .spyproject
102 |
103 | # Rope project settings
104 | .ropeproject
105 |
106 | # mkdocs documentation
107 | /site
108 |
109 | # mypy
110 | .mypy_cache/
111 |
112 | test/*.out
113 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2019 werrpy
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # vdator
2 | > Remux validator Discord bot
3 |
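4 | [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
5 | [![lint](https://github.com/werrpy/vdator/actions/workflows/lint.yml/badge.svg)](https://github.com/werrpy/vdator/actions/workflows/lint.yml)
6 | [![integration tests](https://github.com/werrpy/vdator/actions/workflows/integration-tests.yml/badge.svg)](https://github.com/werrpy/vdator/actions/workflows/integration-tests.yml)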
7 |
8 | Takes a Pastebin link with BDInfo and MediaInfo dump, and validates the remux.
9 |
10 | Checks:
11 | ```
12 | Video track names
13 | Movie/TV name format
14 | IMDB/TMDB ids
15 | Filename
16 | Video language matches first audio language
17 | No muxing mode
18 | Uses latest mkvtoolnix
19 | Video and audio track names match
20 | DTS-HD MA 1.0/2.0 optionally to FLAC, LPCM 1.0/2.0 to FLAC, LPCM > 2.0 to DTS-HD MA
21 | Commentary to AC-3 @ 224 kbps
22 | Commentary track people and spellcheck
23 | Subtitle order
24 | Subtitle default flag
25 | Should have chapters
26 | Chapter languages
27 | Chapter padding
28 | ```
29 |
30 | ### Table of Contents
31 | - [Supported pastebin sites](#supported-pastebin-sites)
32 | - [Setup](#setup)
33 | * [Create a python3 virtual environment](#create-a-python3-virtual-environment)
34 | * [Installing dependencies](#installing-dependencies)
35 | * [Updating dependencies](#updating-dependencies)
36 | * [Running manually](#running-manually)
37 | * [Running with systemd](#running-with-systemd)
38 | - [Lint](#lint)
39 | - [Using](#using)
40 | - [Adding a pastebin site](#adding-a-pastebin-site)
41 | - [Adding a new check](#adding-a-new-check)
42 | - [API](#api)
43 |
44 | ### Supported pastebin sites
45 |
46 | - [{d}paste](https://dpaste.com/)
47 | - [dpaste](https://dpaste.org/)
48 | - [Hastebin](https://hastebin.com/)
49 | - [Hey! Paste it](https://www.heypasteit.com/)
50 | - [CentOS Pastebin Service](https://paste.centos.org/)
51 | - [Paste.ee](https://paste.ee/)
52 | - [openSUSE Paste](https://paste.opensuse.org/)
53 | - [Pastebin](https://pastebin.com/)
54 | - [Rentry.co - Markdown Pastebin](https://rentry.co/)
55 | - [termbin](https://termbin.com/)
56 | - [TextBin](https://textbin.net/)
57 |
58 | ### Setup
59 |
60 | Requires Python >= 3.8
61 |
62 | Create a [Discord bot](https://discordapp.com/developers/docs/intro) and add it to a server. In the bot settings enable "MESSAGE CONTENT INTENT".
63 |
64 | Copy the environment variables template `vdator/.env.EXAMPLE` to `vdator/.env`.
65 | Edit `vdator/.env` and set `DISCORD_BOT_SECRET` to your bot's token.
66 |
67 | Request a [TMDB API Key](https://developers.themoviedb.org/3/getting-started/introduction) and set `TMDB_API_KEY`.
68 |
69 | Don't forget to create channels on the server and set them in `vdator/.env` for `REVIEW_CHANNELS`, `REVIEW_REPLY_CHANNELS`, and `BOT_CHANNELS`.
70 |
71 | To prevent overwriting the `vdator/.env` file when pulling changes from git, do `git update-index --skip-worktree vdator/.env`. When you want to pull a new `.env` file, do `git update-index --no-skip-worktree vdator/.env`.
72 |
73 | #### Create a python3 virtual environment:
74 |
75 | Use [pip and virtual env](https://packaging.python.org/guides/installing-using-pip-and-virtualenv/) to run vdator.
76 |
77 | In the `vdator` directory run:
78 | ```bash
79 | python3 -m venv .
80 | ```
81 |
82 | If the command fails to install pip, you will see an error similar to:
83 | ```
84 | Error: Command '['python3', '-Im', 'ensurepip', '--upgrade', '--default-pip']' returned non-zero exit status 1.
85 | ```
86 | Start over by creating a virtual environment without pip, and then install pip manually inside it:
87 | ```bash
88 | python3 -m venv --without-pip .
89 | source bin/activate
90 | curl https://bootstrap.pypa.io/get-pip.py | python3
91 | deactivate
92 | ```
93 |
94 | #### Installing dependencies
95 |
96 | Install [PyHunSpell](https://github.com/blatinier/pyhunspell#installation)
97 |
98 | ```bash
99 | sudo apt install python3-dev libhunspell-dev
100 | ```
101 |
102 | Install dependencies
103 |
104 | ```bash
105 | source bin/activate
106 | pip3 install -r requirements.txt
107 | deactivate
108 | ```
109 |
110 | #### Updating dependencies
111 |
112 | ```bash
113 | source bin/activate
114 | pip3 install -r requirements.txt --upgrade
115 | pip3 freeze > requirements.txt
116 | deactivate
117 | ```
118 |
119 | #### Running manually
120 |
121 | Run the bot manually for testing; exceptions will be printed to the console:
122 | ```bash
123 | source bin/activate
124 | python3 main.py
125 | ```
126 |
127 | #### Running with systemd
128 |
129 | Create a systemd service to run vdator, `/etc/systemd/system/vdator.service`
130 |
131 | ```
132 | [Unit]
133 | Description=vdator
134 | After=multi-user.target
135 |
136 | [Service]
137 | WorkingDirectory=/home/USER/vdator/venv/vdator
138 | User=
139 | Group=
140 | ExecStart=/home/USER/vdator/venv/bin/python3 /home/USER/vdator/venv/vdator/main.py
141 | Type=idle
142 | Restart=always
143 | RestartSec=15
144 |
145 | [Install]
146 | WantedBy=multi-user.target
147 | ```
148 |
149 | Set `User` to the user to run vdator as, and `Group` to the user's group (list with `groups`), usually both are the username.
150 | Replace `/home/USER/vdator/venv/` with the full path to your venv.
151 |
152 | Run `systemctl enable vdator` to start on boot. Use systemctl to start/stop vdator, `systemctl start vdator`, `systemctl stop vdator`, `systemctl restart vdator`
153 |
154 | ### Lint
155 | ```bash
156 | black .
157 | ```
158 |
159 | ### Using
160 |
161 | Type `!help` in one of the bot channels for more information.
162 |
163 | ### Adding a pastebin site
164 |
165 | Edit `vdator/data/urls.json` and add your pastebin site.
166 |
167 | ```
168 | # hostname
169 | "example.com": {
170 | # regex to get paste's unique identifier
171 | "slug_regex": "https://example.com/(.*)",
172 |
173 | # regex to check if paste links directly to raw text, using {} in place of the unique identifier
174 | "raw_url_regex": "https?://example.com/raw/{}",
175 |
176 | # link to raw text, using {} in place of the unique identifier
177 | "raw_url": "https://example.com/raw/{}"
178 | }
179 | ```
180 |
181 | ### Adding a new check
182 |
183 | Edit `vdator/checker.py`.
184 |
185 | In the `run_checks()` method add:
186 | ```python
187 | reply += MyNewCheck(self.reporter, self.mediainfo).run()
188 | ```
189 |
190 | Edit `vdator/checks/__init__.py` and add:
191 | ```python
192 | from .my_check import *
193 | ```
194 |
195 | Create `vdator/checks/my_check.py`:
196 | ```python
197 | from .check import *
198 |
199 |
200 | class MyNewCheck(Check):
201 | def __init__(self, reporter, mediainfo):
202 | super().__init__(reporter, mediainfo, "Error running my check")
203 |
204 | # overriding abstract method
205 | def get_reply(self):
206 | reply = ""
207 | # use self.mediainfo here
208 | # use has() and has_many() to check if the mediainfo keys you need exist, for example:
209 | # if has_many(self.mediainfo, "video.0", ["height"]):
210 | # safe to use self.mediainfo["video"][0]["height"] here
211 | # use self.reporter.print_report() to print status messages
212 | reply += self.reporter.print_report("info", "Some info message")
213 | # lastly return the string result of the check which is appended to the bot reply in run_checks()
214 | return reply
215 | ```
216 |
217 | ### API
218 |
219 | Run with `python api.py`
220 |
221 | The default port is 5000. To use a different port, set the `PORT` environment variable, for example `export PORT=8080 && python api.py`
222 |
223 | Example using Postman:
224 | ```
225 | POST http://127.0.0.1:5000/text
226 | Body, raw
227 | [INSERT TEXT HERE]
228 | ```
229 |
230 | Gives back json:
231 | ```json
232 | {
233 | "discord_reply":"...",
234 | "html_reply":"..."
235 | }
236 | ```
237 | **discord_reply** - the text that the bot usually sends to discord
238 | **html_reply** - discord text formatted as html
239 |
240 | Insert the `html_reply` text into the `example_html_viewer.html` to see it formatted similar to discord.
241 |
242 | For testing, force a specific version of mkvmerge with
243 |
244 | ````bash
245 | export MKVMERGE_VERSION="Version 57.0.0 \"Till The End\" 2021-05-22" && python api.py
246 | ````
247 |
248 |
--------------------------------------------------------------------------------
/example_html_viewer.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
13 |
14 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
--------------------------------------------------------------------------------
/test/discord-bot-test-expect.sh:
--------------------------------------------------------------------------------
#!/usr/bin/expect -f

# Starts the Discord bot through discord-bot-test.sh and checks that it prints
# its login banner followed by the expected bot account name.
# Exits 0 when both lines are seen and 1 on timeout or if the bot exits early.

# timeout after 60 seconds
set timeout 60

spawn ./discord-bot-test.sh

# A plain `expect "pattern"` simply returns on timeout or EOF, so the script
# would still exit 0 and the CI step could never fail. Handle both explicitly.
expect {
    "I'm in\r" {}
    timeout {
        puts stderr "Timed out waiting for the bot login banner"
        exit 1
    }
    eof {
        puts stderr "Bot exited before printing the login banner"
        exit 1
    }
}

expect {
    "vdator-github-actions#7018\r" {}
    timeout {
        puts stderr "Timed out waiting for the bot account name"
        exit 1
    }
    eof {
        puts stderr "Bot exited before printing the bot account name"
        exit 1
    }
}
10 |
--------------------------------------------------------------------------------
/test/discord-bot-test.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Launches the vdator Discord bot in the foreground (used by
# discord-bot-test-expect.sh, which watches its output).

# main.py is addressed relative to this script, so switch to the script's own
# directory first; otherwise the path only resolves when called from test/.
cd "$(dirname "${BASH_SOURCE[0]}")" || exit 1

# exec replaces the shell so the caller talks to the python process directly.
exec python ../vdator/main.py
4 |
--------------------------------------------------------------------------------
/test/test1.ans:
--------------------------------------------------------------------------------
1 | {"discord_reply":"\u274c No mediainfo. Are you missing the `General` heading?\n> **Report**\n0 correct, 0 warnings, 1 error, 0 failures, and 0 info","html_reply":"
No mediainfo. Are you missing the General
heading?
Report
0 correct, 0 warnings, 1 error, 0 failures, and 0 info
"}
2 |
--------------------------------------------------------------------------------
/test/test1.in:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/werrpy/vdator/a6be476623551b797c94a3f5944c1d7c921bfb94/test/test1.in
--------------------------------------------------------------------------------
/test/test1.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Integration test: POST the contents of test1.in to a locally running vdator
# API and compare the response with the expected answer in test1.ans.
# Exit status is diff's: 0 when the response matches, non-zero otherwise.

# The fixture files are addressed relative to this script, so run from its
# directory no matter where the caller invoked it from.
cd "$(dirname "${BASH_SOURCE[0]}")" || exit 1

# api.py listens on $PORT and falls back to 5000; mirror that here.
port="${PORT:-5000}"

curl -s -X POST --data-binary "$(cat test1.in)" "http://127.0.0.1:${port}/text" > test1.out
diff test1.out test1.ans
5 |
--------------------------------------------------------------------------------
/vdator/.env.EXAMPLE:
--------------------------------------------------------------------------------
1 | # channels to listen in and add reactions
2 | REVIEW_CHANNELS=upload-review, remux
3 |
4 | # channels to send full summary to if from review channel
5 | REVIEW_REPLY_CHANNELS=remux-bot
6 |
7 | # channels to listen in and post full summaries
8 | BOT_CHANNELS=remux-bot
9 |
10 | # trainee channels
11 | TRAINEE_CHANNELS=upload-review
12 |
13 | # internal channels
14 | INTERNAL_CHANNELS=remux
15 |
16 | # release group
17 | RELEASE_GROUP=GROUP
18 |
19 | # in-game, Now Playing...
20 | IN_GAME=Remux n00b
21 |
22 | # stop parsing after encountering this line
23 | IGNORE_AFTER_LINE=%%%
24 |
25 | # method to check for line to ignore after
26 | # 'equals' or 'contains'
27 | IGNORE_AFTER_LINE_METHOD=contains
28 |
29 | # ignore input until blank line if current line starts with one of these
30 | #IGNORE_UNTIL_BLANK_LINE_PREFIXES=
31 |
32 | # DVD check mode
33 | # 'mediainfo' to use mediainfo fields
34 | # 'nobdinfo' to assume DVD if no bdinfo given
35 | DVD_CHECK_MODE=nobdinfo
36 |
37 | DISCORD_BOT_SECRET=
38 | DISCORD_MSG_CHAR_LIMIT=2000
39 |
40 | TMDB_API_KEY=
41 | HUNSPELL_LANG=/usr/share/hunspell/en_US.dic, /usr/share/hunspell/en_US.aff
42 | MISSPELLED_IGNORE_LIST=upmix
43 |
44 | MKVTOOLNIX_NEWS=https://mkvtoolnix.download/doc/NEWS.md
45 |
46 | FILENAME_CUTS=Directors.Cut, Extended.Cut, Final.Cut, Theatrical, Uncut, Unrated
47 |
48 | # how many years off the movie year can be. (default: 1)
49 | #MOVIE_YEAR_OFFSET=1
50 |
--------------------------------------------------------------------------------
/vdator/api.py:
--------------------------------------------------------------------------------
1 | """
2 | Experimental REST API
3 |
4 | > python3 api.py
5 | POST http://127.0.0.1:5000/text
6 | Body, raw
7 | [INSERT TEXT HERE]
8 |
9 | {"discord_reply":"...", "html_reply":"..."}
10 | """
11 |
12 | import json, os, traceback
13 | from flask import Flask, jsonify, request
14 |
15 | from discord_markdown.discord_markdown import (
16 | Compiler,
17 | convert_to_html as discord_markdown_convert_to_html,
18 | )
19 |
20 | # Override discord_markdown.discord_markdown.Compiler.compile method to disable printing
21 | # https://github.com/bitjockey42/discord-markdown/blob/9b8d267e3bf1b333bccaae5619a3f2af0a5a54a1/discord_markdown/compiler.py#L29-L37
def compile(self, markdown=False):
    """Render the parsed tree to a single stripped string without printing.

    Parses on demand when the parser has no tree yet, evaluates every node
    with the given ``markdown`` flag, concatenates the results in tree order,
    strips surrounding whitespace, and stores the result in ``self._code``
    before returning it.
    """
    parser = self._parser
    if not parser.tree:
        parser.parse()

    # Reset first so a previous compilation never leaks into this one.
    self._code = ""
    rendered = "".join(node.eval(markdown=markdown) for node in parser.tree)
    self._code = rendered.strip()
    return self._code
30 |
31 |
32 | Compiler.compile = compile
33 |
34 | # parsers
35 | from parsers import *
36 | from source_detector import SourceDetector
37 | from reporter import Reporter
38 | from checker import Checker
39 |
40 | # script location
41 | __location__ = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__)))
42 |
43 | # initialize parsers
44 | bdinfo_parser = BDInfoParser()
45 | paste_parser = PasteParser(bdinfo_parser)
46 | mediainfo_parser = MediaInfoParser()
47 |
48 | with open(os.path.join(__location__, "data/codecs.json")) as f:
49 | codecs = json.load(f)
50 | codecs_parser = CodecsParser(codecs)
51 |
52 | source_detector = SourceDetector()
53 | reporter = Reporter()
54 | checker = Checker(codecs_parser, source_detector, reporter)
55 |
56 | app = Flask(__name__)
57 |
58 |
59 | @app.route("/text", methods=["POST"])
60 | def parse_text():
61 | """
62 | POST http://127.0.0.1:5000/text
63 | Body, raw
64 | [INSERT TEXT HERE]
65 | """
66 |
67 | reply = ""
68 |
69 | try:
70 | # setup/reset reporter
71 | reporter.setup()
72 | text = request.get_data().decode("utf-8")
73 | bdinfo, mediainfo, eac3to = paste_parser.parse(text)
74 | except:
75 | traceback.print_exc()
76 | reply += reporter.print_report("fail", "Failed to get paste")
77 | else:
78 | if mediainfo:
79 | try:
80 | # parse mediainfo
81 | mediainfo = mediainfo_parser.parse(mediainfo)
82 | except:
83 | traceback.print_exc()
84 | reply += reporter.print_report("fail", "Mediainfo parser failed")
85 | else:
86 | try:
87 | # setup checker
88 | checker.setup(bdinfo, mediainfo, eac3to, "remux-bot")
89 | except:
90 | traceback.print_exc()
91 | reply += reporter.print_report(
92 | "fail", "vdator failed to setup checker"
93 | )
94 | else:
95 | try:
96 | reply += checker.run_checks()
97 | except:
98 | traceback.print_exc()
99 | reply += reporter.print_report("fail", "vdator failed to parse")
100 | else:
101 | reply += reporter.print_report(
102 | "error", "No mediainfo. Are you missing the `General` heading?"
103 | )
104 |
105 | # report
106 | reply += "> **Report**\n"
107 | reply += reporter.display_report()
108 |
109 | # prevent infinite loop with 2 multi-line code blocks
110 | # https://github.com/bitjockey42/discord-markdown/issues/6
111 | reply_to_convert = reply.replace("```", "===")
112 | # remove quotes around sections
113 | reply_to_convert = reply_to_convert.replace("> **", "**")
114 |
115 | # convert to html
116 | reply_html = discord_markdown_convert_to_html(reply_to_convert)
117 |
118 | # format html
119 | reply_html = reply_html.replace("===", "
")
120 | # emojis
121 | reply_html = reply_html.replace(
122 | "☑",
123 | "
",
124 | )
125 | reply_html = reply_html.replace(
126 | "⚠",
127 | "
",
128 | )
129 | reply_html = reply_html.replace(
130 | "❌",
131 | "
",
132 | )
133 |
134 | data = {"discord_reply": reply, "html_reply": reply_html}
135 |
136 | return jsonify(data)
137 |
138 |
# Port for the development server. Environment variables are always strings,
# so convert explicitly; a non-numeric PORT fails here with a clear ValueError.
PORT = int(os.environ.get("PORT", "5000"))

# Only start the development server when run as a script (`python api.py`),
# so importing this module (tests, a WSGI server, `flask run`) does not block.
if __name__ == "__main__":
    app.run(port=PORT)
141 |
--------------------------------------------------------------------------------
/vdator/checker.py:
--------------------------------------------------------------------------------
1 | from dotenv import load_dotenv
2 | import logging, os
3 |
4 | # load environment variables
5 | load_dotenv()
6 |
7 | # TMDb API
8 | import tmdbsimple as tmdb
9 |
10 | tmdb.API_KEY = os.environ.get("TMDB_API_KEY")
11 |
12 | # IMDb API
13 | from imdb import Cinemagoer
14 |
15 | ia = Cinemagoer()
16 | logger = logging.getLogger("imdbpy")
17 | logger.disabled = True
18 |
19 | # checks
20 | from checks.mixins import PrintHeader, SectionId, IsCommentaryTrack
21 | from checks.remove_until_first_codec import RemoveUntilFirstCodec
22 | from checks import *
23 |
24 | # nltk data
25 | from nltk_people import download_nltk_data
26 |
27 | download_nltk_data()
28 |
29 |
class Checker(PrintHeader, SectionId, IsCommentaryTrack):
    """Runs every check against one parsed paste and concatenates the replies."""

    def __init__(self, codecs_parser, source_detector, reporter):
        """Store the collaborators that are reused across pastes."""
        self.reporter = reporter
        self.source_detector = source_detector
        self.codecs = codecs_parser
        self.remove_until_first_codec = RemoveUntilFirstCodec(codecs_parser)

    def setup(self, bdinfo, mediainfo, eac3to, channel_name):
        """Load the data for the next run and hand it to the source detector."""
        self.bdinfo, self.mediainfo, self.eac3to = bdinfo, mediainfo, eac3to
        self.channel_name = channel_name
        self.source_detector.setup(bdinfo, mediainfo)

    def run_checks(self):
        """Run all checks in their fixed order and return the combined reply."""
        sections = (
            self._run_metadata_checks,
            self._run_video_and_audio_checks,
            self._run_text_checks,
            self._run_chapter_checks,
        )
        return "".join(section() for section in sections)

    def _run_metadata_checks(self):
        """Metadata section: header followed by the metadata checks."""
        parts = [
            self._print_header("Metadata"),
            CheckMovieNameFormat(self.reporter, self.mediainfo).run(),
            CheckMetadataIds(self.reporter, self.mediainfo, tmdb, ia).run(),
            CheckFilename(
                self.reporter,
                self.source_detector,
                self.codecs,
                self.remove_until_first_codec,
                self.mediainfo,
                self.bdinfo,
                self.channel_name,
            ).run(),
            CheckTracksHaveLanguage(self.reporter, self.mediainfo).run(),
            CheckVideoLanguageMatchesFirstAudioLanguage(
                self.reporter, self.mediainfo
            ).run(),
            CheckMuxingMode(self.reporter, self.mediainfo).run(),
            CheckMKVMerge(self.reporter, self.mediainfo).run(),
            CheckMetadataDefaultFlag(self.reporter, self.mediainfo).run(),
        ]
        return "".join(parts)

    def _run_video_and_audio_checks(self):
        """Video & audio section: header, video track, then the audio checks."""
        parts = [
            self._print_header("Video & Audio Tracks"),
            CheckVideoTrack(
                self.reporter,
                self.source_detector,
                self.codecs,
                self.mediainfo,
                self.bdinfo,
            ).run(),
            CheckPrintAudioTrackNames(self.reporter, self.mediainfo).run(),
            CheckAudioTrackConversions(
                self.reporter,
                self.source_detector,
                self.codecs,
                self.remove_until_first_codec,
                self.mediainfo,
                self.bdinfo,
                self.eac3to,
            ).run(),
            # FLAC audio is checked from the mediainfo alone
            CheckFLACAudioTracks(
                self.reporter, self.remove_until_first_codec, self.mediainfo
            ).run(),
            # people lookups go through the TMDb and IMDb APIs
            CheckAudioTrackPeople(
                self.reporter, self.remove_until_first_codec, self.mediainfo, tmdb, ia
            ).run(),
            CheckAudioTrackSpellCheck(
                self.reporter, self.remove_until_first_codec, self.mediainfo
            ).run(),
        ]
        return "".join(parts)

    def _run_text_checks(self):
        """Text section: header followed by the text track checks."""
        parts = [
            self._print_header("Text Tracks"),
            CheckPrintTextTracks(self.reporter, self.mediainfo).run(),
            CheckTextOrder(self.reporter, self.mediainfo).run(),
            CheckTextDefaultFlag(self.reporter, self.mediainfo).run(),
        ]
        return "".join(parts)

    def _run_chapter_checks(self):
        """Chapter checks; no header is printed by the checker here."""
        parts = [
            CheckPrintChapters(self.reporter, self.mediainfo).run(),
            CheckHasChapters(self.reporter, self.mediainfo, self.eac3to).run(),
            CheckChapterLanguage(self.reporter, self.mediainfo).run(),
            CheckChapterPadding(self.reporter, self.mediainfo).run(),
        ]
        return "".join(parts)
115 |
--------------------------------------------------------------------------------
/vdator/checks/__init__.py:
--------------------------------------------------------------------------------
1 | from .check import *
2 |
3 | # metadata
4 | from .movie_name_format import CheckMovieNameFormat
5 | from .metadata_ids import CheckMetadataIds
6 | from .filename import CheckFilename
7 | from .tracks_have_language import CheckTracksHaveLanguage
8 | from .video_language_matches_first_audio_language import (
9 | CheckVideoLanguageMatchesFirstAudioLanguage,
10 | )
11 | from .muxing_mode import CheckMuxingMode
12 | from .mkvmerge import CheckMKVMerge
13 | from .metadata_default_flag import CheckMetadataDefaultFlag
14 |
15 | # video
16 | from .video_track import CheckVideoTrack
17 |
18 | # audio
19 | from .print_audio_track_names import CheckPrintAudioTrackNames
20 | from .audio_track_conversions import CheckAudioTrackConversions
21 | from .flac_audio_tracks import CheckFLACAudioTracks
22 | from .audio_track_people import CheckAudioTrackPeople
23 | from .audio_track_spellcheck import CheckAudioTrackSpellCheck
24 |
25 | # text
26 | from .print_text_tracks import CheckPrintTextTracks
27 | from .text_order import CheckTextOrder
28 | from .text_default_flag import CheckTextDefaultFlag
29 |
30 | # chapters
31 | from .print_chapters import CheckPrintChapters
32 | from .has_chapters import CheckHasChapters
33 | from .chapter_language import CheckChapterLanguage
34 | from .chapter_padding import CheckChapterPadding
35 |
--------------------------------------------------------------------------------
/vdator/checks/audio_track_conversions.py:
--------------------------------------------------------------------------------
1 | from .check import *
2 | from .mixins import SectionId, IsCommentaryTrack
3 |
4 | import re
5 |
6 |
class CheckAudioTrackConversions(Check, SectionId, IsCommentaryTrack):
    """
    Checks audio track conversions between BDInfo and MediaInfo.

    BDInfo and MediaInfo audio tracks are paired by index. For each pair the
    expected codec conversion is verified and the MediaInfo track title is
    compared with the (converted) BDInfo track name. DVD sources are skipped.
    """

    def __init__(
        self,
        reporter,
        source_detector,
        codecs,
        remove_until_first_codec,
        mediainfo,
        bdinfo,
        eac3to,
    ):
        super().__init__(reporter, mediainfo, "Error checking audio track conversions")
        self.source_detector = source_detector
        self.codecs = codecs
        self.remove_until_first_codec = remove_until_first_codec
        self.bdinfo = bdinfo
        self.eac3to = eac3to

    # overriding abstract method
    def get_reply(self):
        """
        Checks every BDInfo/MediaInfo audio track pair

        Returns
        -------
        reply string
        """
        reply = ""

        if self.source_detector.is_dvd():
            # no audio track conversions for dvds
            reply += self.reporter.print_report(
                "info", "No audio track conversions to check for DVDs"
            )
            return reply

        len_bdinfo = len(self.bdinfo["audio"])
        len_mediainfo = len(self.mediainfo["audio"])
        min_len = min(len_bdinfo, len_mediainfo)

        for i in range(min_len):
            # audio = dict{'name':'...', 'language':'...'}
            bdinfo_audio_title = re.sub(r"\s+", " ", self.bdinfo["audio"][i]["name"])
            # str.split always returns at least one part, so parts[0] exists
            bdinfo_audio_parts = bdinfo_audio_title.split(" / ")
            bdinfo_audio_parts_converted = bdinfo_audio_parts.copy()

            # check audio commentary
            (is_commentary, commentary_reply) = self._check_commentary(i)
            if is_commentary:
                reply += commentary_reply
                continue

            optionally_flac = False
            codec = bdinfo_audio_parts[0]
            # None when the channel field is missing or not a number
            channels = self._bdinfo_channels(bdinfo_audio_parts)

            # check audio conversions
            if codec == "DTS-HD Master Audio":
                # DTS-HD MA
                if channels is not None:
                    if channels < 3:
                        # can be DTS-HD MA 1.0, DTS-HD MA 2.0, FLAC 1.0, and FLAC 2.0
                        optionally_flac = True

                        reply += self._check_audio_conversion(
                            i,
                            "DTS-HD Master Audio",
                            ["DTS-HD Master Audio", "FLAC Audio"],
                        )
                    else:
                        reply += self._check_audio_conversion(
                            i, "DTS-HD Master Audio", ["DTS-HD Master Audio"]
                        )
            elif codec == "LPCM Audio":
                if channels is not None and channels < 3:
                    # LPCM 1.0 or 2.0 to FLAC
                    reply += self._check_audio_conversion(
                        i, "LPCM Audio", ["FLAC Audio"]
                    )
                    bdinfo_audio_parts_converted[0] = "FLAC Audio"
                else:
                    # LPCM > 2.0 to DTS-HD MA
                    reply += self._check_audio_conversion(
                        i, "LPCM Audio", ["DTS-HD Master Audio"]
                    )
                    bdinfo_audio_parts_converted[0] = "DTS-HD Master Audio"

            # check track names match
            reply += self._check_track_name(
                i, bdinfo_audio_parts_converted, optionally_flac
            )

        if min_len < len_mediainfo:
            reply += self.reporter.print_report(
                "warning",
                "Checked first `{}/{}` audio tracks".format(min_len, len_mediainfo),
            )

        return reply

    @staticmethod
    def _bdinfo_channels(bdinfo_audio_parts):
        """
        Gets the channel count from split BDInfo audio name parts.
        The field is validated before conversion so a malformed or missing
        channel field does not raise.

        Returns
        -------
        channels as float, or None if the second part is missing or not a float
        """
        if len(bdinfo_audio_parts) > 1 and is_float(bdinfo_audio_parts[1]):
            return float(bdinfo_audio_parts[1])
        return None

    def _audio_label(self, i):
        """Gets the "Audio <section id>" prefix used in report messages"""
        return "Audio " + self._section_id("audio", i)

    def _check_track_name(self, i, bdinfo_audio_parts_converted, optionally_flac):
        """
        Checks that the MediaInfo title of audio track i matches one of the
        acceptable titles derived from the BDInfo track name.

        Parameters
        ----------
        i : int
            audio track index
        bdinfo_audio_parts_converted : list
            BDInfo audio name parts with the codec already replaced by the
            expected converted codec
        optionally_flac : bool
            True if the track may be kept as DTS-HD MA or converted to FLAC

        Returns
        -------
        reply string
        """
        reply = ""
        track = self.mediainfo["audio"][i]

        if "title" not in track:
            reply += self.reporter.print_report(
                "error", self._audio_label(i) + ": Missing track name"
            )
            return reply

        mediainfo_audio_title = track["title"].strip()
        (mediainfo_audio_title, _, _) = self.remove_until_first_codec.remove(
            mediainfo_audio_title
        )

        bdinfo_audio_title = " / ".join(bdinfo_audio_parts_converted)
        bdinfo_audio_titles = [bdinfo_audio_title]
        if optionally_flac:
            # May be converted to FLAC
            # Add DTS-HD MA 1.0/2.0/2.1 and FLAC 1.0/2.0/2.1 as options
            dts_parts = list(bdinfo_audio_parts_converted)
            flac_parts = list(bdinfo_audio_parts_converted)

            flac_parts[0] = "FLAC Audio"
            # FLAC 2.0/2.1
            bdinfo_audio_titles.append(" / ".join(flac_parts))
            flac_parts[1] = "1.0"
            # FLAC 1.0
            bdinfo_audio_titles.append(" / ".join(flac_parts))

            # DTS-HD MA 2.0/2.1
            bdinfo_audio_titles.append(" / ".join(dts_parts))
            dts_parts[1] = "1.0"
            # DTS-HD MA 1.0
            bdinfo_audio_titles.append(" / ".join(dts_parts))

        if track["title"] in bdinfo_audio_titles:
            reply += self.reporter.print_report(
                "correct", self._audio_label(i) + ": Track names match"
            )
            return reply

        # use bitrate from mediainfo audio title
        m_bit_rate = re.search(r"(\d+)\skbps", mediainfo_audio_title)
        if m_bit_rate:
            m_bit_rate = m_bit_rate.group(1)
            for j, title in enumerate(bdinfo_audio_titles):
                bdinfo_audio_titles[j] = re.sub(
                    r"(.*\s)\d+(\skbps.*)",
                    r"\g<1>{}\g<2>".format(m_bit_rate),
                    title,
                )

        # if it has TrueHD objects, add them to the audio channel
        if "number_of_dynamic_objects" in track:
            bdinfo_audio_title = re.sub(
                r"(.*\d\.\d)(.*)",
                r"\g<1>+{} objects\g<2>".format(track["number_of_dynamic_objects"]),
                bdinfo_audio_title,
            )
            bdinfo_audio_titles.append(bdinfo_audio_title)

        # bdinfo_audio_titles has list of possible titles
        if mediainfo_audio_title not in bdinfo_audio_titles:
            reply += self.reporter.print_report(
                "error",
                self._audio_label(i)
                + ": Bad conversion:\n```fix\nBDInfo: "
                + bdinfo_audio_title
                + "\nMediaInfo: "
                + track["title"]
                + "```",
                new_line=False,
            )
            reply += show_diff(track["title"], bdinfo_audio_title)
        else:
            reply += self.reporter.print_report(
                "correct", self._audio_label(i) + ": Track names match"
            )

        return reply

    def _check_commentary(self, i):
        """
        Checks the conversion of audio track i if it is a commentary track.

        Returns
        -------
        (is_commentary, reply) tuple; reply is empty when the track is not a
        commentary track
        """
        reply, is_commentary = "", False
        track = self.mediainfo["audio"][i]

        # a track without a title cannot be identified as commentary;
        # get_reply reports the missing track name
        if "title" not in track or not self._is_commentary_track(track["title"]):
            return is_commentary, reply

        is_commentary = True
        # audio = dict{'name':'...', 'language':'...'}
        bdinfo_audio_name = self.bdinfo["audio"][i]["name"]
        if bdinfo_audio_name.count("/") < 1:
            # NOTE(review): this is the only message built with "Audio #";
            # the text is kept unchanged
            reply += self.reporter.print_report(
                "warning",
                "Audio #"
                + self._section_id("audio", i)
                + ": Cannot verify commentary audio conversion",
            )
            return is_commentary, reply

        bdinfo_audio_format = bdinfo_audio_name.split("/")[0].strip()

        if bdinfo_audio_format == "Dolby Digital Audio":
            # source is already Dolby Digital, expect AC-3 in the mux
            if "format" in track:
                if track["format"] == "AC-3":
                    reply += self.reporter.print_report(
                        "correct", self._audio_label(i) + ": Commentary already AC-3"
                    )
                else:
                    reply += self.reporter.print_report(
                        "error",
                        self._audio_label(i)
                        + ": Commentary should be AC-3 instead of "
                        + track["format"],
                    )
            else:
                reply += self.reporter.print_report(
                    "error",
                    self._audio_label(i) + ": Commentary does not have a format",
                )

            return is_commentary, reply

        if "format" in track and track["format"] == "AC-3":
            if "bit_rate" in track:
                bit_rate = "".join(re.findall(r"[\d]+", track["bit_rate"]))
                if bit_rate == "224":
                    reply += self.reporter.print_report(
                        "correct",
                        self._audio_label(i)
                        + ": Commentary converted to `AC-3 @ 224 kbps`",
                    )
                else:
                    reply += self.reporter.print_report(
                        "error",
                        self._audio_label(i)
                        + ": Commentary AC-3 bitrate should be `224 kbps` instead of `"
                        + track["bit_rate"]
                        + "`",
                    )
            else:
                reply += self.reporter.print_report(
                    "error",
                    self._audio_label(i) + ": Commentary AC-3 does not have a bitrate",
                )
        else:
            reply += self.reporter.print_report(
                "info",
                self._audio_label(i) + ": Commentary may be converted to AC-3",
            )

        return is_commentary, reply

    def _check_audio_conversion(self, i, audio_from, audio_to):
        """
        Checks that audio track i was converted to one of the allowed codecs,
        with matching channels and a bitrate not above the BDInfo bitrate.

        Parameters
        ----------
        i : int
            audio track index
        audio_from : str
            source codec name (not read by this method)
        audio_to : list
            acceptable codec names for the MediaInfo title

        Returns
        -------
        reply string
        """
        reply = ""
        could_not_verify = "Could not verify audio "

        # verify audio track titles
        if (
            " / " not in self.bdinfo["audio"][i]["name"]
            or "title" not in self.mediainfo["audio"][i]
            or " / " not in self.mediainfo["audio"][i]["title"]
        ):
            reply += self.reporter.print_report(
                "warning", could_not_verify + self._section_id("audio", i)
            )
            return reply

        # [codec, channel, sampling rate, bit rate, bit depth]
        bdinfo_audio_parts = self.bdinfo["audio"][i]["name"].split(" / ")
        if len(bdinfo_audio_parts) <= 4:
            reply += self.reporter.print_report(
                "warning", could_not_verify + self._section_id("audio", i)
            )
            return reply

        mediainfo_audio_title = self.mediainfo["audio"][i]["title"]
        (mediainfo_audio_title, _, _) = self.remove_until_first_codec.remove(
            mediainfo_audio_title
        )

        # [codec, channel, sampling rate, bit rate, bit depth]
        mediainfo_parts = mediainfo_audio_title.split(" / ")
        if len(mediainfo_parts) <= 4:
            reply += self.reporter.print_report(
                "warning", could_not_verify + self._section_id("audio", i)
            )
            return reply

        # verify audio conversions
        if mediainfo_parts[0] not in audio_to:
            reply += self.reporter.print_report(
                "error",
                self._audio_label(i)
                + " should be converted to one of ["
                + ", ".join(audio_to)
                + "]",
            )
            return reply

        # channels are not compared when an eac3to command line used -mono
        disable_channels_check = self._eac3to_log_has_mono()

        if not disable_channels_check and mediainfo_parts[1] != bdinfo_audio_parts[1]:
            reply += self.reporter.print_report(
                "error",
                self._audio_label(i)
                + ": Channels should be `"
                + bdinfo_audio_parts[1]
                + "` instead of `"
                + mediainfo_parts[1]
                + "`",
            )

        # mediainfo bitrate should be less than bdinfo bitrate
        try:
            m_bit_rate = int("".join(re.findall(r"\d+", mediainfo_parts[3].strip())))
            bd_bit_rate = int(
                "".join(re.findall(r"\d+", bdinfo_audio_parts[3].strip()))
            )
        except ValueError:
            # best effort: a field without digits cannot be compared
            return reply

        if m_bit_rate > bd_bit_rate:
            reply += self.reporter.print_report(
                "error",
                self._audio_label(i)
                + ": MediaInfo bitrate is greater than BDInfo bitrate: `"
                + str(m_bit_rate)
                + " kbps > "
                + str(bd_bit_rate)
                + " kbps`",
            )

        return reply

    def _eac3to_log_has_mono(self):
        """
        Checks if any eac3to log has a "command line:" line containing the
        "-mono" argument (case-insensitive)

        Returns
        -------
        True if such a command line exists, False otherwise
        """
        return any(
            line.lower().startswith("command line:") and "-mono" in line.lower().split()
            for log in self.eac3to
            for line in log
        )
400 |
--------------------------------------------------------------------------------
/vdator/checks/audio_track_people.py:
--------------------------------------------------------------------------------
1 | from .check import *
2 | from .mixins import SectionId
3 |
4 | from nltk_people import extract_names
5 |
6 |
class CheckAudioTrackPeople(Check, SectionId):
    """
    Looks up people named in audio track titles (titles without an audio
    codec) against TMDb and IMDb and reports matched and unmatched names.
    """

    def __init__(self, reporter, remove_until_first_codec, mediainfo, tmdb, ia):
        super().__init__(reporter, mediainfo, "Error checking IMDb/TMDb people")
        self.remove_until_first_codec = remove_until_first_codec
        self.tmdb = tmdb
        self.ia = ia

    # overriding abstract method
    def get_reply(self):
        """
        Checks people in audio track names

        Returns
        -------
        reply string
        """
        reply = ""

        for i, audio_track in enumerate(self.mediainfo["audio"]):
            if "title" not in audio_track:
                continue
            title = audio_track["title"]

            # skip if has an audio codec
            _, _, found_codec = self.remove_until_first_codec.remove(title)
            if found_codec:
                continue

            # try to match names
            matched_names = set()
            names = extract_names(title)
            search = self.tmdb.Search()
            for n in names:
                # TMDb API
                # catch Exception rather than everything so that
                # KeyboardInterrupt/SystemExit are not swallowed
                try:
                    search.person(query=n)
                    for s in search.results:
                        if n == s["name"]:
                            matched_names.add(n)
                except Exception:
                    reply += self.reporter.print_report(
                        "info",
                        "Audio "
                        + self._section_id("audio", i)
                        + ": Failed to get TMDb people data",
                    )
                # IMDb API
                try:
                    for person in self.ia.search_person(n):
                        if n == person["name"]:
                            matched_names.add(n)
                except Exception:
                    reply += self.reporter.print_report(
                        "info",
                        "Audio "
                        + self._section_id("audio", i)
                        + ": Failed to get IMDb people data",
                    )

            # names are sorted so the report does not depend on set ordering
            if len(matched_names) > 0:
                reply += self.reporter.print_report(
                    "correct",
                    "Audio "
                    + self._section_id("audio", i)
                    + " People Matched: `"
                    + ", ".join(sorted(matched_names))
                    + "`",
                )
            unmatched_names = set(names) - matched_names
            if len(unmatched_names) > 0:
                reply += self.reporter.print_report(
                    "warning",
                    "Audio "
                    + self._section_id("audio", i)
                    + " People Unmatched: `"
                    + ", ".join(sorted(unmatched_names))
                    + "`",
                )

        return reply
80 |
--------------------------------------------------------------------------------
/vdator/checks/audio_track_spellcheck.py:
--------------------------------------------------------------------------------
1 | from .check import *
2 | from .mixins import SectionId
3 |
4 | from dotenv import load_dotenv
5 | import nltk
6 | from nltk_people import extract_names
7 | import hunspell, os, string
8 |
# load environment variables
load_dotenv()

# comma-separated pair of values, passed positionally to hunspell.HunSpell
# by CheckAudioTrackSpellCheck; required, so a missing variable fails at import
HUNSPELL_LANG = [x.strip() for x in os.environ.get("HUNSPELL_LANG").split(",")]
# comma-separated words that are never reported as misspelled.
# Entries are lowercased because they are compared against `word.lower()`;
# the variable is optional and empty entries are dropped.
MISSPELLED_IGNORE_LIST = [
    x.strip().lower()
    for x in os.environ.get("MISSPELLED_IGNORE_LIST", "").split(",")
    if x.strip()
]
16 |
17 |
class CheckAudioTrackSpellCheck(Check, SectionId):
    """
    Spell checks audio track titles with hunspell. For titles containing an
    audio codec only the part before the codec is checked. Detected people
    names and words in MISSPELLED_IGNORE_LIST are not reported.
    """

    # maps every punctuation character to a space; built once instead of per track
    _PUNCTUATION_TO_SPACE = str.maketrans(
        string.punctuation, " " * len(string.punctuation)
    )

    def __init__(self, reporter, remove_until_first_codec, mediainfo):
        super().__init__(reporter, mediainfo, "Error spell checking audio track names")
        self.hobj = hunspell.HunSpell(HUNSPELL_LANG[0], HUNSPELL_LANG[1])
        self.remove_until_first_codec = remove_until_first_codec

    # overriding abstract method
    def get_reply(self):
        """
        Spellchecks audio track names

        Returns
        -------
        reply string
        """
        reply = ""

        for i, audio_track in enumerate(self.mediainfo["audio"]):
            if "title" not in audio_track:
                continue

            title, title_parts, found_codec = self.remove_until_first_codec.remove(
                audio_track["title"]
            )

            # spellcheck title parts before codec or entire audio title
            spellcheck_text = " ".join(title_parts) if found_codec else title
            if not spellcheck_text:
                continue

            # map punctuation to space
            spellcheck_text = spellcheck_text.translate(self._PUNCTUATION_TO_SPACE)

            # ignore names
            ignored_tokens = {
                part for name in extract_names(spellcheck_text) for part in name.split()
            }

            # collect each misspelled word once, in order of first appearance,
            # so the report does not depend on set ordering
            misspelled_words = list()
            for token in nltk.word_tokenize(spellcheck_text):
                if token in ignored_tokens or token in misspelled_words:
                    continue
                if token.lower() in MISSPELLED_IGNORE_LIST:
                    continue
                if not self.hobj.spell(token):
                    # token is misspelled
                    misspelled_words.append(token)

            if len(misspelled_words) > 0:
                reply += self.reporter.print_report(
                    "error",
                    "Audio "
                    + self._section_id("audio", i)
                    + " Misspelled: `"
                    + ", ".join(misspelled_words)
                    + "`",
                )

        return reply
75 |
--------------------------------------------------------------------------------
/vdator/checks/chapter_language.py:
--------------------------------------------------------------------------------
1 | from .check import *
2 |
3 | from iso639 import languages as iso639_languages
4 | from langdetect import detect as langdetect_detect, DetectorFactory
5 |
# make language detection deterministic: the seed is fixed once, when this
# module is imported, before any chapter text is passed to langdetect
DetectorFactory.seed = 0
8 |
9 |
class CheckChapterLanguage(Check):
    """
    Checks that chapters have a valid language set and that the set language
    matches the language detected from the chapter titles.
    """

    def __init__(self, reporter, mediainfo):
        super().__init__(reporter, mediainfo, "Error checking chapter language")

    # overriding abstract method
    def get_reply(self):
        """
        Checks the chapter languages of every chapter menu

        Returns
        -------
        reply string
        """
        reply = ""

        if "menu" in self.mediainfo and len(self.mediainfo["menu"]) > 0:
            if len(self.mediainfo["menu"]) >= 1:
                for i, chapters in enumerate(self.mediainfo["menu"]):
                    if len(chapters) >= 1:
                        reply += self._check_menu(i, chapters)
            else:
                # NOTE(review): as written this branch cannot run, the outer
                # condition already requires a non-empty menu list
                reply += self.reporter.print_report(
                    "error", "Must have at least 1 chapter menu"
                )

        return reply

    def _check_menu(self, i, chapters):
        """
        Checks the languages of one non-empty chapter menu.

        Parameters
        ----------
        i : int
            zero-based menu index, reported as `Chapters {i + 1}`
        chapters : list
            list of chapters
            [{'time': '...', 'titles': [{'language': '...', 'title': '...'}, ...], 'languages': ['...', '...']}]

        Returns
        -------
        reply string
        """
        reply = ""

        # chapter numbers that have an invalid language
        invalid_ch_lang_nums = list()
        # {'time': '...', 'titles': [{'language': '...', 'title': '...'}, ...], 'languages': ['...', '...']}
        ch_0 = chapters[0]
        # concatenate all chapter titles into phrases
        # ch_0["languages"] = ['...', '...']
        # chapter_phrases = {'de': '...', 'en': '...'}
        chapter_phrases = {k: "" for k in ch_0["languages"]}
        # list of detected languages with chapter languages as keys
        # chapter_langs = {'de': [...], 'en': [...]}
        chapter_langs = {k: list() for k in ch_0["languages"]}

        for ch_num, ch in enumerate(chapters, start=1):
            has_invalid_lang = False
            for lang in ch["languages"]:
                if not lang:
                    has_invalid_lang = True
                    continue
                try:
                    ch_lang = iso639_languages.get(part1=lang)
                except KeyError:
                    # not a known language code
                    has_invalid_lang = True
                else:
                    # store chapter language; setdefault so a language that the
                    # first chapter does not have is not mistaken for an invalid one
                    chapter_langs.setdefault(lang, list()).append(ch_lang)

            if has_invalid_lang:
                # store invalid chapter number (position of the chapter in the
                # menu), once per chapter
                invalid_ch_lang_nums.append(str(ch_num))

            for title in ch["titles"]:
                # store as key "NA" if there is no chapter language set
                # (this writes "NA" back into the mediainfo title, as before)
                if title["language"] is None:
                    title["language"] = "NA"
                if title["language"] not in chapter_phrases:
                    chapter_phrases[title["language"]] = ""
                chapter_phrases[title["language"]] += title["title"] + "\n"

        if len(invalid_ch_lang_nums) > 0:
            if len(invalid_ch_lang_nums) == len(chapters):
                reply += self.reporter.print_report(
                    "error",
                    f"Chapters {i + 1}: All chapters do not have a language set",
                )
            else:
                reply += self.reporter.print_report(
                    "error",
                    f"Chapters {i + 1}: The following chapters do not have a language set: `"
                    + ", ".join(invalid_ch_lang_nums)
                    + "`",
                )
        else:
            reply += self.reporter.print_report(
                "correct",
                f"Chapters {i + 1}: All chapters have a language set",
            )

        for k, chapter_phrase in chapter_phrases.items():
            if k == "NA":
                reply += self.reporter.print_report(
                    "error",
                    f"Chapters {i + 1}: No chapter language set",
                )
                continue
            if not chapter_phrase:
                continue

            # .get: a title language may have no entry in chapter_langs
            known_langs = list(set(chapter_langs.get(k, list())))
            try:
                detected_code = langdetect_detect(chapter_phrase)
                ch_detected_lang = iso639_languages.get(part1=detected_code)
            except KeyError:
                reply += self.reporter.print_report(
                    "warning", "Could not detect chapters language"
                )
                continue

            if ch_detected_lang in known_langs:
                reply += self.reporter.print_report(
                    "correct",
                    f"Chapters {i + 1}: Language matches detected language: `"
                    + ch_detected_lang.name
                    + "`",
                )
                continue

            # sorted so the report does not depend on set ordering
            chapter_langs_names = ", ".join(
                sorted(set(known_lang.name for known_lang in known_langs))
            )
            if chapter_langs_names:
                reply += self.reporter.print_report(
                    "error",
                    f"Chapters {i + 1}: Languages: `"
                    + chapter_langs_names
                    + "` do not match detected language: `"
                    + ch_detected_lang.name
                    + "`",
                )
            else:
                reply += self.reporter.print_report(
                    "error",
                    f"Chapters {i + 1}: No chapter languages. Detected language: `"
                    + ch_detected_lang.name
                    + "`",
                )

        return reply
139 |
--------------------------------------------------------------------------------
/vdator/checks/chapter_padding.py:
--------------------------------------------------------------------------------
1 | from .check import *
2 |
3 | import re
4 |
5 |
class CheckChapterPadding(Check):
    """
    Checks that numbered chapter titles ("Chapter 01", ...) are zero padded to
    the number of digits of the chapter count of their menu.
    """

    def __init__(self, reporter, mediainfo):
        super().__init__(reporter, mediainfo, "Error checking chapter padding")

    # overriding abstract method
    def get_reply(self):
        """
        Checks the chapter number padding of every chapter menu

        Returns
        -------
        reply string
        """
        reply = ""

        # a missing or empty "menu" produces no report
        for i, menu in enumerate(self.mediainfo.get("menu") or []):
            if self._is_padded_correctly(menu):
                reply += self.reporter.print_report(
                    "correct", f"Chapters {i + 1}: Properly padded"
                )
            else:
                reply += self.reporter.print_report(
                    "error", f"Chapters {i + 1}: Incorrect padding"
                )

        return reply

    @staticmethod
    def _is_padded_correctly(menu):
        """
        Checks the numbered chapter titles of one menu.

        Returns
        -------
        False if any numbered chapter has fewer digits than the chapter count
        of the menu, True otherwise
        """
        min_digits = len(str(len(menu)))
        for ch in menu:
            for title in ch["titles"]:
                # numbered chapter; only the number right after "chapter" is
                # checked, digits elsewhere in the title are ignored
                numbered = re.search(r"^chapter\s(\d+)", title["title"], re.IGNORECASE)
                if numbered and len(numbered.group(1)) < min_digits:
                    return False
        return True
39 |
--------------------------------------------------------------------------------
/vdator/checks/check.py:
--------------------------------------------------------------------------------
1 | import sys, traceback
2 |
# allow imports from parent directory (e.g. the `helpers` module imported below)
# NOTE(review): "../" is a relative path, so it presumably resolves against the
# process working directory rather than this file's location; confirm
sys.path.append("../")
5 |
6 | from abc import abstractmethod
7 | from pydash import has
8 | from helpers import has_many, show_diff, is_float
9 |
10 |
class Check(object):
    """
    Base class for checks. Subclasses implement get_reply(); callers use run(),
    which converts an error raised by the check into a "fail" report.
    """

    def __init__(self, reporter, mediainfo, run_fail_msg):
        """
        Parameters
        ----------
        reporter : object
            provides print_report(type, message), which returns a string
        mediainfo : object
            parsed mediainfo made available to the check as self.mediainfo
        run_fail_msg : str
            message reported when get_reply() raises
        """
        self.reporter = reporter
        self.mediainfo = mediainfo
        self.run_fail_msg = run_fail_msg

    def run(self):
        """
        Runs the check and returns reply.
        Wraps check in try...except to prevent crashes

        Returns
        -------
        reply string
        """
        reply = ""
        try:
            reply += self.get_reply()
        except Exception:
            # only ordinary errors are turned into a "fail" report;
            # KeyboardInterrupt and SystemExit still propagate to the caller
            traceback.print_exc()
            reply += self.reporter.print_report("fail", self.run_fail_msg)
        return reply

    @abstractmethod
    def get_reply(self):
        """
        Gets reply from this check

        Returns
        -------
        reply string
        """
        pass
44 |
--------------------------------------------------------------------------------
/vdator/checks/filename.py:
--------------------------------------------------------------------------------
1 | from .check import *
2 |
3 | from dotenv import load_dotenv
4 | import os, re, string, unidecode
5 |
# read settings from the environment (.env is loaded first)
load_dotenv()


def _env_list(name):
    """Splits the comma-separated environment variable `name` into trimmed items."""
    return [item.strip() for item in os.environ.get(name).split(",")]


# None stands for "no cut" and is always the first option
CUTS = [None] + _env_list("FILENAME_CUTS")
RELEASE_GROUP = os.environ.get("RELEASE_GROUP").strip()
TRAINEE_CHANNELS = _env_list("TRAINEE_CHANNELS")
INTERNAL_CHANNELS = _env_list("INTERNAL_CHANNELS")
13 |
14 |
class CheckFilename(Check):
    """
    Checks the file name from MediaInfo against the release name constructed
    from the movie name, video and main audio track.
    """

    def __init__(
        self,
        reporter,
        source_detector,
        codecs,
        remove_until_first_codec,
        mediainfo,
        bdinfo,
        channel_name,
    ):
        super().__init__(reporter, mediainfo, "Error checking filename")
        self.source_detector = source_detector
        self.codecs = codecs
        self.remove_until_first_codec = remove_until_first_codec
        self.bdinfo = bdinfo
        self.channel_name = channel_name

    # overriding abstract method
    def get_reply(self):
        """
        Checks the filename against the possible release names (one per cut)

        Returns
        -------
        reply string
        """
        reply = ""

        if not has_many(self.mediainfo, "general.0", ["movie_name", "complete_name"]):
            reply += self.reporter.print_report("error", "Cannot validate filename")
            return reply

        # keep only the file name of a Windows or POSIX path
        complete_name = self.mediainfo["general"][0]["complete_name"]
        if "\\" in complete_name:
            complete_name = complete_name.split("\\")[-1]
        elif "/" in complete_name:
            complete_name = complete_name.split("/")[-1]

        # possible release names
        complete_name_lc = complete_name.lower()
        possible_release_names = [
            self._construct_release_name(
                cut,
                hybird=("hybrid" in complete_name_lc),
                repack=("repack" in complete_name_lc),
            )
            for cut in CUTS
        ]

        if (
            self.channel_name in INTERNAL_CHANNELS
            and complete_name in possible_release_names
        ):
            reply += self.reporter.print_report(
                "correct", "Filename: `" + complete_name + "`"
            )
        elif self._partial_match(possible_release_names, complete_name):
            reply += self.reporter.print_report(
                "correct", "Filename: `" + complete_name + "`"
            )
        else:
            expected_release_name = possible_release_names[0]

            # pick the expected release name with the proper cut
            for i, cut in enumerate(CUTS[1:]):
                if cut in complete_name:
                    expected_release_name = possible_release_names[i + 1]

            if self.channel_name not in INTERNAL_CHANNELS:
                expected_release_name += "GRouP.mkv"

            reply += self.reporter.print_report(
                "error",
                "Filename missmatch:\n```fix\nFilename: "
                + complete_name
                + "\nExpected: "
                + expected_release_name
                + "```",
                new_line=False,
            )
            reply += show_diff(complete_name, expected_release_name)

        return reply

    def _construct_release_name(self, cut=None, hybird=False, repack=False):
        """
        Builds the expected release name.

        Parameters
        ----------
        cut : str, optional
            cut name appended after the title, None for no cut
        hybird : bool
            append ".Hybrid" (parameter name kept as is for callers)
        repack : bool
            append ".REPACK"

        Returns
        -------
        release name string; ends with "-" for non-internal channels and with
        "-<RELEASE_GROUP>.mkv" for internal channels
        """
        release_name = ""

        if not self.source_detector.is_dvd():
            # scan type must come from bdinfo
            bdinfo_video_parts = self.bdinfo["video"][0].split(" / ")
            scan_type = bdinfo_video_parts[2].strip()[-1].lower()

        if has_many(self.mediainfo, "video.0", ["height", "title"]) and has(
            self.mediainfo, "audio.0.title"
        ):
            movie_name = self.mediainfo["general"][0]["movie_name"]
            # Name.S01E01 or Name.S01E01E02
            tv_show_name_search = re.search(r"(.+)\s-\s(S\d{2}(E\d{2})+)", movie_name)
            # Name.Year
            movie_name_search = re.search(r"(.+)\s\((\d{4})\)", movie_name)
            if tv_show_name_search:
                title = self._format_filename_title(tv_show_name_search.group(1))
                season_episode = tv_show_name_search.group(2).strip()
                release_name += title + "." + season_episode
            elif movie_name_search:
                title = self._format_filename_title(movie_name_search.group(1))
                year = movie_name_search.group(2).strip()
                release_name += title + "." + year
            else:
                release_name += self._format_filename_title(movie_name)

            # with or without hybrid
            if hybird:
                release_name += ".Hybrid"

            # with or without repack
            if repack:
                release_name += ".REPACK"

            # check cuts here
            if cut is not None:
                release_name += "." + cut

            # resolution (ex. 1080p)
            height = "".join(re.findall(r"[\d]+", self.mediainfo["video"][0]["height"]))

            if self.source_detector.is_dvd():
                # source DVD
                if "standard" in self.mediainfo["video"][0]:
                    release_name += "." + self.mediainfo["video"][0]["standard"]
                release_name += ".DVD.REMUX"
            elif self.source_detector.is_uhd():
                # source UHD BluRay
                release_name += "." + height
                release_name += scan_type
                release_name += ".UHD.BluRay.REMUX"
                # Dolby Vision (DV)
                if self.source_detector.is_dv():
                    release_name += ".DV"
                # SDR/HDR
                if self.mediainfo["video"][0]["color_primaries"] == "BT.2020":
                    release_name += ".HDR"
                else:
                    release_name += ".SDR"
            else:
                # source HD BluRay
                release_name += "." + height
                release_name += scan_type
                release_name += ".BluRay.REMUX"

            # video format (ex. AVC)
            main_video_title = self.mediainfo["video"][0]["title"].split(" / ")
            if len(main_video_title) >= 1:
                release_name += "." + self.codecs.get_video_codec_title_name(
                    main_video_title[0].strip()
                )

            main_audio_title = self.mediainfo["audio"][0]["title"]
            (
                main_audio_title,
                _,
                _,
            ) = self.remove_until_first_codec.remove(main_audio_title)
            main_audio_title_parts = main_audio_title.split(" / ")

            audio_codec_title, main_audio_channels = None, None

            # get main audio codec
            if len(main_audio_title) > 0:
                main_audio_codec = main_audio_title_parts[0]
                if self.codecs.is_audio_title(main_audio_codec):
                    audio_codec_title = self.codecs.get_audio_codec_title_name(
                        main_audio_codec
                    )

            # get main audio channels
            # test the number of parts, not the title length, so a title
            # without " / " does not raise IndexError
            if len(main_audio_title_parts) > 1:
                main_audio_channels = main_audio_title_parts[1]
                search_channel_atmos = re.search(
                    r"(\d\.\d)\+\d+\sobjects", main_audio_channels
                )
                if search_channel_atmos:
                    main_audio_channels = search_channel_atmos.group(1)

            if (
                audio_codec_title
                and main_audio_channels
                and is_float(main_audio_channels)
            ):
                # have main audio codec and channels
                if audio_codec_title == "TrueHD.Atmos":
                    # atmos channel
                    release_name += ".TrueHD." + main_audio_channels + ".Atmos"
                else:
                    release_name += "." + audio_codec_title + "." + main_audio_channels

            # release group
            release_name += "-"
            if self.channel_name in INTERNAL_CHANNELS:
                release_name += RELEASE_GROUP + ".mkv"

        # replace multiple dots with one
        release_name = re.sub(r"\.+", ".", release_name)

        return release_name

    def _format_filename_title(self, title):
        """
        Formats a title for use in a filename: strips accents, replaces "&"
        with "and", removes punctuation except dots and separates words by
        single dots.
        """
        title = title.strip()
        # remove accents
        title = unidecode.unidecode(title)
        # remove punctuation
        title = title.replace("&", "and")
        title = "".join([i for i in title if i not in string.punctuation or i == "."])
        # NOTE(review): no-op as ordered, ":" was already removed with the
        # rest of the punctuation above
        title = title.replace(":", ".")
        # replace spaces with dots
        title = title.replace(" ", ".")
        # force single dots
        title = re.sub(r"\.+", ".", title)
        return title

    def _partial_match(self, possible_names, name):
        """Returns True if any of possible_names is a substring of name"""
        return any(n in name for n in possible_names)
239 |
--------------------------------------------------------------------------------
/vdator/checks/flac_audio_tracks.py:
--------------------------------------------------------------------------------
1 | from .check import *
2 | from .mixins import SectionId
3 |
4 | import re
5 |
6 |
class CheckFLACAudioTracks(Check, SectionId):
    """
    Rebuilds the expected title of every FLAC audio track from its MediaInfo
    fields and compares it with the actual track title.
    """

    def __init__(self, reporter, remove_until_first_codec, mediainfo):
        super().__init__(reporter, mediainfo, "Error checking FLAC audio tracks")
        self.remove_until_first_codec = remove_until_first_codec

    @staticmethod
    def _numeric_text(pattern, raw):
        """Joins every match of pattern in the trimmed, lowercased raw value"""
        return "".join(re.findall(pattern, raw.strip().lower()))

    # overriding abstract method
    def get_reply(self):
        """
        Checks FLAC audio track names using mediainfo

        Returns
        -------
        reply string
        """
        report = ""
        number = r"\d*\.\d+|\d+"

        for idx, track in enumerate(self.mediainfo["audio"]):
            # tracks without a title are not checked
            if "title" not in track:
                continue

            # titles without codec info are not checked
            actual_title, _, has_codec = self.remove_until_first_codec.remove(
                track["title"]
            )
            if not has_codec:
                continue

            # only FLAC tracks are checked
            if "format" not in track or track["format"] != "FLAC":
                continue

            channels = float(self._numeric_text(number, track["channels"]))
            sampling_rate = int(float(self._numeric_text(number, track["sampling_rate"])))
            bit_rate = int(self._numeric_text(r"\d+", track["bit_rate"]))
            bit_depth = track["bit_depth"].strip().lower().replace(" bits", "-bit")

            expected_title = "FLAC Audio / {:.1f} / {} kHz / {} kbps / {}".format(
                channels, sampling_rate, bit_rate, bit_depth
            )

            if expected_title != actual_title:
                report += self.reporter.print_report(
                    "error",
                    "Audio "
                    + self._section_id("audio", idx)
                    + ": FLAC Bad track name (from MediaInfo):\n```fix\nActual: "
                    + actual_title
                    + "\nExpected: "
                    + expected_title
                    + "```",
                    new_line=False,
                )
            else:
                report += self.reporter.print_report(
                    "correct",
                    "Audio "
                    + self._section_id("audio", idx)
                    + ": FLAC Good track name (from MediaInfo)",
                )

        return report
91 |
--------------------------------------------------------------------------------
/vdator/checks/has_chapters.py:
--------------------------------------------------------------------------------
1 | from .check import *
2 |
3 |
class CheckHasChapters(Check):
    """Reports whether the file has chapters when an eac3to log mentions them."""

    def __init__(self, reporter, mediainfo, eac3to):
        super().__init__(reporter, mediainfo, "Error checking if should have chapters")
        self.eac3to = eac3to

    # overriding abstract method
    def get_reply(self):
        """
        Returns
        -------
        reply string, empty when no eac3to log line contains "chapters"
        """
        should_have_chapters = any(
            "chapters" in line for log in self.eac3to for line in log
        )
        if not should_have_chapters:
            return ""

        if len(self.mediainfo["menu"]) == 0:
            return "" + self.reporter.print_report(
                "error", "Should have chapters (from eac3to log)"
            )
        return "" + self.reporter.print_report(
            "correct", "Has chapters (from eac3to log)"
        )
26 |
--------------------------------------------------------------------------------
/vdator/checks/metadata_default_flag.py:
--------------------------------------------------------------------------------
1 | from .check import *
2 |
3 |
class CheckMetadataDefaultFlag(Check):
    """Checks that at most one audio and one text track is flagged `default=yes`."""

    def __init__(self, reporter, mediainfo):
        super().__init__(reporter, mediainfo, "Error checking metadata default flag")

    # overriding abstract method
    def get_reply(self):
        """
        Returns
        -------
        reply string with one error per track type that has more than one
        default track, or a single "correct" report when there is none
        """
        reply, found_error = "", False

        for track_type in ("audio", "text"):
            default_yes_count = sum(
                1
                for track in self.mediainfo[track_type]
                if "default" in track and track["default"].lower() == "yes"
            )
            if default_yes_count <= 1:
                continue
            reply += self.reporter.print_report(
                "error",
                "Only 1 {} track should be `default=yes`".format(track_type),
            )
            found_error = True

        if found_error:
            return reply

        reply += self.reporter.print_report(
            "correct",
            "Only 1 track of each type is `default=yes`",
        )
        return reply
32 |
--------------------------------------------------------------------------------
/vdator/checks/metadata_ids.py:
--------------------------------------------------------------------------------
1 | from .check import *
2 | from .mixins import IsMovie
3 |
4 | from dotenv import load_dotenv
5 | import datetime, os, re
6 | import imdb
7 |
# load environment variables (python-dotenv) before the settings below are read
load_dotenv()

# How many years the movie year in the file may differ from the IMDb/TMDb year
# and still count as a match in CheckMetadataIds._year_range (default: 1).
# Read once at import time; a value that is not an integer makes int() raise.
MOVIE_YEAR_OFFSET = int(os.environ.get("MOVIE_YEAR_OFFSET", "1").strip())
13 |
14 |
class CheckMetadataIds(Check, IsMovie):
    """
    Looks up the IMDb/TMDb ids stored in the General section and checks that
    the name (and, for movies, the year) parsed from "movie_name" agrees with
    at least one of the two services.
    """

    def __init__(self, reporter, mediainfo, tmdb, ia):
        super().__init__(reporter, mediainfo, "Error parsing IMDb/TMDb ids")
        # TMDb client module/object exposing Movies(id) and TV(id)
        self.tmdb = tmdb
        # IMDb client exposing get_movie(id)
        self.ia = ia

    # overriding abstract method
    def get_reply(self):
        """
        Compare the name/year from the file with IMDb and TMDb.

        Returns
        -------
        reply string; empty when the file has neither an IMDb nor a TMDb id
        and no lookup was attempted
        """
        reply = ""

        imdb_movie, tmdb_info, tmdb_year = None, None, None

        movie_data = {"name": None, "year": None}

        matched = {
            "imdb_title": False,
            "imdb_year": False,
            "tmdb_title": False,
            "tmdb_year": False,
            # matched movie title/year with either imdb or tmdb
            "title": False,
            "year": False,
            "title_replied": False,
            "year_replied": False,
        }

        # is it a movie or tv show?
        is_movie = self._is_movie()

        # TMDb movie details carry the display name under "title", tv show
        # details carry it under "name" (the tv reporting branch below already
        # reads "name"). Using "title" for tv shows could never match.
        tmdb_name_key = "title" if is_movie else "name"

        # extract movie name and year or tv show name
        if has(self.mediainfo, "general.0.movie_name"):
            if is_movie:
                # movie: "Name (Year)"
                movie_name = re.search(
                    r"^(.+)\((\d{4})\)", self.mediainfo["general"][0]["movie_name"]
                )
                if movie_name:
                    movie_data["name"] = movie_name.group(1).strip()
                    movie_data["year"] = movie_name.group(2).strip()
            else:
                # tv show: the name is everything before the first " - "
                tv_show_name = re.search(
                    r"^(.+)\s-\s.+\s-\s.+", self.mediainfo["general"][0]["movie_name"]
                )
                if tv_show_name:
                    movie_data["name"] = tv_show_name.group(1).strip()

        if has(self.mediainfo, "general.0.imdb"):
            # keep only the digits of the stored id
            imdb_id = "".join(
                re.findall(r"[\d]+", self.mediainfo["general"][0]["imdb"])
            )
            try:
                imdb_movie = self.ia.get_movie(imdb_id)
            except imdb._exceptions.IMDbParserError:
                reply += self.reporter.print_report(
                    "error",
                    "Invalid IMDb id: `" + self.mediainfo["general"][0]["imdb"] + "`",
                )
            except Exception:
                # e.g. imdb._exceptions.IMDbDataAccessError
                reply += self.reporter.print_report(
                    "info",
                    "Failed to get IMDb movie data for id: `"
                    + self.mediainfo["general"][0]["imdb"]
                    + "`",
                )
            else:
                # force single space in movie name
                imdb_movie["title"] = re.sub(r"\s+", " ", imdb_movie["title"])
                matched["imdb_title"] = movie_data["name"] == imdb_movie["title"]
                if is_movie:
                    matched["imdb_year"] = self._year_range(
                        imdb_movie["year"], movie_data["year"]
                    )

        if has(self.mediainfo, "general.0.tmdb"):
            # keep only the digits of the stored id ("movie/123" -> "123")
            tmdb_id = "".join(
                re.findall(r"[\d]+", self.mediainfo["general"][0]["tmdb"])
            )
            # movie or tv show
            tmdb_data = self.tmdb.Movies(tmdb_id) if is_movie else self.tmdb.TV(tmdb_id)

            try:
                tmdb_info = tmdb_data.info()
                # force single space in movie/tv show name
                if tmdb_name_key in tmdb_info:
                    tmdb_info[tmdb_name_key] = re.sub(
                        r"\s+", " ", tmdb_info[tmdb_name_key]
                    )
            except Exception:
                reply += self.reporter.print_report(
                    "info",
                    "Failed to get TMDb data for id: `"
                    + self.mediainfo["general"][0]["tmdb"]
                    + "`",
                )
            else:
                if is_movie and "release_date" in tmdb_info and tmdb_info["release_date"]:
                    datetime_obj = datetime.datetime.strptime(
                        tmdb_info["release_date"], "%Y-%m-%d"
                    )
                    tmdb_year = str(datetime_obj.year)
                # for movies:
                # tmdb_info["original_title"] is original title
                # tmdb_info["title"] is the translated title in whatever language you're requesting
                matched["tmdb_title"] = (
                    tmdb_name_key in tmdb_info
                    and movie_data["name"] == tmdb_info[tmdb_name_key]
                )
                if is_movie:
                    matched["tmdb_year"] = tmdb_year and self._year_range(
                        tmdb_year, movie_data["year"]
                    )

        # matched title/year with either imdb or tmdb
        matched["title"] = matched["imdb_title"] or matched["tmdb_title"]
        matched["year"] = matched["imdb_year"] or matched["tmdb_year"]

        if has(self.mediainfo, "general.0.imdb") or has(
            self.mediainfo, "general.0.tmdb"
        ):
            if is_movie:
                # movie
                if matched["title"] and matched["year"]:
                    reply += self.reporter.print_report(
                        "correct", "Matched movie name and year with IMDb/TMDb"
                    )
                else:
                    if matched["title"]:
                        reply += self.reporter.print_report(
                            "correct", "Matched movie name with IMDb/TMDb"
                        )
                    else:
                        if imdb_movie and "title" in imdb_movie and imdb_movie["title"]:
                            reply += self.reporter.print_report(
                                "error", "IMDb: Name: `" + imdb_movie["title"] + "`"
                            )
                            if movie_data["name"]:
                                reply += show_diff(
                                    movie_data["name"], imdb_movie["title"]
                                )
                            matched["title_replied"] = True
                        # tmdb_info["original_title"] is original title
                        # tmdb_info["title"] is the translated title in whatever language you're requesting
                        if tmdb_info and "title" in tmdb_info and tmdb_info["title"]:
                            reply += self.reporter.print_report(
                                "error", "TMDb: Name: `" + tmdb_info["title"] + "`"
                            )
                            if movie_data["name"]:
                                reply += show_diff(
                                    movie_data["name"], tmdb_info["title"]
                                )
                            matched["title_replied"] = True
                        if not matched["title_replied"]:
                            reply += self.reporter.print_report(
                                "error", "Failed to match movie name with IMDb/TMDb"
                            )

                    if matched["year"]:
                        reply += self.reporter.print_report(
                            "correct", "Matched movie year with IMDb/TMDb"
                        )
                    else:
                        if imdb_movie and "year" in imdb_movie:
                            reply += self.reporter.print_report(
                                "error", "IMDb: Year: `" + str(imdb_movie["year"]) + "`"
                            )
                            matched["year_replied"] = True
                        if tmdb_year:
                            reply += self.reporter.print_report(
                                "error", "TMDb: Year: `" + str(tmdb_year) + "`"
                            )
                            matched["year_replied"] = True
                        if not matched["year_replied"]:
                            reply += self.reporter.print_report(
                                "error", "Failed to match movie year with IMDb/TMDb"
                            )
            else:
                # tv show
                if matched["title"]:
                    reply += self.reporter.print_report(
                        "correct", "Matched tv show name with IMDb/TMDb"
                    )
                else:
                    if imdb_movie and "title" in imdb_movie:
                        reply += self.reporter.print_report(
                            "error", "IMDb: Name: `" + imdb_movie["title"] + "`"
                        )
                        matched["title_replied"] = True
                    if tmdb_info and "name" in tmdb_info:
                        reply += self.reporter.print_report(
                            "error", "TMDb: Name: `" + tmdb_info["name"] + "`"
                        )
                        matched["title_replied"] = True
                    if not matched["title_replied"]:
                        reply += self.reporter.print_report(
                            "error", "Failed to match tv show name with IMDb/TMDb"
                        )

        return reply

    def _year_range(self, year, test_year, offset=MOVIE_YEAR_OFFSET):
        """
        Check whether ``test_year`` lies within ``offset`` years of ``year``.

        Example with offset = 1 and year = 2004: 2003, 2004 and 2005 are
        accepted, 2002 and 2006 are not.

        Parameters
        ----------
        year : int or str
            reference year
        test_year : int or str
            year to test
        offset : int
            allowed distance in years, inclusive (default: MOVIE_YEAR_OFFSET)

        Returns
        -------
        bool, False when either year is missing/empty
        """
        if not (year and test_year):
            return False
        year = int(year)
        test_year = int(test_year)
        return test_year in range(year - offset, (year + offset) + 1)
233 |
--------------------------------------------------------------------------------
/vdator/checks/mixins/__init__.py:
--------------------------------------------------------------------------------
1 | from .is_commentary_track import *
2 | from .is_movie import *
3 | from .print_header import *
4 | from .section_id import *
5 |
--------------------------------------------------------------------------------
/vdator/checks/mixins/is_commentary_track.py:
--------------------------------------------------------------------------------
class IsCommentaryTrack(object):
    """Mixin that recognises commentary tracks by their title."""

    def _is_commentary_track(self, title):
        """
        Return True when one of the whitespace-separated words of ``title``
        is exactly "commentary" (case-insensitive).
        """
        return any(word == "commentary" for word in title.lower().split())
4 |
--------------------------------------------------------------------------------
/vdator/checks/mixins/is_movie.py:
--------------------------------------------------------------------------------
1 | from pydash import has
2 | import re
3 |
4 |
class IsMovie(object):
    """Mixin that decides whether ``self.mediainfo`` describes a movie or a tv show."""

    def _is_movie(self):
        """
        Returns True if its a movie, False if tv show.

        The TMDb id prefix ("movie/" or "tv/") decides when present; otherwise
        a movie name shaped like "Name - S01E01" marks a tv show. Anything
        else is assumed to be a movie.
        """
        if has(self.mediainfo, "general.0.tmdb"):
            tmdb_value = self.mediainfo["general"][0]["tmdb"]
            if tmdb_value.startswith("movie/"):
                return True
            if tmdb_value.startswith("tv/"):
                return False

        if has(self.mediainfo, "general.0.movie_name"):
            # tv show name in format "Name - S01E01" or "Name - S01E01E02"
            looks_like_tv = re.search(
                r"^.+\s-\sS\d{2}(E\d{2})+.*$",
                self.mediainfo["general"][0]["movie_name"],
            )
            if looks_like_tv:
                return False

        # could not tell: assume movie
        return True
31 |
--------------------------------------------------------------------------------
/vdator/checks/mixins/print_header.py:
--------------------------------------------------------------------------------
class PrintHeader(object):
    """Mixin that formats section headings."""

    def _print_header(self, heading):
        """Return ``heading`` as a bold quoted line followed by a newline."""
        return f"> **{heading}**\n"
4 |
--------------------------------------------------------------------------------
/vdator/checks/mixins/section_id.py:
--------------------------------------------------------------------------------
class SectionId(object):
    """Mixin that builds a printable identifier for a track in ``self.mediainfo``."""

    def _section_id(self, section, i):
        """
        Return "#<id>" when track ``i`` of ``section`` has an "id" key,
        otherwise the index ``i`` as a string.
        """
        track = self.mediainfo[section.lower()][i]
        if "id" not in track:
            return str(i)
        return "#" + track["id"]
9 |
--------------------------------------------------------------------------------
/vdator/checks/mkvmerge.py:
--------------------------------------------------------------------------------
1 | from .check import *
2 |
3 | from dotenv import load_dotenv
4 | import os, re, requests
5 |
# load environment variables (python-dotenv) before the setting below is read
load_dotenv()

# Optional mkvtoolnix version line, e.g. 'Version 76.0 "Celebration" 2023-04-30'.
# CheckMKVMerge.run() passes it to get_reply() as force_version; None when unset.
MKVMERGE_VERSION = os.environ.get("MKVMERGE_VERSION")
10 |
11 |
class CheckMKVMerge(Check):
    """Checks whether the file was muxed with the latest mkvtoolnix release."""

    # seconds to wait for the mkvtoolnix news file; without a timeout
    # requests.get() can block forever on an unresponsive server
    _REQUEST_TIMEOUT = 10

    def __init__(self, reporter, mediainfo):
        super().__init__(
            reporter,
            mediainfo,
            "Error checking mkvtoolnix version",
        )

    def run(self):
        """
        Runs the check and returns reply.
        Wraps check in try...except to prevent crashes

        Returns
        -------
        reply string
        """
        reply = ""
        try:
            reply += self.get_reply(MKVMERGE_VERSION)
        except Exception:
            traceback.print_exc()
            reply += self.reporter.print_report("fail", self.run_fail_msg)
        return reply

    # overriding abstract method
    # force_version = "Version 57.0.0 \"Till The End\" 2021-05-22"
    # force_version = "Version 76.0 \"Celebration\" 2023-04-30"
    def get_reply(self, force_version=None):
        """
        Compare the mkvtoolnix version found in "writing_application" with the
        latest released version.

        Parameters
        ----------
        force_version : str, optional
            version line to treat as the latest release instead of downloading
            the first line of the MKVTOOLNIX_NEWS url

        Returns
        -------
        reply string
        """
        reply = ""

        version_name_regex_mkvtoolnix = r'"(.*)"'
        version_name_regex_mediainfo = r"\'(.*)\'"
        version_num_regex = r"(\d+\.\d+(\.\d+)?)"

        if not has(self.mediainfo, "general.0.writing_application"):
            reply += self.reporter.print_report("info", "Not using mkvtoolnix")
            return reply

        writing_application = self.mediainfo["general"][0]["writing_application"]
        mediainfo_version_num = self._first_group(
            version_num_regex, writing_application
        )
        mediainfo_version_name = self._first_group(
            version_name_regex_mediainfo, writing_application
        )

        if not mediainfo_version_num or not mediainfo_version_name:
            reply += self.reporter.print_report("info", "Not using mkvtoolnix")
            return reply

        # a forced version replaces the downloaded line, so skip the download
        if force_version:
            mkvtoolnix_version_line = force_version
        else:
            ## Version 32.0.0 "Astral Progressions" 2019-03-12
            ## Version 76.0 "Celebration" 2023-04-30
            mkvtoolnix_version_line = self._fetch_latest_version_line()

        mkvtoolnix_version_num, mkvtoolnix_version_name = None, None
        if mkvtoolnix_version_line:
            mkvtoolnix_version_num = self._first_group(
                version_num_regex, mkvtoolnix_version_line
            )
            mkvtoolnix_version_name = self._first_group(
                version_name_regex_mkvtoolnix, mkvtoolnix_version_line
            )

        # download failed, non-200 response, or the line could not be parsed
        if not mkvtoolnix_version_num or not mkvtoolnix_version_name:
            reply += self.reporter.print_report(
                "info", "Could not fetch latest mkvtoolnix version"
            )
            return reply

        if (
            mkvtoolnix_version_num == mediainfo_version_num
            and mkvtoolnix_version_name == mediainfo_version_name
        ):
            reply += self.reporter.print_report(
                "correct",
                "Uses latest mkvtoolnix: `"
                + mediainfo_version_num
                + ' "'
                + mediainfo_version_name
                + '"`',
            )
        else:
            reply += self.reporter.print_report(
                "warning",
                "Not using latest mkvtoolnix: `"
                + mediainfo_version_num
                + ' "'
                + mediainfo_version_name
                + '"` latest is: `'
                + mkvtoolnix_version_num
                + ' "'
                + mkvtoolnix_version_name
                + '"`',
            )

        return reply

    @staticmethod
    def _first_group(pattern, text):
        """Return group 1 of the first match of ``pattern`` in ``text``, or None."""
        match = re.search(pattern, text)
        return match.group(1) if match else None

    def _fetch_latest_version_line(self):
        """
        Download the MKVTOOLNIX_NEWS url and return its first line.
        Returns None on any failure; the caller reports it to the user.
        """
        try:
            r = requests.get(
                os.environ.get("MKVTOOLNIX_NEWS"), timeout=self._REQUEST_TIMEOUT
            )
            if r.status_code == 200:
                lines = r.text.splitlines()
                if lines:
                    return lines[0]
        except Exception:
            # best effort: network errors, missing/invalid url, timeout
            pass
        return None
121 |
--------------------------------------------------------------------------------
/vdator/checks/movie_name_format.py:
--------------------------------------------------------------------------------
1 | from .check import *
2 | from .mixins import IsMovie
3 |
4 | import re
5 |
6 |
class CheckMovieNameFormat(Check, IsMovie):
    """Checks that the movie name follows `Name (Year)` or `Name - S01E01`."""

    def __init__(self, reporter, mediainfo):
        super().__init__(reporter, mediainfo, "Error parsing movie name")

    # overriding abstract method
    def get_reply(self):
        """
        Returns
        -------
        reply string reporting whether the name matches the expected format;
        on a mismatch, leading/trailing spaces are reported as well
        """
        # is it a movie or tv show?
        is_movie = self._is_movie()

        if not has(self.mediainfo, "general.0.movie_name"):
            return "" + self.reporter.print_report("error", "Missing movie name")

        movie_name = self.mediainfo["general"][0]["movie_name"]

        if is_movie:
            # movie name in format "Name (Year)"
            name_regex = r"^.+\(\d{4}\)$"
            correct_prefix = "Movie name format `Name (Year)`: `"
            error_prefix = "Movie name does not match format `Name (Year)`: `"
        else:
            # tv show name in format "Name - S01E01" or "Name - S01E01E02"
            name_regex = r"^.+\s-\sS\d{2}(E\d{2})+.*$"
            correct_prefix = "TV show name format `Name - S01E01`: `"
            error_prefix = "TV show name does not match format `Name - S01E01`: `"

        if re.search(name_regex, movie_name):
            return "" + self.reporter.print_report(
                "correct", correct_prefix + movie_name + "`"
            )

        reply = "" + self.reporter.print_report(
            "error", error_prefix + movie_name + "`"
        )
        reply += self._movie_name_extra_space(movie_name)
        return reply

    def _movie_name_extra_space(self, movie_name):
        """Return one error report for a leading and one for a trailing space."""
        findings = (
            (movie_name.startswith(" "), "Movie name starts with an extra space!"),
            (movie_name.endswith(" "), "Movie name ends with an extra space!"),
        )

        reply = ""
        for has_extra_space, message in findings:
            if has_extra_space:
                reply += self.reporter.print_report("error", message)
        return reply
81 |
--------------------------------------------------------------------------------
/vdator/checks/muxing_mode.py:
--------------------------------------------------------------------------------
1 | from .check import *
2 |
3 |
class CheckMuxingMode(Check):
    """Reports every General/Video/Audio/Text entry that has a muxing mode."""

    def __init__(self, reporter, mediainfo):
        super().__init__(
            reporter,
            mediainfo,
            "Error checking muxing mode",
        )

    # overriding abstract method
    def get_reply(self):
        """
        Returns
        -------
        reply string with one error per entry that has a "muxing_mode" key,
        or a single "correct" report when no entry has one
        """
        reply, is_valid = "", True

        for section in ["general", "video", "audio", "text"]:
            for i, track in enumerate(self.mediainfo[section]):
                if "muxing_mode" not in track:
                    continue

                # An entry is not guaranteed to carry an "id" key; fall back
                # to its index instead of raising KeyError and failing the
                # whole check.
                track_label = "#" + track["id"] if "id" in track else str(i)

                reply += self.reporter.print_report(
                    "error",
                    section.capitalize()
                    + " "
                    + track_label
                    + " has muxing mode: `"
                    + track["muxing_mode"]
                    + "`",
                )
                is_valid = False

        if is_valid:
            reply += self.reporter.print_report(
                "correct", "All tracks do not have a muxing mode"
            )

        return reply
36 |
--------------------------------------------------------------------------------
/vdator/checks/print_audio_track_names.py:
--------------------------------------------------------------------------------
1 | from .check import *
2 | from .mixins import SectionId
3 |
4 |
class CheckPrintAudioTrackNames(Check, SectionId):
    """Prints the title of every audio track inside a code block."""

    def __init__(self, reporter, mediainfo):
        super().__init__(reporter, mediainfo, "Error printing audio track names")

    # overriding abstract method
    def get_reply(self):
        """
        Returns
        -------
        reply string listing "<track id>: <title>" one per line, or an error
        report when there are no audio tracks
        """
        audio_tracks = self.mediainfo["audio"]

        if len(audio_tracks) == 0:
            return self.reporter.print_report("error", "No audio tracks")

        reply = "Audio Track Names:\n"
        reply += "```"
        for i, track in enumerate(audio_tracks):
            reply += self._section_id("audio", i) + ": "
            if "title" in track:
                reply += track["title"]
            # end the line for untitled tracks too, otherwise the next
            # track is printed on the same line
            reply += "\n"
        reply += "```"

        return reply
25 |
--------------------------------------------------------------------------------
/vdator/checks/print_chapters.py:
--------------------------------------------------------------------------------
1 | from .check import *
2 |
3 | import re
4 |
5 |
class CheckPrintChapters(Check):
    """Prints the chapters of every menu, or a short note when they are just numbered."""

    def __init__(self, reporter, mediainfo):
        super().__init__(reporter, mediainfo, "Error printing chapters")

    # overriding abstract method
    def get_reply(self):
        """
        Returns
        -------
        reply string with a heading per menu followed by either the chapter
        list or "Chapters are numbered", plus the chapter languages if any
        """
        menus = self.mediainfo["menu"]
        if len(menus) == 0:
            return "" + self.reporter.print_report("info", "No chapters")

        reply = ""
        for number, menu in enumerate(menus, start=1):
            reply += f"> **Chapters {number}**\n"

            # numbered means every title starts with "chapter <digits>"
            numbered_chapters = all(
                re.search(r"^chapter\s\d+", title["title"], re.IGNORECASE)
                for ch in menu
                for title in ch["titles"]
            )

            if numbered_chapters:
                reply += self.reporter.print_report("info", "Chapters are numbered")
            else:
                chapter_lines = []
                for ch in menu:
                    line = ""
                    if ch["time"]:
                        line += ch["time"] + " :"
                    for title in ch["titles"]:
                        if title["language"]:
                            line += " lang: " + title["language"]
                        if title["title"]:
                            line += " title: " + title["title"]
                    chapter_lines.append(line + "\n")
                reply += "```" + "".join(chapter_lines) + "```"

            # languages are read from the first chapter of the menu
            languages = menu[0]["languages"]
            if len(languages) > 0 and languages[0] != "":
                reply += "Chapter languages: `" + ", ".join(languages) + "`\n"

        return reply
47 |
--------------------------------------------------------------------------------
/vdator/checks/print_text_tracks.py:
--------------------------------------------------------------------------------
1 | from .check import *
2 | from .mixins import SectionId
3 |
4 |
class CheckPrintTextTracks(Check, SectionId):
    """Prints the default/forced/language/title properties of every text track."""

    def __init__(self, reporter, mediainfo):
        super().__init__(reporter, mediainfo, "Error printing text tracks")

    # overriding abstract method
    def get_reply(self):
        """
        Returns
        -------
        reply string with one line per text track inside a code block, or an
        info report when there are no text tracks
        """
        text_tracks = self.mediainfo["text"]
        if len(text_tracks) == 0:
            return "" + self.reporter.print_report("info", "No text tracks")

        # (track key, label printed in front of its value), in print order
        printed_fields = (
            ("default", " default:"),
            ("forced", " forced:"),
            ("language", " language:"),
            ("title", " title: "),
        )

        lines = []
        for i, track in enumerate(text_tracks):
            line = self._section_id("text", i) + ":"
            for key, label in printed_fields:
                if key in track:
                    line += label + track[key]
            lines.append(line + "\n")

        return "```" + "".join(lines) + "```"
29 |
--------------------------------------------------------------------------------
/vdator/checks/remove_until_first_codec.py:
--------------------------------------------------------------------------------
class RemoveUntilFirstCodec(object):
    """Strips the " / "-separated parts in front of the first audio codec of a title."""

    def __init__(self, codecs):
        # object providing is_audio_title(part)
        self.codecs = codecs

    def remove(self, title):
        """
        Remove leading " / "-separated parts until one is an audio codec title.

        Returns
        -------
        tuple (remaining title, list of removed parts, whether a codec was found)
        A title without " / " is returned unchanged with found = False.
        """
        remaining, removed_parts, found = title, [], False

        if " / " not in title:
            return remaining, removed_parts, found

        for part in title.split(" / "):
            if self.codecs.is_audio_title(part):
                # stop when we get first codec
                found = True
                break
            # not a codec: cut the first part off the remaining title and keep it
            head, *tail = remaining.split(" / ")
            remaining = " / ".join(tail).strip()
            removed_parts.append(head)

        return remaining, removed_parts, found
20 |
--------------------------------------------------------------------------------
/vdator/checks/text_default_flag.py:
--------------------------------------------------------------------------------
1 | from .check import *
2 |
3 |
class CheckTextDefaultFlag(Check):
    """For non-English first audio, checks that an English subtitle is `default=yes`."""

    def __init__(self, reporter, mediainfo):
        super().__init__(reporter, mediainfo, "Error checking text track default flag")

    # overriding abstract method
    def get_reply(self):
        """
        Returns
        -------
        reply string; empty when there are no text tracks, the first audio
        track is English, or no English subtitle has both language and
        default properties
        """
        reply = ""

        if len(self.mediainfo["text"]) == 0:
            return reply

        first_audio_language = False
        if has(self.mediainfo, "audio.0.language"):
            first_audio_language = self.mediainfo["audio"][0]["language"].lower()

        # english audio: nothing to check
        if first_audio_language == "english":
            return reply

        # english text tracks that have both language and default keys
        english_subs = [
            item
            for item in self.mediainfo["text"]
            if ("language" in item and "default" in item)
            and item["language"].lower() == "english"
        ]
        if not english_subs:
            return reply

        # foreign audio and has english subs. english subs should be default=yes
        if any(item["default"].lower() == "yes" for item in english_subs):
            reply += self.reporter.print_report(
                "correct",
                "Foreign film, one of the English subtitles are `default=yes`",
            )
        else:
            reply += self.reporter.print_report(
                "error",
                "Foreign film, one of the English subtitles should be `default=yes`",
            )

        return reply
52 |
--------------------------------------------------------------------------------
/vdator/checks/text_order.py:
--------------------------------------------------------------------------------
1 | from .check import *
2 | from .mixins import IsCommentaryTrack, SectionId
3 |
4 | from collections import OrderedDict
5 | import re
6 |
7 |
8 | class CheckTextOrder(Check, IsCommentaryTrack, SectionId):
9 | """
10 | Checks text track order:
11 | Languages are in alphabetical order with English first
12 | Within language: No title, SDH, alphabetical
13 | Commentary subtitles after regular subtitles
14 | """
15 |
def __init__(self, reporter, mediainfo):
    """Set up the check with the message reported when it fails to run."""
    run_fail_msg = "Error checking text track order"
    super().__init__(reporter, mediainfo, run_fail_msg)
22 |
23 | # overriding abstract method
def get_reply(self):
    """
    Run all text track order checks.

    Note: adds an empty "title" to every text track in self.mediainfo
    that has none.

    Returns
    -------
    reply string, empty when there are no text tracks
    """
    reply = ""
    text_tracks = self.mediainfo["text"]

    if len(text_tracks) == 0:
        return reply

    # languages in order of first appearance, without duplicates
    # regular_by_lang = OrderedDict([('English', [{}, ...]), ('German', [{}, ...])])
    regular_by_lang = OrderedDict()
    commentary_by_lang = OrderedDict()
    for text in text_tracks:
        lang = self._format_lang(text["language"])
        if lang not in regular_by_lang:
            regular_by_lang[lang] = list()
            commentary_by_lang[lang] = list()

    # group tracks by language, commentary tracks separately
    has_commentary = False
    for i, text in enumerate(text_tracks):
        if "title" not in text:
            text["title"] = ""
        lang = self._format_lang(text["language"])
        if self._is_commentary_track(text["title"]):
            commentary_by_lang[lang].append(text)
            has_commentary = True
        else:
            regular_by_lang[lang].append(text)
        # forced english track should be first
        reply += self._forced_english_track_first(i, text)

    # (tracks by language, report prefix) for every group that gets checked
    groups = [(regular_by_lang, "Regular subs: ")]
    if has_commentary:
        groups.append((commentary_by_lang, "Commentary subs: "))

    # languages should be in alphabetical order with English first
    for tracks_by_lang, prefix in groups:
        reply += self._languages_in_order(tracks_by_lang, prefix)

    # subtitles in order within language: no title, SDH, rest in alphabetical order
    reply += "**Expected order within language:** No title, SDH, alphabetical\n"
    for tracks_by_lang, prefix in groups:
        reply += self._subs_in_order_within_language(tracks_by_lang, prefix)

    # commentary tracks should be after regular subs
    if has_commentary:
        reply += self._commentary_last(regular_by_lang, commentary_by_lang)

    return reply
84 |
85 | def _format_lang(self, lang):
86 | """
87 | Format a text language to remove parenthesis
88 | English (US) becomes English
89 | """
90 | return re.sub(r"\([^)]*\)", "", lang).strip()
91 |
92 | def _forced_english_track_first(self, i, text_track):
93 | """
94 | Forced english track should be first
95 | Only checks tracks without titles, since titles have a predefined order: No title, SDH, alphabetical
96 | """
97 | reply = ""
98 |
99 | is_forced_track = (
100 | text_track["forced"].lower() == "yes" if "forced" in text_track else False
101 | )
102 | is_english_track = text_track["language"].lower() == "english"
103 | # only checks tracks without titles
104 | title_is_blank = text_track["title"] == ""
105 | is_first_track = i == 0
106 |
107 | if (
108 | is_forced_track
109 | and is_english_track
110 | and title_is_blank
111 | and not is_first_track
112 | ):
113 | # forced english track should be first
114 | reply += self.reporter.print_report(
115 | "error",
116 | "Text {} is a forced English track, it should be first".format(
117 | self._section_id("text", i)
118 | ),
119 | )
120 |
121 | return reply
122 |
123 | def _languages_in_order(self, text_tracks_by_lang, prefix=""):
124 | """Languages should be in alphabetical order with English first"""
125 | reply = ""
126 | text_track_langs_order = list(text_tracks_by_lang.keys())
127 | text_track_langs_expected_order = self._sort_sub_langs(text_track_langs_order)
128 |
129 | if text_track_langs_expected_order == text_track_langs_order:
130 | reply += self.reporter.print_report(
131 | "correct",
132 | prefix + "Languages are in alphabetical order with English first",
133 | )
134 | else:
135 | reply += self.reporter.print_report(
136 | "error",
137 | prefix
138 | + "Languages should be in alphabetical order with English first. Expected: `"
139 | + ", ".join(text_track_langs_expected_order)
140 | + "`",
141 | )
142 |
143 | return reply
144 |
def _commentary_last(self, text_tracks_by_lang, commentary_tracks_by_lang):
    """
    Report whether commentary subtitles come after the regular subtitles

    Compares the highest regular subtitle id with the id of the first track
    of the first commentary language. Nothing is reported when there are no
    commentary tracks, no numeric regular subtitle id, or the commentary id
    is missing or not numeric.
    """
    reply = ""

    if len(commentary_tracks_by_lang) == 0:
        return reply

    last_regular_id = self._get_last_text_id(text_tracks_by_lang)
    if last_regular_id == -1:
        # no regular subtitle with a numeric id to compare against
        return reply

    try:
        first_group = next(iter(commentary_tracks_by_lang.values()))
        if first_group and has(first_group, "0.id"):
            try:
                if last_regular_id > int(first_group[0]["id"]):
                    # commentary tracks should be after regular subs
                    status = "error"
                    message = "Commentary subs should be after regular subs"
                else:
                    # commentary tracks are after regular subs
                    status = "correct"
                    message = "Commentary subs are after regular subs"
                reply += self.reporter.print_report(status, message)
            except ValueError:
                # non-numeric commentary id, nothing to compare
                pass
    except StopIteration:
        pass

    return reply
174 |
175 | def _subs_in_order_within_language(self, text_tracks_by_lang, prefix=""):
176 | """
177 | Subtitles in order within language
178 | No title, SDH, rest in alphabetical order
179 | """
180 | reply = ""
181 | for k, v in text_tracks_by_lang.items():
182 | # k = 'English'
183 | # v = tracks list [{}, ...]
184 |
185 | v_ids = [track["id"] for track in v]
186 | expected_order = self._sort_subs_within_lang(v)
187 | expected_order_ids = [track["id"] for track in expected_order]
188 |
189 | if v_ids != expected_order_ids:
190 | # subs for language are out of order
191 | reply += self.reporter.print_report(
192 | "warning",
193 | prefix
194 | + "Language: `{}`: Subtitles should be in order: `{}`".format(
195 | k, ", ".join(expected_order_ids)
196 | ),
197 | )
198 |
199 | return reply
200 |
201 | def _get_last_text_id(self, text_tracks_by_lang):
202 | """Get track id of last subtitle"""
203 | last_text_id = -1
204 | for _, tracks in text_tracks_by_lang.items():
205 | for track in tracks:
206 | try:
207 | curr_text_id = int(track["id"])
208 | if curr_text_id > last_text_id:
209 | last_text_id = curr_text_id
210 | except ValueError:
211 | continue
212 | return last_text_id
213 |
214 | def _sort_sub_langs(self, languages):
215 | """
216 | Sort subs by language
217 | English first, rest in alphabetical order
218 | """
219 | # English tracks first
220 | tracks = [lang for lang in languages if lang.lower() == "english"]
221 | # rest of the tracks by language in alphabetical order
222 | rest = sorted([lang for lang in languages if lang.lower() != "english"])
223 |
224 | # add the rest of the tracks
225 | if rest:
226 | tracks.extend(rest)
227 |
228 | return tracks
229 |
230 | def _sort_subs_within_lang(self, text_tracks):
231 | """
232 | Sort subtitles within languages
233 | No title, SDH, rest in alphabetical order
234 | """
235 | # ['', 'SDH', '...']
236 | unparsed = text_tracks.copy()
237 | parsed = []
238 |
239 | # add tracks with no title
240 | for track in unparsed:
241 | if track["title"] == "":
242 | parsed.append(track)
243 | unparsed = [track for track in unparsed if track["title"] != ""]
244 |
245 | # add tracks with SDH
246 | tracks_with_SDH = []
247 | for track in unparsed:
248 | if "SDH" in track["title"].split():
249 | tracks_with_SDH.append(track)
250 | tracks_with_SDH = sorted(tracks_with_SDH, key=lambda track: track["title"])
251 | if tracks_with_SDH:
252 | parsed.extend(tracks_with_SDH)
253 | unparsed = [
254 | track for track in unparsed if ("SDH" not in track["title"].split())
255 | ]
256 |
257 | # sort rest of the tracks in alphabetical order
258 | unparsed = sorted(unparsed, key=lambda track: track["title"])
259 |
260 | # add the rest of the tracks
261 | if unparsed:
262 | parsed.extend(unparsed)
263 |
264 | return parsed
265 |
--------------------------------------------------------------------------------
/vdator/checks/tracks_have_language.py:
--------------------------------------------------------------------------------
1 | from .check import *
2 | from .mixins import SectionId
3 |
4 |
class CheckTracksHaveLanguage(Check, SectionId):
    """Check that every video, audio and text track has a language"""

    def __init__(self, reporter, mediainfo):
        super().__init__(reporter, mediainfo, "Error checking if tracks have language")

    # overriding abstract method
    def get_reply(self):
        """
        Report one error per track without a "language" key,
        or a single "correct" line when every track has one
        """
        reply = ""
        missing_count = 0

        for section in ("video", "audio", "text"):
            for i, track in enumerate(self.mediainfo[section]):
                if "language" in track:
                    continue
                reply += self.reporter.print_report(
                    "error",
                    section.capitalize()
                    + " "
                    + self._section_id(section, i)
                    + ": Does not have a language chosen",
                )
                missing_count += 1

        if missing_count == 0:
            reply += self.reporter.print_report(
                "correct", "All tracks have a language chosen"
            )

        return reply
31 |
--------------------------------------------------------------------------------
/vdator/checks/video_language_matches_first_audio_language.py:
--------------------------------------------------------------------------------
1 | from .check import *
2 |
3 |
class CheckVideoLanguageMatchesFirstAudioLanguage(Check):
    """Check that the first video track and first audio track share a language"""

    def __init__(self, reporter, mediainfo):
        super().__init__(
            reporter,
            mediainfo,
            "Error checking that video language matches first audio language",
        )

    # overriding abstract method
    def get_reply(self):
        """
        Report an error when either language is missing or they differ,
        otherwise report the shared language as correct
        """
        reply = ""

        # both languages must be present before they can be compared
        if not has(self.mediainfo, "video.0.language"):
            reply += self.reporter.print_report("error", "Video language not set")
            return reply
        if not has(self.mediainfo, "audio.0.language"):
            reply += self.reporter.print_report("error", "First audio language not set")
            return reply

        video_language = self.mediainfo["video"][0]["language"]
        audio_language = self.mediainfo["audio"][0]["language"]

        if video_language != audio_language:
            reply += self.reporter.print_report(
                "error",
                "Video language does not match first audio language: `"
                + video_language
                + "` vs `"
                + audio_language
                + "`",
            )
        else:
            reply += self.reporter.print_report(
                "correct",
                "Video language matches first audio language: `" + video_language + "`",
            )
        return reply
42 |
--------------------------------------------------------------------------------
/vdator/checks/video_track.py:
--------------------------------------------------------------------------------
1 | from .check import *
2 |
3 | import re
4 |
5 |
class CheckVideoTrack(Check):
    """
    Check that the title of the first mediainfo video track is the expected one

    For DVD sources the expected title is rebuilt from mediainfo fields,
    otherwise it is taken from the first bdinfo video track.
    """

    def __init__(self, reporter, source_detector, codecs, mediainfo, bdinfo):
        super().__init__(reporter, mediainfo, "Error checking video track name")
        self.source_detector = source_detector
        self.codecs = codecs
        self.bdinfo = bdinfo

    # overriding abstract method
    def get_reply(self):
        """
        Compare the mediainfo video title with the expected title

        Returns
        -------
        str report lines; on a mismatch a diff hint from show_diff is appended
        """
        reply = ""

        if (
            has_many(
                self.mediainfo,
                "video.0",
                [
                    "format",
                    "format_version",
                    "bit_rate",
                    "height",
                    "scan_type",
                    "frame_rate",
                    "display_aspect_ratio",
                    "title",
                ],
            )
            and self.source_detector.is_dvd()
        ):
            # dvd video title from mediainfo
            video_title = self._dvd_video_title_from_mediainfo()
            mediainfo_title = self.mediainfo["video"][0]["title"]

            if mediainfo_title == video_title:
                reply += self.reporter.print_report(
                    "correct",
                    "Video track names match: ```" + mediainfo_title + "```",
                    new_line=False,
                )
            else:
                reply += self.reporter.print_report(
                    "error",
                    "Video track names missmatch:\n```fix\nExpected: "
                    + video_title
                    + "\nMediaInfo: "
                    + mediainfo_title
                    + "```",
                    new_line=False,
                )
                reply += show_diff(mediainfo_title, video_title)

        elif has(self.bdinfo, "video") and has(self.mediainfo, "video"):
            if len(self.bdinfo["video"]) < 1:
                reply += self.reporter.print_report(
                    "error", "Missing bdinfo video track"
                )
                return reply
            elif len(self.mediainfo["video"]) < 1:
                reply += self.reporter.print_report(
                    "error", "Missing mediainfo video track"
                )
                return reply

            if has(self.mediainfo, "video.0.title") and has(self.bdinfo, "video.0"):
                mediainfo_video_title = self.mediainfo["video"][0]["title"]
                bdinfo_video_title = self.bdinfo["video"][0]

                # 1080i @ 25fps is actually progressive
                reply += self._actually_progressive()

                bitrate_search = re.search(r"(\d+\.\d+)\skbps", mediainfo_video_title)
                if bitrate_search:
                    # if mediainfo has a decimal kbps bitrate, use it in the bdinfo for comparison
                    precise_bitrate = bitrate_search.group(1)
                    precise_kbps = precise_bitrate + " kbps"
                    bdinfo_video_title = re.sub(
                        r"(\d+)\skbps", precise_kbps, bdinfo_video_title
                    )
                if self.source_detector.is_dv() and mediainfo_video_title.startswith(
                    bdinfo_video_title
                ):
                    # if source is dolby vision, only check that the first part of mediainfo video title
                    # matches bdinfo video title. Up to BT.2020, i.e. Dolby Vision FEL is not checked
                    reply += self.reporter.print_report(
                        "correct",
                        "Video track names match: ```" + mediainfo_video_title + "```",
                        new_line=False,
                    )
                elif bdinfo_video_title == mediainfo_video_title:
                    reply += self.reporter.print_report(
                        "correct",
                        "Video track names match: ```" + bdinfo_video_title + "```",
                        new_line=False,
                    )
                else:
                    reply += self.reporter.print_report(
                        "error",
                        "Video track names missmatch:\n```fix\nBDInfo: "
                        + bdinfo_video_title
                        + "\nMediaInfo: "
                        + mediainfo_video_title
                        + "```",
                        new_line=False,
                    )
                    reply += show_diff(mediainfo_video_title, bdinfo_video_title)
            else:
                reply += self.reporter.print_report(
                    "error", "Missing mediainfo video track"
                )
                return reply
        else:
            reply += self.reporter.print_report("error", "Could not verify video track")

        return reply

    def _dvd_video_title_from_mediainfo(self):
        """
        Build the expected DVD video title from the first mediainfo video track

        Layout: "<format>-<version> Video / <bitrate> kbps / <height><scan> /
        <fps> fps / <aspect ratio>"
        """
        # dictionary existence already checked

        video_title = ""
        # MPEG-
        video_title += self.mediainfo["video"][0]["format"].split()[0] + "-"

        # 1
        video_title += "".join(
            re.findall(r"[\d]+", self.mediainfo["video"][0]["format_version"])
        )
        video_title += " Video / "

        # bitrate
        video_title += (
            "".join(re.findall(r"[\d]+", self.mediainfo["video"][0]["bit_rate"]))
            + " kbps"
        )
        video_title += " / "

        # height
        video_title += "".join(
            re.findall(r"[\d]+", self.mediainfo["video"][0]["height"])
        )

        # scan type
        (scan_type, _) = self.codecs.get_scan_type_title_name(
            self.mediainfo["video"][0]["scan_type"].lower(), 0
        )
        video_title += scan_type
        video_title += " / "

        # fps
        video_fps = float(
            "".join(re.findall(r"\d+\.\d+", self.mediainfo["video"][0]["frame_rate"]))
        )
        if video_fps.is_integer():
            # whole frame rates are written without decimals
            video_fps = int(video_fps)
        video_title += str(video_fps) + " fps / "

        # aspect ratio
        video_title += self.mediainfo["video"][0]["display_aspect_ratio"]

        return video_title

    def _actually_progressive(self):
        """
        Add an info note when the bdinfo scan type and fps are flagged as
        actually progressive by the codecs parser

        Reads the third " / " separated part of the bdinfo video title for the
        scan type (its last character) and the fourth part for the fps.
        """
        # dictionary existence already checked

        reply = ""

        bdinfo_video_title = self.bdinfo["video"][0]
        bdinfo_video_parts = bdinfo_video_title.split(" / ")

        # index 3 (fps) is read below, so at least four parts are required
        if len(bdinfo_video_parts) >= 4:
            scan_type = bdinfo_video_parts[2].strip()[-1].lower()
            video_fps = float(
                "".join(
                    re.findall(r"\d*\.\d+|\d+", bdinfo_video_parts[3].strip().lower())
                )
            )
            (_, actually_progressive) = self.codecs.get_scan_type_title_name(
                scan_type, video_fps
            )
            if actually_progressive:
                reply += self.reporter.print_report(
                    "info", "Note: 1080i @ 25fps is actually progressive"
                )

        return reply
189 |
--------------------------------------------------------------------------------
/vdator/data/codecs.json:
--------------------------------------------------------------------------------
1 | {
2 | "codecs": {
3 | "video": {
4 | "h264/AVC": ".h264",
5 | "h264/MVC": ".mvc",
6 | "h265/HEVC": ".h265",
7 | "MPEG1": ".m1v",
8 | "MPEG2": ".m2v",
9 | "VC-1": ".vc1"
10 | },
11 | "video_3d": {
12 | "h264/MVC": ".mvc"
13 | },
14 | "audio": {
15 | "AC3": ".ac3",
16 | "AC3 EX": ".ac3",
17 | "AC3 Surround": ".ac3",
18 | "DTS Hi-Res": ".dtshr",
19 | "DTS Master Audio": ".dtsma",
20 | "DTS": ".dts",
21 | "FLAC Audio": ".flac",
22 | "RAW/PCM": ".pcm",
23 | "TrueHD/AC3": ".thd",
24 | "TrueHD/AC3 (Atmos)": ".thd"
25 | },
26 | "subtitles": {
27 | "Subtitle (PGS)": ".sup",
28 | "Subtitle (DVD)": ".sup"
29 | },
30 | "chapters": {
31 | "Chapters": ".txt"
32 | }
33 | },
34 | "track_titles": {
35 | "video": {
36 | "MPEG-1 Video": "MPEG-1",
37 | "MPEG-2 Video": "MPEG-2",
38 | "MPEG-4 AVC Video": "AVC",
39 | "MPEG-H HEVC Video": "HEVC",
40 | "VC-1 Video": "VC-1"
41 | },
42 | "audio": {
43 | "DTS Audio": "DTS",
44 | "DTS-HD High-Res Audio": "DTS-HD.HR",
45 | "DTS-HD Master Audio": "DTS-HD.MA",
46 | "DTS:X Master Audio": "DTS-X",
47 | "Dolby Digital Audio": "DD",
48 | "Dolby Digital EX Audio": "DD-EX",
49 | "Dolby Digital Plus Audio": "DDP",
50 | "Dolby TrueHD Audio": "TrueHD",
51 | "Dolby TrueHD/Atmos Audio": "TrueHD.Atmos",
52 | "FLAC Audio": "FLAC"
53 | }
54 | },
55 | "scan_types": {
56 | "interlaced" : "i",
57 | "mbaff" : "i",
58 | "progressive" : "p"
59 | }
60 | }
--------------------------------------------------------------------------------
/vdator/data/urls.json:
--------------------------------------------------------------------------------
1 | {
2 | "urls": {
3 | "dpaste.com": {
4 | "slug_regex": "https?://dpaste.com/(.*)",
5 | "raw_url_regex": "https?://dpaste.com/{}.txt",
6 | "raw_url": "https://dpaste.com/{}.txt"
7 | },
8 | "dpaste.org": {
9 | "slug_regex": "https?://dpaste.org/(.*)",
10 | "raw_url_regex": "https?://dpaste.org/{}/raw",
11 | "raw_url": "https://dpaste.org/{}/raw"
12 | },
13 | "hastebin.com": {
14 | "slug_regex": "https?://hastebin.com/(.*)",
15 | "raw_url_regex": "https?://hastebin.com/raw/{}",
16 | "raw_url": "https://hastebin.com/raw/{}"
17 | },
18 | "www.heypasteit.com": {
19 | "slug_regex": "https?://www.heypasteit.com/clip/(.*)",
20 | "raw_url_regex": "https?://www.heypasteit.com/download/{}",
21 | "raw_url": "https://www.heypasteit.com/download/{}"
22 | },
23 | "paste.centos.org": {
24 | "slug_regex": "https?://paste.centos.org/view/(.*)",
25 | "raw_url_regex": "https?://paste.centos.org/view/raw/{}",
26 | "raw_url": "https://paste.centos.org/view/raw/{}"
27 | },
28 | "paste.ee": {
29 | "slug_regex": "https?://paste.ee/p/(.*)",
30 | "raw_url_regex": "https?://paste.ee/d/{}",
31 | "raw_url": "https://paste.ee/d/{}"
32 | },
33 | "paste.opensuse.org": {
34 | "slug_regex": "https?://paste.opensuse.org/(.*)",
35 | "raw_url_regex": "https?://paste.opensuse.org/view/raw/{}",
36 | "raw_url": "https://paste.opensuse.org/view/raw/{}"
37 | },
38 | "pastebin.com": {
39 | "slug_regex": "https?://pastebin.com/(.*)",
40 | "raw_url_regex": "https?://pastebin.com/raw/{}",
41 | "raw_url": "https://pastebin.com/raw/{}"
42 | },
43 | "rentry.co": {
44 | "slug_regex": "https?://rentry.co/(.*)",
45 | "raw_url_regex": "https?://rentry.co/{}/raw",
46 | "raw_url": "https://rentry.co/{}/raw"
47 | },
48 | "termbin.com": {
49 | "slug_regex": "https?://termbin.com/(.*)",
50 | "raw_url_regex": "https?://termbin.com/{}",
51 | "raw_url": "https://termbin.com/{}"
52 | },
53 | "textbin.net": {
54 | "slug_regex": "https?://textbin.net/(.*)",
55 | "raw_url_regex": "https?://textbin.net/raw/{}",
56 | "raw_url": "https://textbin.net/raw/{}"
57 | }
58 | }
59 | }
--------------------------------------------------------------------------------
/vdator/helpers.py:
--------------------------------------------------------------------------------
1 | from pydash import has
2 | import difflib
3 |
4 |
def balanced_blockquotes(str):
    """
    Tell whether every ``` marker in the text has a partner

    Parameters
    ----------
    str : str
        text

    Returns
    -------
    True if the text holds an even number of ``` markers, False otherwise
    """
    # an odd count means one block was opened and never closed
    return str.count("```") % 2 == 0
21 |
22 |
def split_string(str, limit, sep="\n"):
    """
    Split string into parts no longer than limit, cutting only at separators

    Parameters
    ----------
    str : str
        string to split

    limit : int
        string length limit

    sep : str
        separator
        default: "\n"

    Returns
    -------
    list of str parts; separators inside a part are kept, separators at a
    cut are dropped. When a single piece between separators is longer than
    limit the string cannot be split, and the list holds the original
    string as its only element.
    """
    limit = int(limit)
    words = str.split(sep)

    if max(map(len, words)) > limit:
        # limit is too small, return the original string unsplit
        # (wrapped in a list so callers can always index and iterate parts)
        return [str]

    res, part, others = [], words[0], words[1:]
    for word in others:
        if (len(sep) + len(word)) > (limit - len(part)):
            # word does not fit any more, start a new part
            res.append(part)
            part = word
        else:
            part += sep + word
    if part:
        res.append(part)

    return res
61 |
62 |
def has_many(obj, base, keys):
    """
    Check that every key exists under a common base path

    Parameters
    ----------
    obj : object
        object to test

    base : str
        base key path, joined to each key with a dot
        an empty base tests the keys as they are

    keys : list
        keys to test

    Returns
    -------
    True if all keys exist, False otherwise
    """
    path_prefix = base + "." if base else ""
    # all() stops at the first missing key
    return all(has(obj, path_prefix + key) for key in keys)
90 |
91 |
def num_to_emoji(n):
    """
    Look up the discord emoji for a number from 1 to 10

    Parameters
    ----------
    n : str
        string number, other values are converted with str()

    Returns
    -------
    str discord emoji if valid, False otherwise
    """
    number_words = (
        "one",
        "two",
        "three",
        "four",
        "five",
        "six",
        "seven",
        "eight",
        "nine",
        "ten",
    )
    emoji_by_number = {
        str(number): ":" + word + ":"
        for number, word in enumerate(number_words, start=1)
    }
    return emoji_by_number.get(str(n), False)
122 |
123 |
def show_diff(actual, expected):
    """
    Build a hint line that marks how actual differs from expected
    using discord formatting: text only in expected is bold,
    text only in actual is struck through

    Parameters
    ----------
    actual : str
        actual result

    expected : str
        expected result

    Returns
    -------
    str "Hint: " followed by the marked up text and a newline
    """
    matcher = difflib.SequenceMatcher(None, actual, expected)

    pieces = []
    for tag, a_start, a_end, b_start, b_end in matcher.get_opcodes():
        from_actual = matcher.a[a_start:a_end]
        from_expected = matcher.b[b_start:b_end]
        if tag == "equal":
            pieces.append(from_actual)
        elif tag == "insert":
            pieces.append("**" + from_expected + "**")
        elif tag == "delete":
            pieces.append("~~" + from_actual + "~~")
        elif tag == "replace":
            pieces.append("~~" + from_actual + "~~**" + from_expected + "**")
        # any other opcode is ignored
    return "Hint: " + "".join(pieces) + "\n"
157 |
158 |
def is_float(value):
    """
    Check if value can be converted to float

    Parameters
    ----------
    value : object
        value to test

    Returns
    -------
    True if float(value) succeeds, False otherwise
    """
    try:
        float(value)
        return True
    except (TypeError, ValueError):
        # TypeError covers values float() does not accept at all, e.g. None
        return False
165 |
--------------------------------------------------------------------------------
/vdator/main.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | from dotenv import load_dotenv
4 | import json, os, requests, traceback
5 |
6 | # APIs
7 | import discord
8 | from discord.utils import get
9 |
10 | # parsers
11 | from helpers import balanced_blockquotes, split_string
12 | from parsers import *
13 | from source_detector import SourceDetector
14 | from reporter import Reporter, add_status_reactions
15 | from checker import Checker
16 | from checks.remove_until_first_codec import RemoveUntilFirstCodec
17 |
18 |
# script location
# directory of this file, used to find the bundled data files
__location__ = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__)))

# initialize parsers
# supported paste sites are read from data/urls.json ("urls" key)
with open(os.path.join(__location__, "data/urls.json")) as f:
    urls = json.load(f)["urls"]
    url_parser = URLParser(urls)

bdinfo_parser = BDInfoParser()
paste_parser = PasteParser(bdinfo_parser)
mediainfo_parser = MediaInfoParser()

# codec tables are read from data/codecs.json
with open(os.path.join(__location__, "data/codecs.json")) as f:
    codecs = json.load(f)
    codecs_parser = CodecsParser(codecs)

source_detector = SourceDetector()
reporter = Reporter()
checker = Checker(codecs_parser, source_detector, reporter)

# load environment variables
# must run before the os.environ reads below
load_dotenv()

# environment variables
# NOTE(review): os.environ.get returns None for an unset variable, so a
# missing variable fails at import with AttributeError on .strip()/.split()
IN_GAME = os.environ.get("IN_GAME").strip()
IGNORE_AFTER_LINE = os.environ.get("IGNORE_AFTER_LINE").strip()

# channels to listen in and add reactions
# comma separated list of channel names
REVIEW_CHANNELS = [x.strip() for x in os.environ.get("REVIEW_CHANNELS").split(",")]

# channels to send full summary to if from review channel
# comma separated list of channel names
REVIEW_REPLY_CHANNELS = [
    x.strip() for x in os.environ.get("REVIEW_REPLY_CHANNELS").split(",")
]

# channels to listen in and post full summaries
# comma separated list of channel names
BOT_CHANNELS = [x.strip() for x in os.environ.get("BOT_CHANNELS").split(",")]

# version shown by the !help and !version commands
VERSION = "1.4.0"
58 |
59 |
def print_help():
    """
    Build the reply for the !help command

    Returns
    -------
    str help text with the version, the configured channels
    and the list of checks in a code block
    """
    checks = (
        "Movie/TV name format",
        "IMDB/TMDB ids",
        "Filename",
        "Video language matches first audio language",
        "No muxing mode",
        "Uses latest mkvtoolnix",
        "Video and audio track names match",
        "DTS-HD MA 1.0/2.0 optionally to FLAC, LPCM 1.0/2.0 to FLAC, LPCM > 2.0 to DTS-HD MA",
        "Commentary to AC-3 @ 224 kbps",
        "Commentary track people and spellcheck",
        "Subtitle order",
        "Subtitle default flag",
        "Should have chapters",
        "Chapter languages",
        "Chapter padding",
    )
    pieces = [
        "vdator ",
        VERSION,
        " help: ",
        "I take a Pastebin link with BDInfo and MediaInfo dump.",
        " I ignore all input after the line `",
        IGNORE_AFTER_LINE,
        "`.",
        " I add reactions in the following review channels: `",
        ", ".join(REVIEW_CHANNELS),
        "`,",
        " I reply with full summary from review channels to: `",
        ", ".join(REVIEW_REPLY_CHANNELS),
        "`",
        " and post full summaries in: `",
        ", ".join(BOT_CHANNELS),
        "`.",
        " Add a minus (-) sign in front of unused audio tracks in BDInfo.",
        " I check:```",
        # one check per line inside the code block
        "\n".join(checks),
        "```",
        "**Commands:** !help, !version",
    ]
    return "".join(pieces)
93 |
94 |
def print_version():
    """
    Build the reply for the !version command

    Returns
    -------
    str "vdator " followed by the version
    """
    version_reply = "vdator " + VERSION
    return version_reply
97 |
98 |
# discord client with the default intents plus message content,
# which on_message reads to find commands and paste urls
intents = discord.Intents.default()
intents.message_content = True
client = discord.Client(intents=intents)
102 |
103 |
@client.event
async def on_ready():
    """
    Discord client is ready
    Prints the bot user and sets the presence to the IN_GAME text
    """
    for line in ("I'm in", client.user):
        print(line)
    presence = discord.Game(name=IN_GAME)
    await client.change_presence(activity=presence)
112 |
113 |
@client.event
async def on_message(message):
    """
    Discord message event

    Answers the !help and !version commands, adds status reactions to the
    bot's own messages, and for every supported paste url in a message
    downloads the paste, runs the checks and posts the report.
    Full reports go to the channel itself for bot channels, and to the
    review reply channels (plus reactions on the message) for review channels.

    Parameters
    ----------
    message : discord.Message class
        discord message
    """
    # direct messages have no guild to look channels up in
    if message.guild is None:
        return

    # get name of channel message was sent in
    # if message is in a thread, the channel name is in message.channel.parent, otherwise its in message.channel.name
    channel_name = (
        str(message.channel.parent)
        if hasattr(message.channel, "parent")
        else str(message.channel.name)
    )
    channel = get(
        message.guild.channels,
        name=channel_name,
        type=discord.ChannelType.text,
    )

    # only listens in bot and review channels
    if not (channel_name in BOT_CHANNELS or channel_name in REVIEW_CHANNELS):
        return

    # help command
    if message.content == "!help":
        reply = print_help()
        await channel.send(reply)
        return

    # version command
    if message.content == "!version":
        reply = print_version()
        await channel.send(reply)
        return

    # self
    if message.author == client.user:
        # add status reactions to own messages
        await add_status_reactions(message, message.content)
        return

    supported_urls = url_parser.extract_supported_urls(message.content)

    for url in supported_urls:
        reply = "<" + url + ">" + "\n"

        # each stage only runs when the previous one succeeded,
        # the first failure is reported and ends the chain
        try:
            # setup/reset reporter
            reporter.setup()
            # get paste
            # timeout so an unresponsive paste site cannot stall the handler forever
            r = requests.get(url, timeout=30)
            r.raise_for_status()
            paste = r.text
        except Exception:
            traceback.print_exc()
            reply += reporter.print_report("fail", "Failed to get paste")
        else:
            try:
                (bdinfo, mediainfo, eac3to) = paste_parser.parse(paste)
            except Exception:
                traceback.print_exc()
                reply += reporter.print_report("fail", "Paste parser failed")
            else:
                if mediainfo:
                    try:
                        # parse mediainfo
                        mediainfo = mediainfo_parser.parse(mediainfo)
                    except Exception:
                        traceback.print_exc()
                        reply += reporter.print_report(
                            "fail", "Mediainfo parser failed"
                        )
                    else:
                        try:
                            remove_until_first_codec = RemoveUntilFirstCodec(
                                codecs_parser
                            )
                            match_bdinfo_audio_to_mediainfo = (
                                MatchBDInfoAudioToMediaInfo(
                                    remove_until_first_codec, bdinfo, mediainfo
                                )
                            )
                            bdinfo["audio"] = (
                                match_bdinfo_audio_to_mediainfo.match_bdinfo_audio_to_mediainfo()
                            )
                            bdinfo["audio"] = bdinfo_parser.expand_compat_tracks(
                                bdinfo["audio"]
                            )
                        except Exception:
                            traceback.print_exc()
                            reply += reporter.print_report(
                                "fail", "Matching bdinfo audio tracks to mediainfo"
                            )
                        else:
                            try:
                                # setup checker
                                checker.setup(bdinfo, mediainfo, eac3to, channel_name)
                            except Exception:
                                traceback.print_exc()
                                reply += reporter.print_report(
                                    "fail", "vdator failed to setup checker"
                                )
                            else:
                                try:
                                    reply += checker.run_checks()
                                except Exception:
                                    traceback.print_exc()
                                    reply += reporter.print_report(
                                        "fail", "vdator failed to parse"
                                    )
                else:
                    reply += reporter.print_report(
                        "error", "No mediainfo. Are you missing the `General` heading?"
                    )

        # report
        reply += "> **Report**\n"
        reply += reporter.display_report()

        # split into multiple messages based on reply length
        # room is left for the block quotes that may be added to both ends of a part
        BLOCK_QUOTES = "```"
        len_limit = (
            int(os.environ.get("DISCORD_MSG_CHAR_LIMIT")) - len(BLOCK_QUOTES) * 2
        )
        replies = split_string(reply, len_limit, "\n")
        if isinstance(replies, str):
            # an unsplit reply may come back as a plain string,
            # the code below needs a list of parts
            replies = [replies]

        # preserve blockquotes
        # a part that ends inside a code block is closed and the next part reopens it
        for i, r in enumerate(replies):
            if i == len(replies) - 1:
                break
            if not balanced_blockquotes(r):
                replies[i] += BLOCK_QUOTES
                replies[i + 1] = BLOCK_QUOTES + replies[i + 1]

        # fix blockquotes
        # collapse doubled markers left over from the step above
        for i, r in enumerate(replies):
            replies[i] = replies[i].replace("``````", "```")

        if channel_name in BOT_CHANNELS:
            # reply in bot channel
            for part in replies:
                await channel.send(part)
        elif channel_name in REVIEW_CHANNELS:
            # add reactions in review channel
            await add_status_reactions(message, reply)

            # and send reply to
            for ch in REVIEW_REPLY_CHANNELS:
                review_reply_channel = get(
                    message.guild.channels, name=ch, type=discord.ChannelType.text
                )
                for part in replies:
                    await review_reply_channel.send(part)
273 |
274 |
# start the bot with the token from the DISCORD_BOT_SECRET environment variable
token = os.environ.get("DISCORD_BOT_SECRET")
client.run(token)
277 |
--------------------------------------------------------------------------------
/vdator/nltk_people.py:
--------------------------------------------------------------------------------
1 | import nltk
2 | from nltk.corpus import stopwords
3 |
4 |
def download_nltk_data():
    """
    Download the nltk data packages listed below, one after another
    """
    nltk_packages = (
        "stopwords",
        "punkt",
        "averaged_perceptron_tagger",
        "maxent_ne_chunker",
        "words",
    )
    for package in nltk_packages:
        nltk.download(package)
16 |
17 |
def ie_preprocess(document):
    """
    nltk preprocess text: drop English stop words, then split into
    sentences, tokenize each sentence and tag its tokens

    Parameters
    ----------
    document : str
        text to pre process

    Returns
    -------
    list sentences, each one the nltk.pos_tag result for its tokens
    """
    stop_words = stopwords.words("english")
    kept_words = [word for word in document.split() if word not in stop_words]
    filtered_text = " ".join(kept_words)

    tagged_sentences = []
    for sentence in nltk.sent_tokenize(filtered_text):
        tokens = nltk.word_tokenize(sentence)
        tagged_sentences.append(nltk.pos_tag(tokens))
    return tagged_sentences
37 |
38 |
def extract_names(document):
    """
    nltk extract person names

    Parameters
    ----------
    document : str
        text

    Returns
    -------
    list person names, one per chunk labeled PERSON,
    with the chunk's words joined by spaces
    """
    return [
        " ".join([leaf[0] for leaf in chunk])
        for tagged_sentence in ie_preprocess(document)
        for chunk in nltk.ne_chunk(tagged_sentence)
        # only tree chunks carry a label, plain tagged tokens are skipped
        if type(chunk) == nltk.tree.Tree and chunk.label() == "PERSON"
    ]
60 |
--------------------------------------------------------------------------------
/vdator/parsers/__init__.py:
--------------------------------------------------------------------------------
1 | from .bdinfo_parser import BDInfoParser
2 | from .codecs_parser import CodecsParser
3 | from .match_bdinfo_audio_to_mediainfo import MatchBDInfoAudioToMediaInfo
4 | from .media_info_parser import MediaInfoParser
5 | from .paste_parser import PasteParser
6 | from .url_parser import URLParser
7 |
--------------------------------------------------------------------------------
/vdator/parsers/bdinfo_parser.py:
--------------------------------------------------------------------------------
1 | import re
2 |
3 |
4 | class BDInfoParser(object):
5 | """
6 | Parse BDInfo
7 | """
8 |
9 | def __init__(self):
10 | self.embedded_track_types = ["ac3 core", "ac3 embedded"]
11 | # ['-ac3 core', '-ac3 embedded']
12 | self.embedded_track_types_excluded = [
13 | "-" + t for t in self.embedded_track_types
14 | ]
15 | # ['\\(ac3 core:', '\\(ac3 embedded:']
16 | self.embedded_track_types_regex = [
17 | r"\(" + a + ":" for a in self.embedded_track_types
18 | ]
19 | # ['\\(-ac3 core:', '\\(-ac3 embedded:']
20 | self.embedded_track_types_excluded_regex = [
21 | r"\(-" + a + ":.*\)" for a in self.embedded_track_types
22 | ]
23 |
def format_track_name(self, name):
    """
    Normalize whitespace in a track name

    Parameters
    ----------
    name : str
        track name

    Returns
    -------
    str track name with runs of whitespace reduced to one space
    and no leading or trailing whitespace
    """
    # split() without arguments drops leading, trailing and repeated whitespace
    words = name.split()
    return " ".join(words)
40 |
41 | def format_video_track_name(self, name):
42 | """
43 | Format video track name
44 |
45 | Parameters
46 | ----------
47 | name : str
48 | track name
49 |
50 | Returns
51 | -------
52 | str formatted video track name
53 | """
54 | name = self.format_track_name(name)
55 |
56 | # remove 3d
57 | name = name.replace(" / Left Eye", "")
58 | name = name.replace(" / Right Eye", "")
59 |
60 | # force decimal instead of comma in fps
61 | name2 = name.split("/")
62 | if len(name2) >= 4:
63 | name2[3] = name2[3].replace(",", ".")
64 | name = "/".join(name2)
65 |
66 | return name
67 |
68 | def format_audio_track_name(self, name):
69 | """
70 | Format track name
71 |
72 | Parameters
73 | ----------
74 | name : str
75 | track name
76 |
77 | Returns
78 | -------
79 | str formatted audio track name
80 | """
81 | # remove (DTS Core:...)
82 | name = re.sub(r"\(DTS Core:.*\)", "", name).strip()
83 |
84 | # remove excluded (-AC3 Core...) and (-AC3 Embedded...)
85 | for ending in self.embedded_track_types_excluded_regex:
86 | name = re.sub(ending, "", name, flags=re.IGNORECASE).strip()
87 |
88 | # remove dialog normalization
89 | # needs to be after removing (DTS Core:...)
90 | # since the dts core track can have dialog normalization which will break its regex
91 | if "DN" in name.upper() and " / " in name:
92 | name = name.rpartition(" / ")[0]
93 |
94 | name = self.format_track_name(name)
95 |
96 | return name
97 |
98 | def has_compat_track(self, audio_track_name):
99 | audio_track_name = audio_track_name.lower()
100 | for track_type in self.embedded_track_types:
101 | if track_type in audio_track_name:
102 | return True
103 | return False
104 |
def format_audio_compatibility_track(self, audio_track):
    """
    Split an audio track into the main track and its compatibility track

    Parameters
    ----------
    audio_track : dict
        audio track, its 'name' is rewritten in place
        dict{'name':'...', 'language':'...'}

    Returns
    -------
    audio track, compatibility track
    [dict{'name':'...', 'language':'...'}, dict{'name':'...', 'language':'...'}]

    Raises
    ------
    ValueError
        if the name mentions none of self.embedded_track_types,
        or the matching regex does not split the name in two
    """
    original_name = audio_track["name"]
    lowered_name = original_name.lower()

    # first embedded track type mentioned in the name
    # the same index selects the regex used to split the name
    type_index = next(
        (
            i
            for i, track_type in enumerate(self.embedded_track_types)
            if track_type in lowered_name
        ),
        None,
    )
    if type_index is None:
        raise ValueError(
            "audio track has no embedded compatibility track: " + repr(original_name)
        )

    audio_parts = re.split(
        self.embedded_track_types_regex[type_index],
        original_name,
        flags=re.IGNORECASE,
    )
    if len(audio_parts) < 2:
        raise ValueError(
            "could not split compatibility track from: " + repr(original_name)
        )

    audio_track["name"] = self.format_track_name(audio_parts[0])

    compat_track = {
        "name": self.format_track_name(
            "Compatibility Track / Dolby Digital Audio / "
            # the embedded part ends with the closing parenthesis of the original name
            + audio_parts[1].strip().rstrip(")")
        ),
        "language": audio_track["language"],
    }
    return audio_track, compat_track
141 |
def format_audio_track(self, name):
    """
    Split an audio track line into language and formatted name

    Parameters
    ----------
    name : str
        track line, language first, then " / ", then the track name

    Returns
    -------
    dict{'name':'...', 'language':'...'}
    both values are None when the line has no " / " separator
    """
    language, separator, track_name = name.strip().partition(" / ")
    if not separator:
        return {"name": None, "language": None}

    return {
        "name": self.format_audio_track_name(track_name),
        "language": language,
    }
162 |
def format_subtitle_track(self, name):
    """
    Split a subtitle track line into language and bitrate

    Parameters
    ----------
    name : str
        track line, language first, then " / ", then the bitrate

    Returns
    -------
    dict{'language':'...', 'bitrate':'...'}
    both values are None when the line has no " / " separator
    """
    language, separator, bitrate = name.strip().partition(" / ")
    if separator:
        return {"language": language.strip(), "bitrate": bitrate.strip()}
    return {"language": None, "bitrate": None}
183 |
def playlist_report_format_video_track_name(self, name):
    """
    Format playlist report video track name

    The whitespace separated line is rebuilt as
    "<text before bitrate> / <bitrate> kbps / <text after kbps>"
    and passed through format_video_track_name.

    Parameters
    ----------
    name : str
        track line

    Returns
    -------
    str formatted track name,
    or False if the line has no "kbps" token or no value in front of it
    """
    try:
        tokens = name.split()
        kbps_index = tokens.index("kbps")
        if kbps_index == 0:
            # nothing precedes "kbps", so there is no bitrate value;
            # index -1 would silently wrap around to the last token
            return False

        before = " ".join(tokens[: kbps_index - 1]).strip()
        bitrate = tokens[kbps_index - 1] + " " + tokens[kbps_index]
        after = " ".join(tokens[kbps_index + 1 :]).strip()

        return self.format_video_track_name(before + " / " + bitrate + " / " + after)
    except ValueError:
        # no "kbps" token in the line
        return False
209 |
def playlist_report_format_audio_track(self, name):
    """
    Format playlist report audio track

    Parameters
    ----------
    name : str
        track line

    Returns
    -------
    dict{'name':'...', 'language':'...', 'compat_track': None},
    or False if the line is too short to be an audio track
    """
    track = {"name": None, "language": None, "compat_track": None}
    try:
        segments = name.strip().split(" / ")
        head = segments[0].strip().split()

        # the last four tokens of the first segment are not part of the codec name;
        # they are read as <language> <bitrate> kbps <channels> (assumed BDInfo
        # playlist report column order), so the language is counted from the end
        # to stay correct for codec names of any word count
        language = head[-4]
        rebuilt = (
            " ".join(head[:-4])
            + " / "
            + head[-1]
            + " / "
            + " / ".join(segments[1:]).strip()
        )

        track["name"] = self.format_audio_track_name(rebuilt)
        track["language"] = language
        return track
    except (ValueError, IndexError):
        # fewer than four tokens in the first segment
        return False
240 |
def parse_quick_summary_line(self, bdinfo, l):
    """
    Parse quick summary line

    Video, audio and subtitle lines are appended to the matching bdinfo list.
    Any other "key: value" line is stored under the lowercased key.

    Parameters
    ----------
    bdinfo : dict
        bdinfo dict with 'video', 'audio' and 'subtitle' lists, updated in place
    l : str
        quick summary line

    Returns
    -------
    bdinfo dict
    """
    # hidden tracks are prefixed with "* ", ignore the prefix when matching
    lowered = l.strip().lower().lstrip("* ")
    track_kind = next(
        (
            kind
            for kind in ("video", "audio", "subtitle")
            if lowered.startswith(kind + ":")
        ),
        None,
    )

    if track_kind is None:
        # get all other bdinfo entries
        key, separator, value = l.partition(":")
        if separator:
            bdinfo[key.strip().lower()] = value.strip()
        return bdinfo

    track_name = l.split(":", 1)[1].strip()

    if track_kind == "video":
        bdinfo["video"].append(self.format_video_track_name(track_name))
    elif track_kind == "audio":
        audio_track = self.format_audio_track(track_name)
        # the name is None when the line has no " / " separator,
        # such a track cannot describe a compatibility track
        if audio_track["name"] and self.has_compat_track(audio_track["name"]):
            (
                audio_track,
                compat_track,
            ) = self.format_audio_compatibility_track(audio_track)
            audio_track["compat_track"] = compat_track
        bdinfo["audio"].append(audio_track)
    else:
        bdinfo["subtitle"].append(self.format_subtitle_track(track_name))

    return bdinfo
285 |
def expand_compat_tracks(self, bdinfo_audio):
    """
    Expand audio compatibility tracks into two tracks and keep order

    Parameters
    ----------
    bdinfo_audio : list
        bdinfo audio tracks, a track may hold its compatibility track
        under the 'compat_track' key

    Returns
    -------
    audio_tracks list
    every track followed by its compatibility track, if it has one
    """
    audio_tracks = list()
    for audio_track in bdinfo_audio:
        audio_tracks.append(audio_track)

        # playlist report tracks always carry the key, set to None when there is
        # no compatibility track, so check the value and not just the key
        compat_track = (
            audio_track.get("compat_track") if isinstance(audio_track, dict) else None
        )
        if compat_track:
            audio_tracks.append(compat_track)

    return audio_tracks
301 |
--------------------------------------------------------------------------------
/vdator/parsers/codecs_parser.py:
--------------------------------------------------------------------------------
class CodecsParser(object):
    """
    Define codecs
    """

    def __init__(self, codecs):
        """
        Define codecs

        Parameters
        ----------
        codecs : dict
            codec definitions
            {
                "codecs": {
                    "video": {...},
                    "audio": {...},
                    "subtitles": {...},
                    "chapters": {...}
                },
                "track_titles": {
                    "video": {...},
                    "audio": {...}
                },
                "scan_types": {...}
            }
            an optional "video_3d" entry under "codecs" is used by is_video_3d
        """
        self.codecs = codecs

        # map of all codec names to extensions
        self.codec_ext = {
            **self.codecs["codecs"]["video"],
            **self.codecs["codecs"]["audio"],
            **self.codecs["codecs"]["subtitles"],
            **self.codecs["codecs"]["chapters"],
        }

    def is_video(self, codec):
        """
        Is this a video codec?

        Parameters
        ----------
        codec : str
            codec

        Returns
        -------
        True if codec is a video codec, False otherwise.
        """
        return codec in self.codecs["codecs"]["video"]

    def is_video_title(self, codec):
        """
        Is this a video title codec?

        Parameters
        ----------
        codec : str
            codec

        Returns
        -------
        True if codec is a video title codec, False otherwise.
        """
        return codec in self.codecs["track_titles"]["video"]

    def is_video_3d(self, codec):
        """
        Is this a 3d video codec?

        Parameters
        ----------
        codec : str
            codec

        Returns
        -------
        True if codec is a 3d video, False otherwise
        (also when no "video_3d" codecs are defined).
        """
        return codec in self.codecs["codecs"].get("video_3d", {})

    def is_audio(self, codec):
        """
        Is this an audio codec?

        Parameters
        ----------
        codec : str
            codec

        Returns
        -------
        True if codec is an audio codec, False otherwise.
        """
        return codec in self.codecs["codecs"]["audio"]

    def is_audio_title(self, codec):
        """
        Is this an audio title codec?

        Parameters
        ----------
        codec : str
            codec

        Returns
        -------
        True if codec is an audio title codec, False otherwise.
        """
        return codec in self.codecs["track_titles"]["audio"]

    def is_sub(self, codec):
        """
        Is this a subtitle codec?

        Parameters
        ----------
        codec : str
            codec

        Returns
        -------
        True if codec is a subtitle codec, False otherwise.
        """
        return codec in self.codecs["codecs"]["subtitles"]

    def is_chapter(self, codec):
        """
        Is this a chapter codec?

        Parameters
        ----------
        codec : str
            codec

        Returns
        -------
        True if codec is a chapter codec, False otherwise.
        """
        return codec in self.codecs["codecs"]["chapters"]

    def is_codec(self, codec):
        """
        Is this a valid codec?

        Parameters
        ----------
        codec : str
            codec

        Returns
        -------
        True if valid codec, False otherwise.
        """
        return codec in self.codec_ext

    def get_codec_ext(self, codec):
        """
        Get codec extension. Checks if codec is valid.

        Parameters
        ----------
        codec : str
            codec

        Returns
        -------
        str codec extension, empty string for an unknown codec
        """
        return self.codec_ext.get(codec, "")

    def get_video_codec_title_name(self, codec):
        """
        Get name of video codec for title. Checks if video codec is valid.

        Parameters
        ----------
        codec : str
            codec

        Returns
        -------
        str codec title name, empty string for an unknown codec
        """
        return self.codecs["track_titles"]["video"].get(codec, "")

    def get_audio_codec_title_name(self, codec):
        """
        Get name of audio codec for title. Checks if audio codec is valid.

        Parameters
        ----------
        codec : str
            codec

        Returns
        -------
        str codec title name, empty string for an unknown codec
        """
        return self.codecs["track_titles"]["audio"].get(codec, "")

    def get_scan_type_title_name(self, scan_type, video_fps):
        """
        Get name of video scan type for title. Checks if scan type is valid.

        Parameters
        ----------
        scan_type : str
            scan type, anything starting with "p" counts as progressive,
            any other non-empty value as interlaced

        video_fps : str or number
            frame rate, e.g. "25", "25.000" or 25.0

        Returns
        -------
        str scan type title name, boolean if actually progressive
        """
        actually_progressive = False
        scan_type = scan_type.strip().lower()

        if len(scan_type) >= 1:
            scan_type = "progressive" if scan_type[0] == "p" else "interlaced"

        # interlaced @ 25fps is actually progressive
        # but it's still called interlaced
        if scan_type == "interlaced" and self._whole_fps(video_fps) == 25:
            actually_progressive = True

        return self.codecs["scan_types"].get(scan_type, ""), actually_progressive

    @staticmethod
    def _whole_fps(video_fps):
        """
        Truncate a frame rate to a whole number

        Goes through float so strings with a fraction such as "25.000" work,
        a plain int() raises ValueError on those.

        Returns
        -------
        int frame rate, or None if it is not a number
        """
        try:
            return int(float(video_fps))
        except (TypeError, ValueError):
            return None
256 |
--------------------------------------------------------------------------------
/vdator/parsers/match_bdinfo_audio_to_mediainfo.py:
--------------------------------------------------------------------------------
1 | import copy
2 |
3 |
class MatchBDInfoAudioToMediaInfo(object):
    """
    Order bdinfo audio tracks like the mediainfo audio tracks
    """

    def __init__(self, remove_until_first_codec, bdinfo, mediainfo):
        """
        Parameters
        ----------
        remove_until_first_codec : object
            its remove(title) method returns a 3-tuple,
            the first element is the title used for matching

        bdinfo : dict
            bdinfo with an 'audio' list

        mediainfo : dict
            mediainfo with an 'audio' list
        """
        self.remove_until_first_codec = remove_until_first_codec
        self.bdinfo = bdinfo
        self.mediainfo = mediainfo

    def match_bdinfo_audio_to_mediainfo(self):
        """
        Match bdinfo audio tracks to mediainfo audio tracks

        Titles are split on " / " and two tracks match when their first two
        parts (codec and channels) are equal. For every mediainfo track the
        first matching bdinfo track is picked, leftover bdinfo tracks are
        added at the end in their original order.

        Returns
        -------
        sorted list of bdinfo audio tracks (deep copies)
        """
        ordered = list()

        # work on copies so the tracks can be removed as they get matched
        remaining = copy.deepcopy(self.bdinfo["audio"])
        mediainfo_tracks = copy.deepcopy(self.mediainfo["audio"])

        for mediainfo_track in mediainfo_tracks:
            wanted_parts = []
            if "title" in mediainfo_track:
                wanted_title, _, _ = self.remove_until_first_codec.remove(
                    mediainfo_track["title"]
                )
                if wanted_title:
                    wanted_parts = wanted_title.split(" / ")

            # look for the next bdinfo track with the same codec and channels
            for position, candidate in enumerate(remaining):
                candidate_title = None
                if "name" in candidate:
                    candidate_title, _, _ = self.remove_until_first_codec.remove(
                        candidate["name"]
                    )

                if len(wanted_parts) <= 1 or not candidate_title:
                    continue

                candidate_parts = candidate_title.split(" / ")
                if len(candidate_parts) <= 1:
                    continue

                if candidate_parts[:2] == wanted_parts[:2]:
                    # codecs and channel match
                    ordered.append(candidate)
                    del remaining[position]
                    break

            if not remaining:
                # every bdinfo track has been matched
                break

        # add leftover bdinfo audio tracks
        ordered.extend(remaining)

        return ordered
64 |
--------------------------------------------------------------------------------
/vdator/parsers/media_info_parser.py:
--------------------------------------------------------------------------------
class MediaInfoParser(object):
    """
    Parse MediaInfo
    """

    def parse(self, text):
        """
        Parse mediainfo

        A line whose first word is a section name starts a new section entry.
        Lines of the general, video, audio and text sections are stored as
        "key : value" pairs, lines of the menu section as chapters.

        Parameters
        ----------
        text : list
            list of mediainfo lines

        Returns
        -------
        dict mediainfo with 'general', 'video', 'audio', 'text', and 'menu' keys
        each value is a list with one dict (or one chapter list for 'menu') per section
        """
        mediainfo_sections = ["general", "video", "audio", "text", "menu"]
        # dictionary of lists for mediainfo data
        mediainfo = dict((k, list()) for k in mediainfo_sections)
        # current mediainfo section
        curr_sect = None

        for line in text:
            # skip blank and whitespace-only lines
            if not line or not line.strip():
                continue

            # new section of mediainfo
            section_word = line.split()[0].lower()
            if section_word in mediainfo_sections:
                curr_sect = section_word
                # store new list for chapters, and new dictionary for other sections
                mediainfo[section_word].append(
                    list() if section_word == "menu" else dict()
                )
                continue

            # split mediainfo data line
            curr = line.split(" : ", 1)

            # the entry of the current section is always the last one appended
            if curr_sect == "menu":
                mediainfo["menu"][-1].append(self.parse_chapter(curr))
            elif curr_sect is not None and len(curr) >= 2:
                mediainfo[curr_sect][-1][self.format_key(curr[0])] = curr[1]

        return mediainfo

    def format_key(self, key):
        """
        Format keys into abc_def_ghi

        Parameters
        ----------
        key : str
            mediainfo key

        Returns
        -------
        str formatted mediainfo key
        """
        return (
            key.strip()
            .replace(" ", "_")
            .replace("/", "_")
            .replace("(", "")
            .replace(")", "")
            .replace("*", "_")
            .replace(",", "")
            .lower()
        )

    def parse_chapter(self, curr):
        """
        Parse a single chapter

        Parameters
        ----------
        curr : list
            current line split into time and chapter text

        Returns
        -------
        dict chapter
            {"time": "...", "titles": [...], "languages": [...]}
            languages list has unique elements
        """
        chapter = {"time": None, "titles": list(), "languages": set()}
        if len(curr) >= 1:
            chapter["time"] = curr[0].strip()
        if len(curr) >= 2:
            chapter_text = curr[1]

            # several "language:title" entries can be joined with " - "
            for part in chapter_text.split(" - "):
                if ":" in part:
                    # chapter has a language
                    ch = self.format_chapter(part)
                    chapter["titles"].append(ch)
                    chapter["languages"].add(ch["language"])

            if not chapter["titles"]:
                # no language anywhere, just store the whole text as the title
                # (this also keeps titles that merely contain " - ")
                chapter["titles"].append({"language": None, "title": chapter_text})
        chapter["languages"] = list(chapter["languages"])
        return chapter

    def format_chapter(self, text):
        """
        Format chapter language and title

        Parameters
        ----------
        text : str
            chapter text in the form "language:title"

        Returns
        -------
        dict chapter with 'language', 'title' keys
        """
        language, title = text.split(":", 1)
        return {"language": language.strip(), "title": title}
142 |
--------------------------------------------------------------------------------
/vdator/parsers/paste_parser.py:
--------------------------------------------------------------------------------
1 | from dotenv import load_dotenv
2 | from enum import Enum
3 | import os
4 |
# load environment variables
load_dotenv()

# environment variables
# marker line after which the rest of the paste is ignored,
# and how it is compared to a line: "equals" or "contains"
IGNORE_AFTER_LINE = os.environ.get("IGNORE_AFTER_LINE").strip()
IGNORE_AFTER_LINE_METHOD = os.environ.get("IGNORE_AFTER_LINE_METHOD").strip()
# comma separated line prefixes, input after a line starting with one of them
# is ignored until the next blank line
# prefixes are lowercased since they are compared to lowercased lines,
# empty entries are dropped since an empty prefix matches every line
IGNORE_UNTIL_BLANK_LINE_PREFIXES = [
    prefix.strip().lower()
    for prefix in os.getenv("IGNORE_UNTIL_BLANK_LINE_PREFIXES", "").split(",")
    if prefix.strip()
]
15 |
16 |
class BDInfoType(Enum):
    """
    Kind of bdinfo found in a paste
    """

    # bdinfo given as a quick summary
    QUICK_SUMMARY = 1
    # bdinfo given as a playlist report
    PLAYLIST_REPORT = 2
20 |
21 |
class PasteParser(object):
    """
    Split a paste into bdinfo, mediainfo and eac3to log
    """

    def __init__(self, bdinfo_parser):
        # parser used to format quick summary and playlist report lines
        self.bdinfo_parser = bdinfo_parser

    class Section(Enum):
        # part of the paste currently being read
        QUICK_SUMMARY = 1
        MEDIAINFO = 2
        PLAYLIST_REPORT = 3
        EAC3TO_LOG = 4

    class Section2(Enum):
        # playlist report heading last seen ("video:", "audio:", "subtitles:")
        PLAYLIST_VIDEO = 1
        PLAYLIST_AUDIO = 2
        PLAYLIST_SUBTITLES = 3

    class Section3(Enum):
        # set once the "-----" divider below a playlist report heading was seen
        PLAYLIST_INNER_VIDEO = 1
        PLAYLIST_INNER_AUDIO = 2

    def parse(self, text):
        """
        Parse text to extract bdinfo, mediainfo and eac3to log

        Parameters
        ----------
        text : str
            text to parse

        Returns
        -------
        bdinfo dict, mediainfo list of lines (first mediainfo only),
        and eac3to list with one list of lines per log
        """
        bdinfo = {"video": list(), "audio": list(), "subtitle": list()}
        mediainfo = list()
        eac3to = list()

        sect = None
        sect2 = None
        sect3 = None

        # parse bdinfo
        lines = text.splitlines()
        ignore_next_lines, did_first_mediainfo = False, False
        for l in lines:
            # break after ignore line
            if self._isIgnoreAfterLine(l):
                break

            if not l.strip():
                # don't ignore input after blank line
                ignore_next_lines = False
                # skip blank lines
                continue

            if ignore_next_lines:
                continue

            if (
                IGNORE_UNTIL_BLANK_LINE_PREFIXES
                and IGNORE_UNTIL_BLANK_LINE_PREFIXES[0] != ""
            ):
                l3 = l.strip().lower()
                for x in IGNORE_UNTIL_BLANK_LINE_PREFIXES:
                    if l3.startswith(x):
                        # the matching line itself is still parsed below,
                        # only the lines after it are skipped
                        ignore_next_lines = True
                        break

            l = l.strip()
            l2 = l.lower()

            # determine current section
            # limit to first mediainfo
            if (
                l2.startswith("quick summary")
                or l2.startswith("disc title")
                or l2.startswith("disc label")
            ):
                sect = self.Section.QUICK_SUMMARY
                bdinfo["type"] = BDInfoType.QUICK_SUMMARY
            elif l2.startswith("playlist report"):
                sect = self.Section.PLAYLIST_REPORT
                bdinfo["type"] = BDInfoType.PLAYLIST_REPORT
            elif l2.startswith("eac3to v"):
                sect = self.Section.EAC3TO_LOG
                # every eac3to log gets its own list of lines
                eac3to.append(list())
            elif l2.startswith("general"):
                if did_first_mediainfo:
                    sect = None
                else:
                    sect = self.Section.MEDIAINFO
                    did_first_mediainfo = True

            if sect == self.Section.QUICK_SUMMARY:
                # parse quick summary into bdinfo dict
                self.bdinfo_parser.parse_quick_summary_line(bdinfo, l)
            elif sect == self.Section.PLAYLIST_REPORT:

                if l2.startswith("video:"):
                    sect2 = self.Section2.PLAYLIST_VIDEO
                elif l2.startswith("audio:"):
                    sect2 = self.Section2.PLAYLIST_AUDIO
                elif l2.startswith("subtitles:"):
                    sect2 = self.Section2.PLAYLIST_SUBTITLES

                if l2.startswith("-----"):
                    if sect2 == self.Section2.PLAYLIST_VIDEO:
                        sect3 = self.Section3.PLAYLIST_INNER_VIDEO
                    elif sect2 == self.Section2.PLAYLIST_AUDIO:
                        sect3 = self.Section3.PLAYLIST_INNER_AUDIO
                else:
                    # skip tracks that start with minus sign
                    if l.startswith("-"):
                        continue
                    # parse hidden tracks
                    l = l.lstrip("* ")

                    if (
                        sect2 == self.Section2.PLAYLIST_VIDEO
                        and sect3 == self.Section3.PLAYLIST_INNER_VIDEO
                    ):
                        # format video track name with slashes
                        track_name = (
                            self.bdinfo_parser.playlist_report_format_video_track_name(
                                l
                            )
                        )
                        if track_name:
                            bdinfo["video"].append(track_name)

                    elif (
                        sect2 == self.Section2.PLAYLIST_AUDIO
                        and sect3 == self.Section3.PLAYLIST_INNER_AUDIO
                    ):
                        audio_track = (
                            self.bdinfo_parser.playlist_report_format_audio_track(l)
                        )
                        if not audio_track:
                            # line is not an audio track, skip it like the
                            # video branch does instead of storing False
                            continue
                        if self.bdinfo_parser.has_compat_track(l):
                            (
                                audio_track,
                                compat_track,
                            ) = self.bdinfo_parser.format_audio_compatibility_track(
                                audio_track
                            )
                            audio_track["compat_track"] = compat_track
                        bdinfo["audio"].append(audio_track)

            elif sect == self.Section.MEDIAINFO:
                mediainfo.append(l)

            elif sect == self.Section.EAC3TO_LOG:
                if l.startswith("Done."):
                    sect = None
                else:
                    # lines belong to the most recently started log
                    eac3to[-1].append(l)

        return bdinfo, mediainfo, eac3to

    def _isIgnoreAfterLine(self, l):
        """
        Check if we should ignore all input after the current line

        Parameters
        ----------
        l : str
            current line

        Returns
        -------
        True if should ignore further input, False otherwise
        """
        if not IGNORE_AFTER_LINE:
            # no marker configured; an empty marker would match every
            # line with "contains" and every empty line with "equals"
            return False
        if IGNORE_AFTER_LINE_METHOD == "equals":
            return IGNORE_AFTER_LINE == l
        if IGNORE_AFTER_LINE_METHOD == "contains":
            return IGNORE_AFTER_LINE in l
        return False
201 |
--------------------------------------------------------------------------------
/vdator/parsers/url_parser.py:
--------------------------------------------------------------------------------
1 | from urllib.parse import urlparse
2 | import re
3 |
4 |
class URLParser(object):
    """
    Extract supported paste urls from a message and convert them to raw urls
    """

    def __init__(self, urls):
        """
        Parameters
        ----------
        urls : dict
            supported sites, keyed by hostname
            {
                'example.com': {
                    # regex to get paste's unique identifier
                    'slug_regex': 'https://example.com/(.*)',

                    # regex matching an url that is already raw,
                    # using {} in place of the unique identifier (optional)
                    'raw_url_regex': 'https://example.com/raw/{}',

                    # link to raw text using {} in place of the unique identifier
                    'raw_url': 'https://example.com/raw/{}'
                }
            }
        """
        # regex used to extract urls from message
        self.urls_regex = r"(?P<url>https?://[^\s]+)"
        self.urls = urls

    def extract_supported_urls(self, text):
        """
        Find the urls of supported sites in text

        Parameters
        ----------
        text : str
            message text

        Returns
        -------
        list of raw urls, in the order they appear in text
        """
        raw_urls = list()
        for url in re.findall(self.urls_regex, text):
            try:
                o = urlparse(url)
            except ValueError:
                # malformed url, e.g. unbalanced ipv6 brackets
                continue
            # check if url is supported
            if o.hostname in self.urls:
                raw_urls.append(self.get_raw_url(url, o.hostname, o.path))
        return raw_urls

    def get_raw_url(self, url, hostname, path):
        """
        Get url to raw content

        Parameters
        ----------
        url : str
            paste url

        hostname : str
            hostname of the url, must be a key of the supported sites

        path : str
            path of the url (unused)

        Returns
        -------
        str raw url, or url unchanged if it is already raw
        or its unique identifier cannot be found
        """
        site = self.urls[hostname]

        # check if its not already a raw url
        raw_url_regex = site.get("raw_url_regex")
        if raw_url_regex and re.search(raw_url_regex.format("(.*)"), url):
            return url

        slug = re.search(site["slug_regex"], url)
        if slug:
            return site["raw_url"].format(slug.group(1))

        return url

    def get_urls(self):
        """
        Returns
        -------
        dict supported sites
        """
        return self.urls
52 |
--------------------------------------------------------------------------------
/vdator/reporter.py:
--------------------------------------------------------------------------------
1 | import re
2 |
3 | # APIs
4 | import emoji
5 | from helpers import num_to_emoji
6 |
7 |
class Reporter(object):
    """
    Keep track of types of responses
    """

    # emoji alias shown in front of the message for every known report type
    _EMOJI_ALIASES = {
        "correct": ":ballot_box_with_check:",
        "warning": ":warning:",
        "error": ":x:",
        "info": ":information_source:",
        "fail": ":interrobang:",
    }

    def __init__(self):
        self.setup()

    def setup(self):
        """
        Setup/Reset the reporter
        """
        self.report = {"correct": 0, "warning": 0, "error": 0, "info": 0, "fail": 0}

    def print_report(self, type, message, record=True, new_line=True):
        """
        Display report

        Parameters
        ----------
        type : str
            type of report: 'correct', 'warning', 'error', 'info', or 'fail'
            (case-insensitive), any other type is shown as "[TYPE]"

        message : str
            reply message

        record : bool
            should this report be kept track of in total
            default: True

        new_line : bool
            print a new line after message
            default: True

        Returns
        -------
        str message prefixed with the emoji (or "[TYPE]") of its type
        """
        key = type.lower()

        if record:
            # unknown types are counted too instead of raising KeyError
            self.report[key] = self.report.get(key, 0) + 1

        alias = self._EMOJI_ALIASES.get(key)
        if alias is not None:
            prefix = emoji.emojize(alias, language="alias") + " "
        else:
            prefix = "[" + type.upper() + "] "

        return prefix + message + ("\n" if new_line else "")

    def get_report(self):
        """
        Get the report results

        Returns
        -------
        report dict: {'correct' : int, 'warning' : int, 'error' : int, 'info' : int, 'fail' : int}
        """
        return self.report

    def display_report(self):
        """
        Get the report reply

        Returns
        -------
        str reply
        e.g. "3 correct, 1 warning, 0 errors, 0 failures, and 2 info"
        """

        def counted(key, noun):
            # "<count> <noun>", with a plural "s" unless the count is exactly 1
            count = self.report[key]
            return str(count) + " " + noun + ("" if count == 1 else "s")

        return (
            str(self.report["correct"])
            + " correct, "
            + counted("warning", "warning")
            + ", "
            + counted("error", "error")
            + ", "
            + counted("fail", "failure")
            + ", and "
            + str(self.report["info"])
            + " info"
        )
90 |
91 |
async def react_num_errors(message, num_errors):
    """
    React to a discord message with the number of errors

    Counts from 1 to 10 get their number emoji,
    higher counts get the emoji for 10 followed by a plus sign.
    Nothing is added for other counts.

    Parameters
    ----------
    message : discord.Message
        discord message to react to

    num_errors : int
        number of errors
    """
    if num_errors > 10:
        # more than 10 errors
        ten = emoji.emojize(num_to_emoji(10), language="alias")
        await message.add_reaction(ten)
        plus = emoji.emojize(":heavy_plus_sign:", language="alias")
        await message.add_reaction(plus)
        return

    if num_errors in range(1, 11):
        # errors between 1 and 10
        alias = num_to_emoji(num_errors)
        if alias:
            await message.add_reaction(emoji.emojize(alias, language="alias"))
114 |
115 |
async def add_status_reactions(message, content):
    """
    React to a discord message according to the report found in content

    Nothing is added when content holds no report summary line.

    Parameters
    ----------
    message : discord.Message
        discord message to react to

    content : str
        content to parse to determine reactions
    """
    summary = re.search(
        r"(\d+)\scorrect,\s(\d+)\swarnings?,\s(\d+)\serrors?,\s(\d+)\sfailures?,\sand\s(\d+)\sinfo",
        content,
    )
    if not summary:
        return

    # groups are in the order they appear in the summary line
    _correct, warnings, errors, failures, _info = (
        int(count) for count in summary.groups()
    )

    if warnings == 0 and errors == 0 and failures == 0:
        # nothing to complain about
        await message.add_reaction(
            emoji.emojize(":ballot_box_with_check:", language="alias")
        )
        return

    if warnings > 0:
        await message.add_reaction(emoji.emojize(":warning:", language="alias"))
    if errors > 0:
        await message.add_reaction(emoji.emojize(":x:", language="alias"))

    # warnings and errors share one counter
    num_errors = warnings + errors
    if num_errors > 0:
        await react_num_errors(message, num_errors)

    if failures > 0:
        await message.add_reaction(emoji.emojize(":interrobang:", language="alias"))
        await react_num_errors(message, failures)
161 |
--------------------------------------------------------------------------------
/vdator/requirements.txt:
--------------------------------------------------------------------------------
1 | aiohttp==3.8.3
2 | aiosignal==1.3.1
3 | async-timeout==4.0.2
4 | attrs==22.2.0
5 | black==22.12.0
6 | certifi==2022.12.7
7 | charset-normalizer==2.1.1
8 | cinemagoer==2022.12.27
9 | click==8.1.3
10 | discord-markdown==0.4.0
11 | discord.py==2.1.0
12 | emoji==2.2.0
13 | Flask==2.2.2
14 | frozenlist==1.3.3
15 | greenlet==2.0.2
16 | hunspell==0.5.5
17 | idna==3.4
18 | iso-639==0.4.5
19 | itsdangerous==2.1.2
20 | Jinja2==3.1.2
21 | joblib==1.2.0
22 | langdetect==1.0.9
23 | lxml==4.9.2
24 | MarkupSafe==2.1.2
25 | multidict==6.0.3
26 | mypy-extensions==0.4.3
27 | nltk==3.8.1
28 | numpy==1.23.5
29 | pathspec==0.10.3
30 | platformdirs==2.6.0
31 | pydash==5.1.2
32 | python-dotenv==0.21.0
33 | regex==2022.10.31
34 | requests==2.31.0
35 | six==1.16.0
36 | SQLAlchemy==1.4.46
37 | tmdbsimple==2.9.1
38 | tomli==2.0.1
39 | tqdm==4.64.1
40 | Unidecode==1.3.6
41 | urllib3==1.26.13
42 | Werkzeug==2.2.3
43 | yarl==1.8.2
44 |
--------------------------------------------------------------------------------
/vdator/source_detector.py:
--------------------------------------------------------------------------------
1 | import os, re
2 |
# 'mediainfo' to use mediainfo fields
# 'nobdinfo' to assume DVD if no bdinfo given
# any other value, or leaving it unset, means nothing is detected as a DVD
DVD_CHECK_MODE = os.environ.get("DVD_CHECK_MODE", "").strip()


# detect if DVD, 1080p BluRay or UHD BluRay
class SourceDetector(object):
    """
    Define ways to detect source
    """

    def setup(self, bdinfo, mediainfo):
        """
        Parameters
        ----------
        bdinfo : dict
            bdinfo

        mediainfo : dict
            mediainfo
        """
        self.bdinfo = bdinfo
        self.mediainfo = mediainfo

    def is_dvd(self):
        """
        Is this source a DVD?

        Returns
        -------
        boolean True if DVD, False otherwise
        """
        if DVD_CHECK_MODE == "nobdinfo":
            # no bdinfo given, assume dvds
            return not self._has_bdinfo()

        if DVD_CHECK_MODE == "mediainfo":
            height = self._video_height()
            # height is 480p or 576p for dvds
            # Note: checking standard is NTSC or PAL won't work, as some BDs are NTSC
            return height is not None and height <= 576

        return False

    def is_ntsc_dvd(self):
        """
        Is this source an NTSC DVD?

        Returns
        -------
        boolean True if NTSC DVD, False otherwise
        """
        return self.is_dvd() and self._video_standard() == "NTSC"

    def is_pal_dvd(self):
        """
        Is this source a PAL DVD?

        Returns
        -------
        boolean True if PAL DVD, False otherwise
        """
        return self.is_dvd() and self._video_standard() == "PAL"

    def is_uhd(self):
        """
        Is this source a UHD BluRay?

        Returns
        -------
        boolean True if UHD, False otherwise
        """
        return self._video_height() == 2160

    def is_dv(self):
        """
        Does this source have dolby vision?

        Returns
        -------
        boolean True if DV, False otherwise
        """
        video = self._first_video_track()
        if video is None or "hdr_format" not in video:
            return False
        return "Dolby Vision" in video["hdr_format"]

    def _first_video_track(self):
        """
        Get the first mediainfo video track

        Returns
        -------
        dict first video track, or None if mediainfo has no video track
        """
        if "video" in self.mediainfo and len(self.mediainfo["video"]) >= 1:
            return self.mediainfo["video"][0]
        return None

    def _video_height(self):
        """
        Get the height of the first video track

        Returns
        -------
        int height built from all digits of the 'height' field,
        or None if the field is missing or has no digits
        """
        video = self._first_video_track()
        if video is None or "height" not in video:
            return None
        # join all digit groups, e.g. "1 080 pixels" -> 1080
        digits = "".join(re.findall(r"[\d]+", video["height"]))
        return int(digits) if digits else None

    def _video_standard(self):
        """
        Get the standard of the first video track

        Returns
        -------
        str uppercased 'standard' field, or None if the field is missing
        """
        video = self._first_video_track()
        if video is None or "standard" not in video:
            return None
        return video["standard"].upper()

    def _has_bdinfo(self):
        """
        Does the paste include bdinfo?

        Returns
        -------
        boolean True if has bdinfo, False otherwise
        """
        # bdinfo counts as given when it has any video, audio or subtitle track
        return any(
            len(self.bdinfo[kind]) > 0 for kind in ("video", "audio", "subtitle")
        )
161 |
--------------------------------------------------------------------------------