├── .gitattributes
├── .github
    ├── dependabot.yml
    └── workflows
    │   ├── integration-tests.yml
    │   └── lint.yml
├── .gitignore
├── LICENSE
├── README.md
├── example_html_viewer.html
├── test
    ├── discord-bot-test-expect.sh
    ├── discord-bot-test.sh
    ├── test1.ans
    ├── test1.in
    └── test1.sh
└── vdator
    ├── .env.EXAMPLE
    ├── api.py
    ├── checker.py
    ├── checks
        ├── __init__.py
        ├── audio_track_conversions.py
        ├── audio_track_people.py
        ├── audio_track_spellcheck.py
        ├── chapter_language.py
        ├── chapter_padding.py
        ├── check.py
        ├── filename.py
        ├── flac_audio_tracks.py
        ├── has_chapters.py
        ├── metadata_default_flag.py
        ├── metadata_ids.py
        ├── mixins
        │   ├── __init__.py
        │   ├── is_commentary_track.py
        │   ├── is_movie.py
        │   ├── print_header.py
        │   └── section_id.py
        ├── mkvmerge.py
        ├── movie_name_format.py
        ├── muxing_mode.py
        ├── print_audio_track_names.py
        ├── print_chapters.py
        ├── print_text_tracks.py
        ├── remove_until_first_codec.py
        ├── text_default_flag.py
        ├── text_order.py
        ├── tracks_have_language.py
        ├── video_language_matches_first_audio_language.py
        └── video_track.py
    ├── data
        ├── codecs.json
        └── urls.json
    ├── helpers.py
    ├── main.py
    ├── nltk_people.py
    ├── parsers
        ├── __init__.py
        ├── bdinfo_parser.py
        ├── codecs_parser.py
        ├── match_bdinfo_audio_to_mediainfo.py
        ├── media_info_parser.py
        ├── paste_parser.py
        └── url_parser.py
    ├── reporter.py
    ├── requirements.txt
    └── source_detector.py


/.gitattributes:
--------------------------------------------------------------------------------
1 | # Binary files that should not be normalized or diffed
2 | *.png binary
3 | *.jpg binary
4 | *.gif binary
5 | *.ico binary
6 | 
7 | # Catch all for anything we forgot. Add rules if you get CRLF -> LF warnings.
8 | * eol=lf
9 | 


--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
 1 | # To get started with Dependabot version updates, you'll need to specify which
 2 | # package ecosystems to update and where the package manifests are located.
 3 | # Please see the documentation for all configuration options:
 4 | # https://help.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
 5 | 
 6 | version: 2
 7 | updates:
 8 |   - package-ecosystem: "pip" # See documentation for possible values
 9 |     directory: "/vdator" # Location of package manifests
10 |     schedule:
11 |       interval: "daily"
12 | 


--------------------------------------------------------------------------------
/.github/workflows/integration-tests.yml:
--------------------------------------------------------------------------------
 1 | name: integration tests
 2 | 
 3 | on: [push, pull_request]
 4 | 
 5 | defaults:
 6 |   run:
 7 |     working-directory: ./vdator
 8 | 
 9 | jobs:
10 |   dependencies:
11 | 
12 |     runs-on: ubuntu-latest
13 |     strategy:
14 |       fail-fast: false
15 |       matrix:
16 |         python-version: ["3.8", "3.9", "3.10"]
17 | 
18 |     steps:
19 |       - uses: actions/checkout@v2
20 |       - name: Set up Python ${{ matrix.python-version }}
21 |         uses: actions/setup-python@v2
22 |         with:
23 |           python-version: ${{ matrix.python-version }}
24 |       - name: Install packages
25 |         run: sudo apt-get install -y expect libhunspell-dev
26 |       - name: Install python dependencies
27 |         run: |
28 |           python -m pip install --upgrade pip
29 |           if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
30 |       - name: Setup .env
31 |         run: cp .env.EXAMPLE .env
32 | 
33 |   api:
34 | 
35 |     runs-on: ubuntu-latest
36 |     strategy:
37 |       fail-fast: false
38 |       matrix:
39 |         python-version: ["3.8", "3.9", "3.10"]
40 | 
41 |     steps:
42 |       - uses: actions/checkout@v2
43 |       - name: Set up Python ${{ matrix.python-version }}
44 |         uses: actions/setup-python@v2
45 |         with:
46 |           python-version: ${{ matrix.python-version }}
47 |       - name: Install packages
48 |         run: sudo apt-get install -y libhunspell-dev
49 |       - name: Install python dependencies
50 |         run: |
51 |           python -m pip install --upgrade pip
52 |           if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
53 |       - name: Setup .env
54 |         run: cp .env.EXAMPLE .env
55 |       - name: Run API
56 |         env:
57 |           MKVMERGE_VERSION: "Version 57.0.0 \"Till The End\" 2021-05-22"
58 |         run: python3 api.py &
59 |       - name: Test blank input to API
60 |         run: ./test1.sh
61 |         working-directory: ./test
62 | 
63 |   discord-bot:
64 | 
65 |     if: ${{ github.ref == 'refs/heads/main' }}
66 | 
67 |     runs-on: ubuntu-latest
68 | 
69 |     steps:
70 |       - uses: actions/checkout@v2
71 |       - name: Set up Python 3.10
72 |         uses: actions/setup-python@v2
73 |         with:
74 |           python-version: "3.10"
75 |       - name: Install packages
76 |         run: sudo apt-get install -y expect libhunspell-dev
77 |       - name: Install python dependencies
78 |         run: |
79 |           python -m pip install --upgrade pip
80 |           if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
81 |       - name: Setup .env
82 |         run: cp .env.EXAMPLE .env
83 |       - name: Discord bot can join server
84 |         env:
85 |           DISCORD_BOT_SECRET: ${{ secrets.DISCORD_BOT_SECRET }}
86 |         run: ./discord-bot-test-expect.sh
87 |         working-directory: ./test
88 | 


--------------------------------------------------------------------------------
/.github/workflows/lint.yml:
--------------------------------------------------------------------------------
 1 | name: lint
 2 | 
 3 | on: [push, pull_request]
 4 | 
 5 | defaults:
 6 |   run:
 7 |     working-directory: ./vdator
 8 | 
 9 | jobs:
10 |   build:
11 | 
12 |     runs-on: ubuntu-latest
13 |     strategy:
14 |       matrix:
15 |         python-version: ["3.8", "3.9", "3.10"]
16 | 
17 |     steps:
18 |       - uses: actions/checkout@v2
19 |       - name: Set up Python ${{ matrix.python-version }}
20 |         uses: actions/setup-python@v2
21 |         with:
22 |           python-version: ${{ matrix.python-version }}
23 |       - name: Install PyHunSpell
24 |         run: sudo apt-get install libhunspell-dev
25 |       - name: Install dependencies
26 |         run: |
27 |           python -m pip install --upgrade pip
28 |           if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
29 |       - name: Setup .env
30 |         run: cp .env.EXAMPLE .env
31 |       - name: Lint with black
32 |         run: black . --check
33 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | *.egg-info/
 24 | .installed.cfg
 25 | *.egg
 26 | MANIFEST
 27 | 
 28 | # PyInstaller
 29 | #  Usually these files are written by a python script from a template
 30 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 31 | *.manifest
 32 | *.spec
 33 | 
 34 | # Installer logs
 35 | pip-log.txt
 36 | pip-delete-this-directory.txt
 37 | 
 38 | # Unit test / coverage reports
 39 | htmlcov/
 40 | .tox/
 41 | .coverage
 42 | .coverage.*
 43 | .cache
 44 | nosetests.xml
 45 | coverage.xml
 46 | *.cover
 47 | .hypothesis/
 48 | .pytest_cache/
 49 | 
 50 | # Translations
 51 | *.mo
 52 | *.pot
 53 | 
 54 | # Django stuff:
 55 | *.log
 56 | local_settings.py
 57 | db.sqlite3
 58 | 
 59 | # Flask stuff:
 60 | instance/
 61 | .webassets-cache
 62 | 
 63 | # Scrapy stuff:
 64 | .scrapy
 65 | 
 66 | # Sphinx documentation
 67 | docs/_build/
 68 | 
 69 | # PyBuilder
 70 | target/
 71 | 
 72 | # Jupyter Notebook
 73 | .ipynb_checkpoints
 74 | 
 75 | # pyenv
 76 | .python-version
 77 | 
 78 | # celery beat schedule file
 79 | celerybeat-schedule
 80 | 
 81 | # SageMath parsed files
 82 | *.sage.py
 83 | 
 84 | # Environments
 85 | .env
 86 | .venv
 87 | env/
 88 | venv/
 89 | ENV/
 90 | env.bak/
 91 | venv.bak/
 92 | 
 93 | vdator/bin/
 94 | vdator/include
 95 | vdator/lib64
 96 | vdator/pyvenv.cfg
 97 | vdator/.env
 98 | 
 99 | # Spyder project settings
100 | .spyderproject
101 | .spyproject
102 | 
103 | # Rope project settings
104 | .ropeproject
105 | 
106 | # mkdocs documentation
107 | /site
108 | 
109 | # mypy
110 | .mypy_cache/
111 | 
112 | test/*.out
113 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2019 werrpy
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # vdator
  2 | > Remux validator Discord bot
  3 | 
  4 | [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
  5 | [![Lint](https://github.com/werrpy/vdator/actions/workflows/lint.yml/badge.svg)](https://github.com/werrpy/vdator/actions/workflows/lint.yml)
  6 | [![Integration Tests](https://github.com/werrpy/vdator/actions/workflows/integration-tests.yml/badge.svg)](https://github.com/werrpy/vdator/actions/workflows/integration-tests.yml)
  7 | 
  8 | Takes a Pastebin link with BDInfo and MediaInfo dump, and validates the remux.
  9 | 
 10 | Checks:
 11 | ```
 12 | Video track names
 13 | Movie/TV name format
 14 | IMDB/TMDB ids
 15 | Filename
 16 | Video language matches first audio language
 17 | No muxing mode
 18 | Uses latest mkvtoolnix
 19 | Video and audio track names match
 20 | DTS-HD MA 1.0/2.0 optionally to FLAC, LPCM 1.0/2.0 to FLAC, LPCM > 2.0 to DTS-HD MA
 21 | Commentary to AC-3 @ 224 kbps
 22 | Commentary track people and spellcheck
 23 | Subtitle order
 24 | Subtitle default flag
 25 | Should have chapters
 26 | Chapter languages
 27 | Chapter padding
 28 | ```
 29 | 
 30 | ### Table of Contents
 31 | - [Supported pastebin sites](#supported-pastebin-sites)
 32 | - [Setup](#setup)
 33 |   * [Create a python3 virtual environment](#create-a-python3-virtual-environment)
 34 |   * [Installing dependencies](#installing-dependencies)
 35 |   * [Updating dependencies](#updating-dependencies)
 36 |   * [Running manually](#running-manually)
 37 |   * [Running with systemd](#running-with-systemd)
 38 | - [Lint](#lint)
 39 | - [Using](#using)
 40 | - [Adding a pastebin site](#adding-a-pastebin-site)
 41 | - [Adding a new check](#adding-a-new-check)
 42 | - [API](#api)
 43 | 
 44 | ### Supported pastebin sites
 45 | 
 46 | - [{d}paste](https://dpaste.com/)
 47 | - [dpaste](https://dpaste.org/)
 48 | - [Hastebin](https://hastebin.com/)
 49 | - [Hey! Paste it](https://www.heypasteit.com/)
 50 | - [CentOS Pastebin Service](https://paste.centos.org/)
 51 | - [Paste.ee](https://paste.ee/)
 52 | - [openSUSE Paste](https://paste.opensuse.org/)
 53 | - [Pastebin](https://pastebin.com/)
 54 | - [Rentry.co - Markdown Pastebin](https://rentry.co/)
 55 | - [termbin](https://termbin.com/)
 56 | - [TextBin](https://textbin.net/)
 57 | 
 58 | ### Setup
 59 | 
 60 | Requires Python >= 3.8
 61 | 
 62 | Create a [Discord bot](https://discordapp.com/developers/docs/intro) and add it to a server. In the bot settings enable "MESSAGE CONTENT INTENT".
 63 | 
 64 | Copy the environment variables template `vdator/.env.EXAMPLE` to `vdator/.env`  
 65 | Edit `vdator/.env` and set `DISCORD_BOT_SECRET` to your bot's token.
 66 | 
 67 | Request a [TMDB API Key](https://developers.themoviedb.org/3/getting-started/introduction) and set `TMDB_API_KEY`.
 68 | 
 69 | Don't forget to create channels on the server and set them in `vdator/.env` for `REVIEW_CHANNELS`, `REVIEW_REPLY_CHANNELS`, and `BOT_CHANNELS`.
 70 | 
 71 | To prevent overwriting the `vdator/.env` file when pulling changes from git, do `git update-index --skip-worktree vdator/.env`. When you want to pull a new `.env` file, do `git update-index --no-skip-worktree vdator/.env`.
 72 | 
 73 | #### Create a python3 virtual environment:
 74 | 
 75 | Use [pip and virtual env](https://packaging.python.org/guides/installing-using-pip-and-virtualenv/) to run vdator.
 76 | 
 77 | In the `vdator` directory run:
 78 | ```bash
 79 | python3 -m venv .
 80 | ```
 81 | 
 82 | If the command fails to install pip, you will see an error similar to:
 83 | ```
 84 | Error: Command '['python3', '-Im', 'ensurepip', '--upgrade', '--default-pip']' returned non-zero exit status 1.
 85 | ```
 86 | Start over by creating a virtual environment without pip, and then install pip manually inside it:
 87 | ```bash
 88 | python3 -m venv --without-pip .
 89 | source bin/activate
 90 | curl https://bootstrap.pypa.io/get-pip.py | python3
 91 | deactivate
 92 | ```
 93 | 
 94 | #### Installing dependencies
 95 | 
 96 | Install [PyHunSpell](https://github.com/blatinier/pyhunspell#installation)
 97 | 
 98 | ```bash
 99 | sudo apt install python3-dev libhunspell-dev
100 | ```
101 | 
102 | Install dependencies
103 | 
104 | ```bash
105 | source bin/activate
106 | pip3 install -r requirements.txt
107 | deactivate
108 | ```
109 | 
110 | #### Updating dependencies
111 | 
112 | ```bash
113 | source bin/activate
114 | pip3 install -r requirements.txt --upgrade
115 | pip3 freeze > requirements.txt
116 | deactivate
117 | ```
118 | 
119 | #### Running manually
120 | 
121 | Run the bot manually for testing; any exceptions will be printed:
122 | ```bash
123 | source bin/activate
124 | python3 main.py
125 | ```
126 | 
127 | #### Running with systemd
128 | 
129 | Create a systemd service to run vdator, `/etc/systemd/system/vdator.service`
130 | 
131 | ```
132 | [Unit]
133 | Description=vdator
134 | After=multi-user.target
135 | 
136 | [Service]
137 | WorkingDirectory=/home/USER/vdator/venv/vdator
138 | User=
139 | Group=
140 | ExecStart=/home/USER/vdator/venv/bin/python3 /home/USER/vdator/venv/vdator/main.py
141 | Type=idle
142 | Restart=always
143 | RestartSec=15
144 | 
145 | [Install]
146 | WantedBy=multi-user.target
147 | ```
148 | 
149 | Set `User` to the user to run vdator as, and `Group` to the user's group (list with `groups`), usually both are the username.
150 | Replace `/home/USER/vdator/venv/` with the full path to your venv.
151 | 
152 | Run `systemctl enable vdator` to start on boot. Use systemctl to start/stop vdator, `systemctl start vdator`, `systemctl stop vdator`, `systemctl restart vdator`
153 | 
154 | ### Lint
155 | ```bash
156 | black .
157 | ```
158 | 
159 | ### Using
160 | 
161 | Type `!help` in one of the bot channels for more information.
162 | 
163 | ### Adding a pastebin site
164 | 
165 | Edit `vdator/data/urls.json` and add your pastebin site.
166 | 
167 | ```
168 | # hostname
169 | "example.com": {
170 |     # regex to get paste's unique identifier
171 |     "slug_regex": "https://example.com/(.*)",
172 | 
173 |     # regex to check if paste links directly to raw text, using {} in place of the unique identifier
174 |     "raw_url_regex": "https?://example.com/raw/{}",
175 |     
176 |     # link to raw text, using {} in place of the unique identifier
177 |     "raw_url": "https://example.com/raw/{}"
178 | }
179 | ```
180 | 
181 | ### Adding a new check
182 | 
183 | Edit `vdator/checker.py`.
184 | 
185 | In the `run_checks()` method add:
186 | ```python
187 | reply += MyNewCheck(self.reporter, self.mediainfo).run()
188 | ```
189 | 
190 | Edit `vdator/checks/__init__.py` and add:
191 | ```python
192 | from .my_check import *
193 | ```
194 | 
195 | Create `vdator/checks/my_check.py`:
196 | ```python
197 | from .check import *
198 | 
199 | 
200 | class MyNewCheck(Check):
201 |     def __init__(self, reporter, mediainfo):
202 |         super().__init__(reporter, mediainfo, "Error running my check")
203 | 
204 |     # overriding abstract method
205 |     def get_reply(self):
206 |         reply = ""
207 |         # use self.mediainfo here
208 |         # use has() and has_many() to check if the mediainfo keys you need exist, for example:
209 |         # if has_many(self.mediainfo, "video.0", ["height"]):
210 |             # safe to use self.mediainfo["video"][0]["height"] here
211 |         # use self.reporter.print_report() to print status messages
212 |         reply += self.reporter.print_report("info", "Some info message")
213 |         # lastly return the string result of the check which is appended to the bot reply in run_checks()
214 |         return reply
215 | ```
216 | 
217 | ### API
218 | 
219 | Run with `python api.py`
220 | 
221 | The default port is 5000. To use a different port, set the `PORT` environment variable, for example `export PORT=8080 && python api.py`
222 | 
223 | Example using Postman:
224 | ```
225 | POST http://127.0.0.1:5000/text
226 |     Body, raw
227 |     [INSERT TEXT HERE]
228 | ```
229 | 
230 | Gives back json:
231 | ```json
232 | {
233 | 	"discord_reply":"...",
234 | 	"html_reply":"..."
235 | }
236 | ```
237 | **discord_reply** - the text that the bot usually sends to discord  
238 | **html_reply** - discord text formatted as html
239 | 
240 | Insert the `html_reply` text into the `example_html_viewer.html` to see it formatted similar to discord.
241 | 
242 | For testing, force a specific version of mkvmerge with
243 | 
244 | ````bash
245 | export MKVMERGE_VERSION="Version 57.0.0 \"Till The End\" 2021-05-22" && python api.py
246 | ````
247 | 
248 | 


--------------------------------------------------------------------------------
/example_html_viewer.html:
--------------------------------------------------------------------------------
 1 | <html>
 2 | <head>
 3 | 
 4 | <meta charset="UTF-8">
 5 | <style>
 6 | .container {
 7 |     height: 300px;
 8 |     overflow-y: auto;
 9 |     margin: 0 auto;
10 |     max-width: 1000px;
11 | }
12 | </style>
13 | 
14 | <style>
15 | .discord-theme {
16 |     font-family: Whitney, "Helvetica Neue", Helvetica, Arial, sans-serif;
17 |     background: #36393f;
18 |     color: #dcddde;
19 | }
20 | .discord-theme p {
21 |     margin: 5px 0;
22 |     font-size: 16px;
23 | }
24 | .discord-theme code {
25 |     background: #2f3136;
26 | }
27 | </style>
28 | 
29 | </head>
30 | 
31 | <body>
32 | 
33 | <div class="container">
34 | 
35 | <div class="discord-theme">
36 | <!-- insert json["html_reply"] here -->
37 | 
38 | 
39 | 
40 | </div>
41 | 
42 | </div>
43 | 
44 | </body>
45 | 
46 | </html>
47 | 


--------------------------------------------------------------------------------
/test/discord-bot-test-expect.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/expect -f
 2 | 
 3 | # timeout after 60 seconds
 4 | set timeout 60
 5 | 
 6 | spawn ./discord-bot-test.sh
 7 | 
 8 | expect "I'm in\r"
 9 | expect "vdator-github-actions#7018\r"
10 | 


--------------------------------------------------------------------------------
/test/discord-bot-test.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | python ../vdator/main.py
4 | 


--------------------------------------------------------------------------------
/test/test1.ans:
--------------------------------------------------------------------------------
1 | {"discord_reply":"\u274c No mediainfo. Are you missing the `General` heading?\n> **Report**\n0 correct, 0 warnings, 1 error, 0 failures, and 0 info","html_reply":"<p><img src='https://discord.com/assets/8becd37ab9d13cdfe37c08c496a9def3.svg' height='16'> No mediainfo. Are you missing the <code>General</code> heading?</p><p><b>Report</b></p><p>0 correct, 0 warnings, 1 error, 0 failures, and 0 info</p>"}
2 | 


--------------------------------------------------------------------------------
/test/test1.in:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/werrpy/vdator/a6be476623551b797c94a3f5944c1d7c921bfb94/test/test1.in


--------------------------------------------------------------------------------
/test/test1.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Post the contents of test1.in to the local API and compare the reply with
# the expected answer; diff's exit status is the script's exit status.
#
# In CI the API is started in the background by the step just before this
# one, so it may not be listening yet. Retry while the connection is refused
# instead of failing on the first attempt.
curl -s --retry 30 --retry-delay 2 --retry-connrefused \
    -X POST --data-binary "$(cat test1.in)" http://127.0.0.1:5000/text > test1.out
diff test1.out test1.ans
5 | 


--------------------------------------------------------------------------------
/vdator/.env.EXAMPLE:
--------------------------------------------------------------------------------
 1 | # channels to listen in and add reactions
 2 | REVIEW_CHANNELS=upload-review, remux
 3 | 
 4 | # channels to send full summary to if from review channel
 5 | REVIEW_REPLY_CHANNELS=remux-bot
 6 | 
 7 | # channels to listen in and post full summaries
 8 | BOT_CHANNELS=remux-bot
 9 | 
10 | # trainee channels
11 | TRAINEE_CHANNELS=upload-review
12 | 
13 | # internal channels
14 | INTERNAL_CHANNELS=remux
15 | 
16 | # release group
17 | RELEASE_GROUP=GROUP
18 | 
19 | # in-game, Now Playing...
20 | IN_GAME=Remux n00b
21 | 
22 | # stop parsing after encountering this line
23 | IGNORE_AFTER_LINE=%%%
24 | 
25 | # method to check for line to ignore after
26 | # 'equals' or 'contains'
27 | IGNORE_AFTER_LINE_METHOD=contains
28 | 
29 | # ignore input until blank line if current line starts with one of these
30 | #IGNORE_UNTIL_BLANK_LINE_PREFIXES=
31 | 
32 | # DVD check mode
33 | # 'mediainfo' to use mediainfo fields
34 | # 'nobdinfo' to assume DVD if no bdinfo given
35 | DVD_CHECK_MODE=nobdinfo
36 | 
37 | DISCORD_BOT_SECRET=
38 | DISCORD_MSG_CHAR_LIMIT=2000
39 | 
40 | TMDB_API_KEY=
41 | HUNSPELL_LANG=/usr/share/hunspell/en_US.dic, /usr/share/hunspell/en_US.aff
42 | MISSPELLED_IGNORE_LIST=upmix
43 | 
44 | MKVTOOLNIX_NEWS=https://mkvtoolnix.download/doc/NEWS.md
45 | 
46 | FILENAME_CUTS=Directors.Cut, Extended.Cut, Final.Cut, Theatrical, Uncut, Unrated
47 | 
48 | # how many years off the movie year can be. (default: 1)
49 | #MOVIE_YEAR_OFFSET=1
50 | 


--------------------------------------------------------------------------------
/vdator/api.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Experimental REST API
  3 | 
  4 | > python3 api.py
  5 | POST http://127.0.0.1:5000/text
  6 |     Body, raw
  7 |     [INSERT TEXT HERE]
  8 |     
  9 | {"discord_reply":"...", "html_reply":"..."}
 10 | """
 11 | 
 12 | import json, os, traceback
 13 | from flask import Flask, jsonify, request
 14 | 
 15 | from discord_markdown.discord_markdown import (
 16 |     Compiler,
 17 |     convert_to_html as discord_markdown_convert_to_html,
 18 | )
 19 | 
 20 | # Override discord_markdown.discord_markdown.Compiler.compile method to disable printing
 21 | # https://github.com/bitjockey42/discord-markdown/blob/9b8d267e3bf1b333bccaae5619a3f2af0a5a54a1/discord_markdown/compiler.py#L29-L37
 22 | def compile(self, markdown=False):
 23 |     if not self._parser.tree:
 24 |         self._parser.parse()
 25 |     self._code = ""
 26 |     for node in self._parser.tree:
 27 |         self._code = self._code + node.eval(markdown=markdown)
 28 |     self._code = self._code.strip()
 29 |     return self._code
 30 | 
 31 | 
 32 | Compiler.compile = compile
 33 | 
 34 | # parsers
 35 | from parsers import *
 36 | from source_detector import SourceDetector
 37 | from reporter import Reporter
 38 | from checker import Checker
 39 | 
# directory containing this script, used to locate the bundled data files
__location__ = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__)))

# initialize parsers; the paste parser is given the bdinfo parser
bdinfo_parser = BDInfoParser()
paste_parser = PasteParser(bdinfo_parser)
mediainfo_parser = MediaInfoParser()

# load the codec definitions from data/codecs.json next to this script
with open(os.path.join(__location__, "data/codecs.json")) as f:
    codecs = json.load(f)
    codecs_parser = CodecsParser(codecs)

# module-level instances shared by every request handled by parse_text()
source_detector = SourceDetector()
reporter = Reporter()
checker = Checker(codecs_parser, source_detector, reporter)

# Flask application that serves the /text route
app = Flask(__name__)
 57 | 
 58 | 
 59 | @app.route("/text", methods=["POST"])
 60 | def parse_text():
 61 |     """
 62 |     POST http://127.0.0.1:5000/text
 63 |     Body, raw
 64 |     [INSERT TEXT HERE]
 65 |     """
 66 | 
 67 |     reply = ""
 68 | 
 69 |     try:
 70 |         # setup/reset reporter
 71 |         reporter.setup()
 72 |         text = request.get_data().decode("utf-8")
 73 |         bdinfo, mediainfo, eac3to = paste_parser.parse(text)
 74 |     except:
 75 |         traceback.print_exc()
 76 |         reply += reporter.print_report("fail", "Failed to get paste")
 77 |     else:
 78 |         if mediainfo:
 79 |             try:
 80 |                 # parse mediainfo
 81 |                 mediainfo = mediainfo_parser.parse(mediainfo)
 82 |             except:
 83 |                 traceback.print_exc()
 84 |                 reply += reporter.print_report("fail", "Mediainfo parser failed")
 85 |             else:
 86 |                 try:
 87 |                     # setup checker
 88 |                     checker.setup(bdinfo, mediainfo, eac3to, "remux-bot")
 89 |                 except:
 90 |                     traceback.print_exc()
 91 |                     reply += reporter.print_report(
 92 |                         "fail", "vdator failed to setup checker"
 93 |                     )
 94 |                 else:
 95 |                     try:
 96 |                         reply += checker.run_checks()
 97 |                     except:
 98 |                         traceback.print_exc()
 99 |                         reply += reporter.print_report("fail", "vdator failed to parse")
100 |         else:
101 |             reply += reporter.print_report(
102 |                 "error", "No mediainfo. Are you missing the `General` heading?"
103 |             )
104 | 
105 |     # report
106 |     reply += "> **Report**\n"
107 |     reply += reporter.display_report()
108 | 
109 |     # prevent infinite loop with 2 multi-line code blocks
110 |     # https://github.com/bitjockey42/discord-markdown/issues/6
111 |     reply_to_convert = reply.replace("```", "===")
112 |     # remove quotes around sections
113 |     reply_to_convert = reply_to_convert.replace("> **", "**")
114 | 
115 |     # convert to html
116 |     reply_html = discord_markdown_convert_to_html(reply_to_convert)
117 | 
118 |     # format html
119 |     reply_html = reply_html.replace("===", "<br>")
120 |     # emojis
121 |     reply_html = reply_html.replace(
122 |         "☑",
123 |         "<img src='http://discord.com//assets/86c16c39d96283551fd4ca7392e22681.svg' height='16'>",
124 |     )
125 |     reply_html = reply_html.replace(
126 |         "⚠",
127 |         "<img src='https://discord.com/assets/289673858e06dfa2e0e3a7ee610c3a30.svg' height='16'>",
128 |     )
129 |     reply_html = reply_html.replace(
130 |         "❌",
131 |         "<img src='https://discord.com/assets/8becd37ab9d13cdfe37c08c496a9def3.svg' height='16'>",
132 |     )
133 | 
134 |     data = {"discord_reply": reply, "html_reply": reply_html}
135 | 
136 |     return jsonify(data)
137 | 
138 | 
# port to listen on: the PORT environment variable, or "5000" when it is unset
# (the value is a string either way)
PORT = os.environ.get("PORT", "5000")
# NOTE(review): there is no `if __name__ == "__main__":` guard, so the server
# is started whenever this module is executed or imported
app.run(port=PORT)
141 | 


--------------------------------------------------------------------------------
/vdator/checker.py:
--------------------------------------------------------------------------------
  1 | from dotenv import load_dotenv
  2 | import logging, os
  3 | 
  4 | # load environment variables
  5 | load_dotenv()
  6 | 
  7 | # TMDb API
  8 | import tmdbsimple as tmdb
  9 | 
 10 | tmdb.API_KEY = os.environ.get("TMDB_API_KEY")
 11 | 
 12 | # IMDb API
 13 | from imdb import Cinemagoer
 14 | 
 15 | ia = Cinemagoer()
 16 | logger = logging.getLogger("imdbpy")
 17 | logger.disabled = True
 18 | 
 19 | # checks
 20 | from checks.mixins import PrintHeader, SectionId, IsCommentaryTrack
 21 | from checks.remove_until_first_codec import RemoveUntilFirstCodec
 22 | from checks import *
 23 | 
 24 | # nltk data
 25 | from nltk_people import download_nltk_data
 26 | 
 27 | download_nltk_data()
 28 | 
 29 | 
class Checker(PrintHeader, SectionId, IsCommentaryTrack):
    """
    Runs every check against one parsed paste and concatenates the replies.

    Call setup() with the parsed input, then run_checks() to get the reply
    text. Status messages are recorded through the reporter given at
    construction.
    """

    def __init__(self, codecs_parser, source_detector, reporter):
        """
        Store the collaborators shared by the checks.

        codecs_parser is kept as self.codecs and is also used to build the
        RemoveUntilFirstCodec helper that several checks receive.
        """
        self.codecs = codecs_parser
        self.remove_until_first_codec = RemoveUntilFirstCodec(codecs_parser)
        self.source_detector = source_detector
        self.reporter = reporter

    def setup(self, bdinfo, mediainfo, eac3to, channel_name):
        """
        Store the input for the next run_checks() call and pass bdinfo and
        mediainfo on to the source detector.
        """
        self.bdinfo = bdinfo
        self.mediainfo = mediainfo
        self.eac3to = eac3to
        self.channel_name = channel_name
        self.source_detector.setup(bdinfo, mediainfo)

    def run_checks(self):
        """
        Run all checks in a fixed order and return their combined reply.

        Each check is constructed and run immediately; the string it returns
        is appended to the reply, with section headers printed before the
        metadata, video & audio, and text groups.
        """
        reply = ""

        # check metadata
        reply += self._print_header("Metadata")
        reply += CheckMovieNameFormat(self.reporter, self.mediainfo).run()
        reply += CheckMetadataIds(self.reporter, self.mediainfo, tmdb, ia).run()
        reply += CheckFilename(
            self.reporter,
            self.source_detector,
            self.codecs,
            self.remove_until_first_codec,
            self.mediainfo,
            self.bdinfo,
            self.channel_name,
        ).run()
        reply += CheckTracksHaveLanguage(self.reporter, self.mediainfo).run()
        reply += CheckVideoLanguageMatchesFirstAudioLanguage(
            self.reporter, self.mediainfo
        ).run()
        reply += CheckMuxingMode(self.reporter, self.mediainfo).run()
        reply += CheckMKVMerge(self.reporter, self.mediainfo).run()
        reply += CheckMetadataDefaultFlag(self.reporter, self.mediainfo).run()

        # check video
        reply += self._print_header("Video & Audio Tracks")
        reply += CheckVideoTrack(
            self.reporter,
            self.source_detector,
            self.codecs,
            self.mediainfo,
            self.bdinfo,
        ).run()

        # check audio
        reply += CheckPrintAudioTrackNames(self.reporter, self.mediainfo).run()
        reply += CheckAudioTrackConversions(
            self.reporter,
            self.source_detector,
            self.codecs,
            self.remove_until_first_codec,
            self.mediainfo,
            self.bdinfo,
            self.eac3to,
        ).run()
        # check FLAC audio using mediainfo
        reply += CheckFLACAudioTracks(
            self.reporter, self.remove_until_first_codec, self.mediainfo
        ).run()

        # TMDb and IMDb People API
        reply += CheckAudioTrackPeople(
            self.reporter, self.remove_until_first_codec, self.mediainfo, tmdb, ia
        ).run()
        reply += CheckAudioTrackSpellCheck(
            self.reporter, self.remove_until_first_codec, self.mediainfo
        ).run()

        # check text
        reply += self._print_header("Text Tracks")
        reply += CheckPrintTextTracks(self.reporter, self.mediainfo).run()
        reply += CheckTextOrder(self.reporter, self.mediainfo).run()
        reply += CheckTextDefaultFlag(self.reporter, self.mediainfo).run()

        # check chapters (no separate header is printed for this group)
        reply += CheckPrintChapters(self.reporter, self.mediainfo).run()
        reply += CheckHasChapters(self.reporter, self.mediainfo, self.eac3to).run()
        reply += CheckChapterLanguage(self.reporter, self.mediainfo).run()
        reply += CheckChapterPadding(self.reporter, self.mediainfo).run()

        return reply
115 | 


--------------------------------------------------------------------------------
/vdator/checks/__init__.py:
--------------------------------------------------------------------------------
 1 | from .check import *
 2 | 
 3 | # metadata
 4 | from .movie_name_format import CheckMovieNameFormat
 5 | from .metadata_ids import CheckMetadataIds
 6 | from .filename import CheckFilename
 7 | from .tracks_have_language import CheckTracksHaveLanguage
 8 | from .video_language_matches_first_audio_language import (
 9 |     CheckVideoLanguageMatchesFirstAudioLanguage,
10 | )
11 | from .muxing_mode import CheckMuxingMode
12 | from .mkvmerge import CheckMKVMerge
13 | from .metadata_default_flag import CheckMetadataDefaultFlag
14 | 
15 | # video
16 | from .video_track import CheckVideoTrack
17 | 
18 | # audio
19 | from .print_audio_track_names import CheckPrintAudioTrackNames
20 | from .audio_track_conversions import CheckAudioTrackConversions
21 | from .flac_audio_tracks import CheckFLACAudioTracks
22 | from .audio_track_people import CheckAudioTrackPeople
23 | from .audio_track_spellcheck import CheckAudioTrackSpellCheck
24 | 
25 | # text
26 | from .print_text_tracks import CheckPrintTextTracks
27 | from .text_order import CheckTextOrder
28 | from .text_default_flag import CheckTextDefaultFlag
29 | 
30 | # chapters
31 | from .print_chapters import CheckPrintChapters
32 | from .has_chapters import CheckHasChapters
33 | from .chapter_language import CheckChapterLanguage
34 | from .chapter_padding import CheckChapterPadding
35 | 


--------------------------------------------------------------------------------
/vdator/checks/audio_track_conversions.py:
--------------------------------------------------------------------------------
  1 | from .check import *
  2 | from .mixins import SectionId, IsCommentaryTrack
  3 | 
  4 | import re
  5 | 
  6 | 
  7 | class CheckAudioTrackConversions(Check, SectionId, IsCommentaryTrack):
  8 |     def __init__(
  9 |         self,
 10 |         reporter,
 11 |         source_detector,
 12 |         codecs,
 13 |         remove_until_first_codec,
 14 |         mediainfo,
 15 |         bdinfo,
 16 |         eac3to,
 17 |     ):
 18 |         super().__init__(reporter, mediainfo, "Error checking audio track conversions")
 19 |         self.source_detector = source_detector
 20 |         self.codecs = codecs
 21 |         self.remove_until_first_codec = remove_until_first_codec
 22 |         self.bdinfo = bdinfo
 23 |         self.eac3to = eac3to
 24 | 
 25 |     # overriding abstract method
 26 |     def get_reply(self):
 27 |         reply = ""
 28 | 
 29 |         if self.source_detector.is_dvd():
 30 |             # no audio track conversions for dvds
 31 |             reply += self.reporter.print_report(
 32 |                 "info", "No audio track conversions to check for DVDs"
 33 |             )
 34 |             return reply
 35 |         else:
 36 |             len_bdinfo = len(self.bdinfo["audio"])
 37 |             len_mediainfo = len(self.mediainfo["audio"])
 38 |             min_len = min(len_bdinfo, len_mediainfo)
 39 | 
 40 |             for i in range(0, min_len):
 41 |                 # audio = dict{'name':'...', 'language':'...'}
 42 |                 bdinfo_audio_title = re.sub(
 43 |                     r"\s+", " ", self.bdinfo["audio"][i]["name"]
 44 |                 )
 45 |                 bdinfo_audio_parts = bdinfo_audio_title.split(" / ")
 46 |                 bdinfo_audio_parts_converted = bdinfo_audio_parts.copy()
 47 | 
 48 |                 # check audio commentary
 49 |                 (is_commentary, commentary_reply) = self._check_commentary(i)
 50 | 
 51 |                 if is_commentary:
 52 |                     reply += commentary_reply
 53 |                 elif len(bdinfo_audio_parts) >= 1:
 54 |                     optionally_flac = False
 55 |                     # check audio conversions
 56 |                     if bdinfo_audio_parts[0] == "DTS-HD Master Audio":
 57 |                         # DTS-HD MA
 58 |                         channels = float(bdinfo_audio_parts[1])
 59 |                         if is_float(bdinfo_audio_parts[1]):
 60 |                             if channels < 3:
 61 |                                 # can be DTS-HD MA 1.0, DTS-HD MA 2.0, FLAC 1.0, and FLAC 2.0
 62 |                                 optionally_flac = True
 63 | 
 64 |                                 reply += self._check_audio_conversion(
 65 |                                     i,
 66 |                                     "DTS-HD Master Audio",
 67 |                                     ["DTS-HD Master Audio", "FLAC Audio"],
 68 |                                 )
 69 |                             else:
 70 |                                 reply += self._check_audio_conversion(
 71 |                                     i, "DTS-HD Master Audio", ["DTS-HD Master Audio"]
 72 |                                 )
 73 | 
 74 |                     elif bdinfo_audio_parts[0] == "LPCM Audio":
 75 |                         if (
 76 |                             is_float(bdinfo_audio_parts[1])
 77 |                             and float(bdinfo_audio_parts[1]) < 3
 78 |                         ):
 79 |                             # LPCM 1.0 or 2.0 to FLAC
 80 |                             reply += self._check_audio_conversion(
 81 |                                 i, "LPCM Audio", ["FLAC Audio"]
 82 |                             )
 83 |                             bdinfo_audio_parts_converted[0] = "FLAC Audio"
 84 |                         else:
 85 |                             # LPCM > 2.0 to DTS-HD MA
 86 |                             reply += self._check_audio_conversion(
 87 |                                 i, "LPCM Audio", ["DTS-HD Master Audio"]
 88 |                             )
 89 |                             bdinfo_audio_parts_converted[0] = "DTS-HD Master Audio"
 90 | 
 91 |                     # check track names match
 92 |                     if "title" in self.mediainfo["audio"][i]:
 93 |                         mediainfo_audio_title = self.mediainfo["audio"][i][
 94 |                             "title"
 95 |                         ].strip()
 96 |                         (
 97 |                             mediainfo_audio_title,
 98 |                             _,
 99 |                             _,
100 |                         ) = self.remove_until_first_codec.remove(mediainfo_audio_title)
101 | 
102 |                         bdinfo_audio_title = " / ".join(bdinfo_audio_parts_converted)
103 |                         bdinfo_audio_titles = [bdinfo_audio_title]
104 |                         if optionally_flac:
105 |                             # May be converted to FLAC
106 |                             # Add DTS-HD MA 1.0/2.0/2.1 and FLAC 1.0/2.0/2.1 as options
107 |                             old_bdinfo_audio_parts_converted = (
108 |                                 bdinfo_audio_parts_converted.copy()
109 |                             )
110 | 
111 |                             bdinfo_audio_parts_converted[0] = "FLAC Audio"
112 |                             # FLAC 2.0/2.1
113 |                             bdinfo_audio_titles.append(
114 |                                 " / ".join(bdinfo_audio_parts_converted)
115 |                             )
116 |                             bdinfo_audio_parts_converted[1] = "1.0"
117 |                             # FLAC 1.0
118 |                             bdinfo_audio_titles.append(
119 |                                 " / ".join(bdinfo_audio_parts_converted)
120 |                             )
121 | 
122 |                             # DTS-HD MA 2.0/2.1
123 |                             bdinfo_audio_titles.append(
124 |                                 " / ".join(old_bdinfo_audio_parts_converted)
125 |                             )
126 |                             old_bdinfo_audio_parts_converted[1] = "1.0"
127 |                             # DTS-HD MA 1.0
128 |                             bdinfo_audio_titles.append(
129 |                                 " / ".join(old_bdinfo_audio_parts_converted)
130 |                             )
131 | 
132 |                         if self.mediainfo["audio"][i]["title"] in bdinfo_audio_titles:
133 |                             reply += self.reporter.print_report(
134 |                                 "correct",
135 |                                 "Audio "
136 |                                 + self._section_id("audio", i)
137 |                                 + ": Track names match",
138 |                             )
139 |                         else:
140 |                             # use bitrate from mediainfo audio title
141 |                             m_bit_rate = re.search(
142 |                                 r"(\d+)\skbps", mediainfo_audio_title
143 |                             )
144 |                             if m_bit_rate:
145 |                                 m_bit_rate = m_bit_rate.group(1)
146 |                                 for j, title in enumerate(bdinfo_audio_titles):
147 |                                     bdinfo_audio_titles[j] = re.sub(
148 |                                         r"(.*\s)\d+(\skbps.*)",
149 |                                         r"\g<1>{}\g<2>".format(m_bit_rate),
150 |                                         title,
151 |                                     )
152 | 
153 |                             # if it has TrueHD objects, add them to the audio channel
154 |                             if (
155 |                                 "number_of_dynamic_objects"
156 |                                 in self.mediainfo["audio"][i]
157 |                             ):
158 |                                 bdinfo_audio_title = re.sub(
159 |                                     r"(.*\d\.\d)(.*)",
160 |                                     r"\g<1>+{} objects\g<2>".format(
161 |                                         self.mediainfo["audio"][i][
162 |                                             "number_of_dynamic_objects"
163 |                                         ]
164 |                                     ),
165 |                                     bdinfo_audio_title,
166 |                                 )
167 |                                 bdinfo_audio_titles.append(bdinfo_audio_title)
168 | 
169 |                             # bdinfo_audio_titles has list of possible titles
170 |                             if mediainfo_audio_title not in bdinfo_audio_titles:
171 |                                 reply += self.reporter.print_report(
172 |                                     "error",
173 |                                     "Audio "
174 |                                     + self._section_id("audio", i)
175 |                                     + ": Bad conversion:\n```fix\nBDInfo: "
176 |                                     + bdinfo_audio_title
177 |                                     + "\nMediaInfo: "
178 |                                     + self.mediainfo["audio"][i]["title"]
179 |                                     + "```",
180 |                                     new_line=False,
181 |                                 )
182 |                                 reply += show_diff(
183 |                                     self.mediainfo["audio"][i]["title"],
184 |                                     bdinfo_audio_title,
185 |                                 )
186 |                             else:
187 |                                 reply += self.reporter.print_report(
188 |                                     "correct",
189 |                                     "Audio "
190 |                                     + self._section_id("audio", i)
191 |                                     + ": Track names match",
192 |                                 )
193 |                     else:
194 |                         reply += self.reporter.print_report(
195 |                             "error",
196 |                             "Audio "
197 |                             + self._section_id("audio", i)
198 |                             + ": Missing track name",
199 |                         )
200 | 
201 |             if min_len < len_mediainfo:
202 |                 reply += self.reporter.print_report(
203 |                     "warning",
204 |                     "Checked first `{}/{}` audio tracks".format(min_len, len_mediainfo),
205 |                 )
206 | 
207 |         return reply
208 | 
209 |     def _check_commentary(self, i):
210 |         reply, is_commentary = "", False
211 | 
212 |         if self._is_commentary_track(self.mediainfo["audio"][i]["title"]):
213 |             is_commentary = True
214 |             # audio = dict{'name':'...', 'language':'...'}
215 |             if self.bdinfo["audio"][i]["name"].count("/") >= 1:
216 |                 bdinfo_audio_format = (
217 |                     self.bdinfo["audio"][i]["name"].split("/")[0].strip()
218 |                 )
219 | 
220 |                 if bdinfo_audio_format == "Dolby Digital Audio":
221 |                     if "format" in self.mediainfo["audio"][i]:
222 |                         if self.mediainfo["audio"][i]["format"] == "AC-3":
223 |                             reply += self.reporter.print_report(
224 |                                 "correct",
225 |                                 "Audio "
226 |                                 + self._section_id("audio", i)
227 |                                 + ": Commentary already AC-3",
228 |                             )
229 |                         else:
230 |                             reply += self.reporter.print_report(
231 |                                 "error",
232 |                                 "Audio "
233 |                                 + self._section_id("audio", i)
234 |                                 + ": Commentary should be AC-3 instead of "
235 |                                 + self.mediainfo["audio"][i]["format"],
236 |                             )
237 |                     else:
238 |                         reply += self.reporter.print_report(
239 |                             "error",
240 |                             "Audio "
241 |                             + self._section_id("audio", i)
242 |                             + ": Commentary does not have a format",
243 |                         )
244 | 
245 |                     return is_commentary, reply
246 |             else:
247 |                 reply += self.reporter.print_report(
248 |                     "warning",
249 |                     "Audio #"
250 |                     + self._section_id("audio", i)
251 |                     + ": Cannot verify commentary audio conversion",
252 |                 )
253 |                 return is_commentary, reply
254 | 
255 |             if (
256 |                 "format" in self.mediainfo["audio"][i]
257 |                 and self.mediainfo["audio"][i]["format"] == "AC-3"
258 |             ):
259 |                 if "bit_rate" in self.mediainfo["audio"][i]:
260 |                     bit_rate = "".join(
261 |                         re.findall(r"[\d]+", self.mediainfo["audio"][i]["bit_rate"])
262 |                     )
263 |                     if bit_rate == "224":
264 |                         reply += self.reporter.print_report(
265 |                             "correct",
266 |                             "Audio "
267 |                             + self._section_id("audio", i)
268 |                             + ": Commentary converted to `AC-3 @ 224 kbps`",
269 |                         )
270 |                     else:
271 |                         reply += self.reporter.print_report(
272 |                             "error",
273 |                             "Audio "
274 |                             + self._section_id("audio", i)
275 |                             + ": Commentary AC-3 bitrate should be `224 kbps` instead of `"
276 |                             + self.mediainfo["audio"][i]["bit_rate"]
277 |                             + "`",
278 |                         )
279 |                 else:
280 |                     reply += self.reporter.print_report(
281 |                         "error",
282 |                         "Audio "
283 |                         + self._section_id("audio", i)
284 |                         + ": Commentary AC-3 does not have a bitrate",
285 |                     )
286 |             else:
287 |                 reply += self.reporter.print_report(
288 |                     "info",
289 |                     "Audio "
290 |                     + self._section_id("audio", i)
291 |                     + ": Commentary may be converted to AC-3",
292 |                 )
293 | 
294 |         return is_commentary, reply
295 | 
296 |     def _check_audio_conversion(self, i, audio_from, audio_to):
297 |         reply = ""
298 | 
299 |         # verify audio track titles
300 |         if (
301 |             " / " not in self.bdinfo["audio"][i]["name"]
302 |             or "title" not in self.mediainfo["audio"][i]
303 |             or " / " not in self.mediainfo["audio"][i]["title"]
304 |         ):
305 |             reply += self.reporter.print_report(
306 |                 "warning", "Could not verify audio " + self._section_id("audio", i)
307 |             )
308 |             return reply
309 | 
310 |         # [codec, channel, sampling rate, bit rate, bit depth]
311 |         bdinfo_audio_parts = self.bdinfo["audio"][i]["name"].split(" / ")
312 |         if len(bdinfo_audio_parts) <= 4:
313 |             reply += self.reporter.print_report(
314 |                 "warning", "Could not verify audio " + self._section_id("audio", i)
315 |             )
316 |             return reply
317 | 
318 |         mediainfo_audio_title = self.mediainfo["audio"][i]["title"]
319 |         (mediainfo_audio_title, _, _) = self.remove_until_first_codec.remove(
320 |             mediainfo_audio_title
321 |         )
322 | 
323 |         # [codec, channel, sampling rate, bit rate, bit depth]
324 |         mediainfo_parts = mediainfo_audio_title.split(" / ")
325 |         if len(mediainfo_parts) <= 4:
326 |             reply += self.reporter.print_report(
327 |                 "warning", "Could not verify audio " + self._section_id("audio", i)
328 |             )
329 |             return reply
330 | 
331 |         # verify audio conversions
332 |         if mediainfo_parts[0] in audio_to:
333 |             disable_channels_check = self._eac3to_log_has_mono()
334 | 
335 |             if (
336 |                 not disable_channels_check
337 |                 and mediainfo_parts[1] != bdinfo_audio_parts[1]
338 |             ):
339 |                 reply += self.reporter.print_report(
340 |                     "error",
341 |                     "Audio "
342 |                     + self._section_id("audio", i)
343 |                     + ": Channels should be `"
344 |                     + bdinfo_audio_parts[1]
345 |                     + "` instead of `"
346 |                     + mediainfo_parts[1]
347 |                     + "`",
348 |                 )
349 | 
350 |             # mediainfo bitrate should be less than bdinfo bitrate
351 |             try:
352 |                 m_bit_rate = int(
353 |                     "".join(re.findall(r"\d+", mediainfo_parts[3].strip()))
354 |                 )
355 | 
356 |                 bd_bit_rate = int(
357 |                     "".join(re.findall(r"\d+", bdinfo_audio_parts[3].strip()))
358 |                 )
359 | 
360 |                 if m_bit_rate > bd_bit_rate:
361 |                     reply += self.reporter.print_report(
362 |                         "error",
363 |                         "Audio "
364 |                         + self._section_id("audio", i)
365 |                         + ": MediaInfo bitrate is greater than BDInfo bitrate: `"
366 |                         + str(m_bit_rate)
367 |                         + " kbps > "
368 |                         + str(bd_bit_rate)
369 |                         + " kbps`",
370 |                     )
371 |             except ValueError:
372 |                 pass
373 |         else:
374 |             reply += self.reporter.print_report(
375 |                 "error",
376 |                 "Audio "
377 |                 + self._section_id("audio", i)
378 |                 + " should be converted to one of ["
379 |                 + ", ".join(audio_to)
380 |                 + "]",
381 |             )
382 | 
383 |         return reply
384 | 
385 |     def _eac3to_log_has_mono(self):
386 |         # get command-lines
387 | 
388 |         cmd_lines_mono = list()
389 |         for log in self.eac3to:
390 |             cmd_lines_mono.extend(
391 |                 [
392 |                     l.lower()
393 |                     for l in log
394 |                     if l.lower().startswith("command line:")
395 |                     and "-mono" in l.lower().split()
396 |                 ]
397 |             )
398 | 
399 |         return len(cmd_lines_mono) > 0
400 | 


--------------------------------------------------------------------------------
/vdator/checks/audio_track_people.py:
--------------------------------------------------------------------------------
 1 | from .check import *
 2 | from .mixins import SectionId
 3 | 
 4 | from nltk_people import extract_names
 5 | 
 6 | 
 7 | class CheckAudioTrackPeople(Check, SectionId):
 8 |     def __init__(self, reporter, remove_until_first_codec, mediainfo, tmdb, ia):
 9 |         super().__init__(reporter, mediainfo, "Error checking IMDb/TMDb people")
10 |         self.remove_until_first_codec = remove_until_first_codec
11 |         self.tmdb = tmdb
12 |         self.ia = ia
13 | 
14 |     # overriding abstract method
15 |     def get_reply(self):
16 |         reply = ""
17 | 
18 |         # check people in audio track names
19 |         for i, _ in enumerate(self.mediainfo["audio"]):
20 |             if "title" in self.mediainfo["audio"][i]:
21 |                 title = self.mediainfo["audio"][i]["title"]
22 | 
23 |                 # skip if has an audio codec
24 |                 _, _, found_codec = self.remove_until_first_codec.remove(title)
25 |                 if found_codec:
26 |                     continue
27 | 
28 |                 # try to match names
29 |                 matched_names = list()
30 |                 names = extract_names(title)
31 |                 search = self.tmdb.Search()
32 |                 for n in names:
33 |                     # TMDb API
34 |                     try:
35 |                         search.person(query=n)
36 |                         for s in search.results:
37 |                             if n == s["name"]:
38 |                                 matched_names.append(n)
39 |                     except:
40 |                         reply += self.reporter.print_report(
41 |                             "info",
42 |                             "Audio "
43 |                             + self._section_id("audio", i)
44 |                             + ": Failed to get TMDb people data",
45 |                         )
46 |                     # IMDb API
47 |                     try:
48 |                         for person in self.ia.search_person(n):
49 |                             if n == person["name"]:
50 |                                 matched_names.append(n)
51 |                     except:
52 |                         reply += self.reporter.print_report(
53 |                             "info",
54 |                             "Audio "
55 |                             + self._section_id("audio", i)
56 |                             + ": Failed to get IMDb people data",
57 |                         )
58 |                 matched_names = set(matched_names)
59 |                 if len(matched_names) > 0:
60 |                     reply += self.reporter.print_report(
61 |                         "correct",
62 |                         "Audio "
63 |                         + self._section_id("audio", i)
64 |                         + " People Matched: `"
65 |                         + ", ".join(matched_names)
66 |                         + "`",
67 |                     )
68 |                 unmatched_names = set(names) - set(matched_names)
69 |                 if len(unmatched_names) > 0:
70 |                     reply += self.reporter.print_report(
71 |                         "warning",
72 |                         "Audio "
73 |                         + self._section_id("audio", i)
74 |                         + " People Unmatched: `"
75 |                         + ", ".join(unmatched_names)
76 |                         + "`",
77 |                     )
78 | 
79 |         return reply
80 | 


--------------------------------------------------------------------------------
/vdator/checks/audio_track_spellcheck.py:
--------------------------------------------------------------------------------
 1 | from .check import *
 2 | from .mixins import SectionId
 3 | 
 4 | from dotenv import load_dotenv
 5 | import nltk
 6 | from nltk_people import extract_names
 7 | import hunspell, os, string
 8 | 
 9 | # load environment variables
10 | load_dotenv()
11 | 
12 | HUNSPELL_LANG = [x.strip() for x in os.environ.get("HUNSPELL_LANG").split(",")]
13 | MISSPELLED_IGNORE_LIST = [
14 |     x.strip() for x in os.environ.get("MISSPELLED_IGNORE_LIST").split(",")
15 | ]
16 | 
17 | 
class CheckAudioTrackSpellCheck(Check, SectionId):
    """Spell checks the free-text part of audio track titles with hunspell."""

    def __init__(self, reporter, remove_until_first_codec, mediainfo):
        super().__init__(reporter, mediainfo, "Error spell checking audio track names")
        # the first two HUNSPELL_LANG entries are handed to hunspell as they are
        self.hobj = hunspell.HunSpell(HUNSPELL_LANG[0], HUNSPELL_LANG[1])
        self.remove_until_first_codec = remove_until_first_codec

    # overriding abstract method
    def get_reply(self):
        """Report the misspelled words of every titled audio track.

        The text before the first codec (or the whole title when no codec is
        found) is checked. Words that belong to extracted names and words on
        the ignore list are not reported. Each word is listed once, in the
        order in which it first appears in the title.
        """
        reply = ""

        # map punctuation to space; the table is the same for every track
        translator = str.maketrans(string.punctuation, " " * len(string.punctuation))
        # candidates are compared in lower case, so normalise the entries too
        ignored_words = {word.lower() for word in MISSPELLED_IGNORE_LIST}

        # spellcheck audio track names
        for i, track in enumerate(self.mediainfo["audio"]):
            if "title" not in track:
                continue

            title, title_parts, found_codec = self.remove_until_first_codec.remove(
                track["title"]
            )

            # spellcheck title parts before codec or entire audio title
            spellcheck_text = " ".join(title_parts) if found_codec else title
            if not spellcheck_text:
                continue
            spellcheck_text = spellcheck_text.translate(translator)

            # ignore names
            name_words = {
                part for name in extract_names(spellcheck_text) for part in name.split()
            }

            # tokenize
            tokens = [
                t for t in nltk.word_tokenize(spellcheck_text) if t not in name_words
            ]

            # dict.fromkeys removes duplicates while keeping first-seen order
            misspelled_words = list(
                dict.fromkeys(t for t in tokens if not self.hobj.spell(t))
            )
            misspelled_words = [
                word for word in misspelled_words if word.lower() not in ignored_words
            ]
            if len(misspelled_words) > 0:
                reply += self.reporter.print_report(
                    "error",
                    "Audio "
                    + self._section_id("audio", i)
                    + " Misspelled: `"
                    + ", ".join(misspelled_words)
                    + "`",
                )

        return reply
75 | 


--------------------------------------------------------------------------------
/vdator/checks/chapter_language.py:
--------------------------------------------------------------------------------
  1 | from .check import *
  2 | 
  3 | from iso639 import languages as iso639_languages
  4 | from langdetect import detect as langdetect_detect, DetectorFactory
  5 | 
# make language detection deterministic by fixing the seed on langdetect's
# DetectorFactory
DetectorFactory.seed = 0
  8 | 
  9 | 
 10 | class CheckChapterLanguage(Check):
 11 |     def __init__(self, reporter, mediainfo):
 12 |         super().__init__(reporter, mediainfo, "Error checking chapter language")
 13 | 
 14 |     # overriding abstract method
 15 |     def get_reply(self):
 16 |         reply = ""
 17 | 
 18 |         if "menu" in self.mediainfo and len(self.mediainfo["menu"]) > 0:
 19 |             if len(self.mediainfo["menu"]) >= 1:
 20 |                 for i, chapters in enumerate(self.mediainfo["menu"]):
 21 |                     if len(chapters) >= 1:
 22 |                         # chapter numbers that have an invalid language
 23 |                         invalid_ch_lang_nums = list()
 24 |                         # chapters = list of chapters
 25 |                         # [{'time': '...', 'titles': [{'language': '...', 'title': '...'}, ...], 'languages': ['...', '...']}]
 26 |                         # {'time': '...', 'titles': [{'language': '...', 'title': '...'}, ...], 'languages': ['...', '...']}
 27 |                         ch_0 = chapters[0]
 28 |                         # concatenate all chapter titles into phrases
 29 |                         # ch_0["languages"] = ['...', '...']
 30 |                         # chapter_phrases = {'de': '...', 'en': '...'}
 31 |                         chapter_phrases = {k: "" for k in ch_0["languages"]}
 32 |                         # list of detected languages with chapter languages as keys
 33 |                         # chapter_langs = {'de': [...], 'en': [...]}
 34 |                         chapter_langs = {k: list() for k in ch_0["languages"]}
 35 | 
 36 |                         for ch in chapters:
 37 |                             for j, lang in enumerate(ch["languages"]):
 38 |                                 if lang:
 39 |                                     try:
 40 |                                         ch_lang = iso639_languages.get(part1=lang)
 41 |                                         # store chapter language
 42 |                                         chapter_langs[lang].append(ch_lang)
 43 |                                     except KeyError:
 44 |                                         # store invalid chapter number
 45 |                                         invalid_ch_lang_nums.append(str(j + 1))
 46 |                                 else:
 47 |                                     # store invalid chapter number
 48 |                                     invalid_ch_lang_nums.append(str(j + 1))
 49 | 
 50 |                             for title in ch["titles"]:
 51 |                                 # store as key "NA" if there is no chapter language set
 52 |                                 if title["language"] is None:
 53 |                                     title["language"] = "NA"
 54 |                                 if title["language"] not in chapter_phrases:
 55 |                                     chapter_phrases[title["language"]] = ""
 56 |                                 chapter_phrases[title["language"]] += (
 57 |                                     title["title"] + "\n"
 58 |                                 )
 59 | 
 60 |                         if len(invalid_ch_lang_nums) > 0:
 61 |                             if len(invalid_ch_lang_nums) == len(chapters):
 62 |                                 reply += self.reporter.print_report(
 63 |                                     "error",
 64 |                                     f"Chapters {i + 1}: All chapters do not have a language set",
 65 |                                 )
 66 |                             elif len(invalid_ch_lang_nums) > 0:
 67 |                                 reply += self.reporter.print_report(
 68 |                                     "error",
 69 |                                     f"Chapters {i + 1}: The following chapters do not have a language set: `"
 70 |                                     + ", ".join(invalid_ch_lang_nums)
 71 |                                     + "`",
 72 |                                 )
 73 |                             else:
 74 |                                 reply += self.reporter.print_report(
 75 |                                     "correct",
 76 |                                     f"Chapters {i + 1}: All chapters have a language set",
 77 |                                 )
 78 | 
 79 |                         for k, chapter_phrase in chapter_phrases.items():
 80 |                             if k == "NA":
 81 |                                 reply += self.reporter.print_report(
 82 |                                     "error",
 83 |                                     f"Chapters {i + 1}: No chapter language set",
 84 |                                 )
 85 |                                 continue
 86 |                             if chapter_phrase:
 87 |                                 chapter_langs[k] = list(set(chapter_langs[k]))
 88 |                                 try:
 89 |                                     detected_lang = langdetect_detect(chapter_phrase)
 90 |                                     ch_detected_lang = iso639_languages.get(
 91 |                                         part1=detected_lang
 92 |                                     )
 93 |                                     if ch_detected_lang in chapter_langs[k]:
 94 |                                         reply += self.reporter.print_report(
 95 |                                             "correct",
 96 |                                             f"Chapters {i + 1}: Language matches detected language: `"
 97 |                                             + ch_detected_lang.name
 98 |                                             + "`",
 99 |                                         )
100 |                                     else:
101 |                                         chapter_langs_names = ", ".join(
102 |                                             list(
103 |                                                 set(
104 |                                                     [
105 |                                                         detected_lang.name
106 |                                                         for detected_lang in chapter_langs[
107 |                                                             k
108 |                                                         ]
109 |                                                     ]
110 |                                                 )
111 |                                             )
112 |                                         )
113 |                                         if chapter_langs_names:
114 |                                             reply += self.reporter.print_report(
115 |                                                 "error",
116 |                                                 f"Chapters {i + 1}: Languages: `"
117 |                                                 + chapter_langs_names
118 |                                                 + "` do not match detected language: `"
119 |                                                 + ch_detected_lang.name
120 |                                                 + "`",
121 |                                             )
122 |                                         else:
123 |                                             reply += self.reporter.print_report(
124 |                                                 "error",
125 |                                                 f"Chapters {i + 1}: No chapter languages. Detected language: `"
126 |                                                 + ch_detected_lang.name
127 |                                                 + "`",
128 |                                             )
129 |                                 except KeyError:
130 |                                     reply += self.reporter.print_report(
131 |                                         "warning", "Could not detect chapters language"
132 |                                     )
133 |             else:
134 |                 reply += self.reporter.print_report(
135 |                     "error", "Must have at least 1 chapter menu"
136 |                 )
137 | 
138 |         return reply
139 | 


--------------------------------------------------------------------------------
/vdator/checks/chapter_padding.py:
--------------------------------------------------------------------------------
 1 | from .check import *
 2 | 
 3 | import re
 4 | 
 5 | 
 6 | class CheckChapterPadding(Check):
 7 |     def __init__(self, reporter, mediainfo):
 8 |         super().__init__(reporter, mediainfo, "Error checking chapter padding")
 9 | 
10 |     # overriding abstract method
11 |     def get_reply(self):
12 |         reply, padded_correctly = "", True
13 | 
14 |         if "menu" in self.mediainfo and len(self.mediainfo["menu"]) > 0:
15 |             if len(self.mediainfo["menu"]) >= 1:
16 |                 for i, menu in enumerate(self.mediainfo["menu"]):
17 |                     padded_correctly = True
18 |                     num_chapters = len(menu)
19 |                     for ch in menu:
20 |                         for title in ch["titles"]:
21 |                             if re.search(
22 |                                 r"^chapter\s\d+", title["title"], re.IGNORECASE
23 |                             ):
24 |                                 # numbered chapter
25 |                                 ch_num = "".join(re.findall(r"[\d]+", title["title"]))
26 |                                 if ch_num != ch_num.zfill(len(str(num_chapters))):
27 |                                     padded_correctly = False
28 |                                     break
29 |                     if padded_correctly:
30 |                         reply += self.reporter.print_report(
31 |                             "correct", f"Chapters {i + 1}: Properly padded"
32 |                         )
33 |                     else:
34 |                         reply += self.reporter.print_report(
35 |                             "error", f"Chapters {i + 1}: Incorrect padding"
36 |                         )
37 | 
38 |         return reply
39 | 


--------------------------------------------------------------------------------
/vdator/checks/check.py:
--------------------------------------------------------------------------------
 1 | import sys, traceback
 2 | 
 3 | # allow imports from parent directory
 4 | sys.path.append("../")
 5 | 
 6 | from abc import abstractmethod
 7 | from pydash import has
 8 | from helpers import has_many, show_diff, is_float
 9 | 
10 | 
class Check(object):
    """
    Base class for all checks.

    Subclasses implement get_reply(); callers use run(), which turns any
    error raised by the check into a "fail" report line.
    """

    def __init__(self, reporter, mediainfo, run_fail_msg):
        # reporter: object whose print_report(type, message) returns a string
        self.reporter = reporter
        # mediainfo: parsed mediainfo data the check reads from
        self.mediainfo = mediainfo
        # run_fail_msg: message reported when the check itself raises
        self.run_fail_msg = run_fail_msg

    def run(self):
        """
        Runs the check and returns reply.
        Wraps check in try...except to prevent crashes

        Returns
        -------
        reply string
        """
        reply = ""
        try:
            reply += self.get_reply()
        except Exception:
            # only swallow ordinary errors; KeyboardInterrupt and SystemExit
            # must still be able to stop the process
            traceback.print_exc()
            reply += self.reporter.print_report("fail", self.run_fail_msg)
        return reply

    @abstractmethod
    def get_reply(self):
        """
        Gets reply from this check

        Returns
        -------
        reply string
        """
        pass
44 | 


--------------------------------------------------------------------------------
/vdator/checks/filename.py:
--------------------------------------------------------------------------------
  1 | from .check import *
  2 | 
  3 | from dotenv import load_dotenv
  4 | import os, re, string, unidecode
  5 | 
  6 | # load environment variables
  7 | load_dotenv()
  8 | 
# Settings read from the environment once, at import time.
# NOTE: os.environ.get() returns None for an unset variable, so a missing
# FILENAME_CUTS, RELEASE_GROUP, TRAINEE_CHANNELS or INTERNAL_CHANNELS raises
# AttributeError here, while the module is being imported.

# cut names from the comma-separated FILENAME_CUTS; the leading None is the "no cut" case
CUTS = [None] + [x.strip() for x in os.environ.get("FILENAME_CUTS").split(",")]
# release group name appended to the expected file name for internal channels
RELEASE_GROUP = os.environ.get("RELEASE_GROUP").strip()
# channel names from the comma-separated TRAINEE_CHANNELS
TRAINEE_CHANNELS = [x.strip() for x in os.environ.get("TRAINEE_CHANNELS").split(",")]
# channel names from the comma-separated INTERNAL_CHANNELS
INTERNAL_CHANNELS = [x.strip() for x in os.environ.get("INTERNAL_CHANNELS").split(",")]
 13 | 
 14 | 
 15 | class CheckFilename(Check):
 16 |     def __init__(
 17 |         self,
 18 |         reporter,
 19 |         source_detector,
 20 |         codecs,
 21 |         remove_until_first_codec,
 22 |         mediainfo,
 23 |         bdinfo,
 24 |         channel_name,
 25 |     ):
 26 |         super().__init__(reporter, mediainfo, "Error checking filename")
 27 |         self.source_detector = source_detector
 28 |         self.codecs = codecs
 29 |         self.remove_until_first_codec = remove_until_first_codec
 30 |         self.bdinfo = bdinfo
 31 |         self.channel_name = channel_name
 32 | 
 33 |     # overriding abstract method
 34 |     def get_reply(self):
 35 |         reply = ""
 36 | 
 37 |         if has_many(self.mediainfo, "general.0", ["movie_name", "complete_name"]):
 38 |             complete_name = self.mediainfo["general"][0]["complete_name"]
 39 |             if "\\" in complete_name:
 40 |                 complete_name = complete_name.split("\\")[-1]
 41 |             elif "/" in complete_name:
 42 |                 complete_name = complete_name.split("/")[-1]
 43 | 
 44 |             # possible release names
 45 |             complete_name_lc = complete_name.lower()
 46 |             possible_release_names = [
 47 |                 self._construct_release_name(
 48 |                     cut,
 49 |                     hybird=("hybrid" in complete_name_lc),
 50 |                     repack=("repack" in complete_name_lc),
 51 |                 )
 52 |                 for cut in CUTS
 53 |             ]
 54 | 
 55 |             if (
 56 |                 self.channel_name in INTERNAL_CHANNELS
 57 |                 and complete_name in possible_release_names
 58 |             ):
 59 |                 reply += self.reporter.print_report(
 60 |                     "correct", "Filename: `" + complete_name + "`"
 61 |                 )
 62 |             elif self._partial_match(possible_release_names, complete_name):
 63 |                 reply += self.reporter.print_report(
 64 |                     "correct", "Filename: `" + complete_name + "`"
 65 |                 )
 66 |             else:
 67 |                 expected_release_name = possible_release_names[0]
 68 | 
 69 |                 # pick the expected release name with the proper cut
 70 |                 for i, cut in enumerate(CUTS[1:]):
 71 |                     if cut in complete_name:
 72 |                         expected_release_name = possible_release_names[i + 1]
 73 | 
 74 |                 if self.channel_name not in INTERNAL_CHANNELS:
 75 |                     expected_release_name += "GRouP.mkv"
 76 | 
 77 |                 reply += self.reporter.print_report(
 78 |                     "error",
 79 |                     "Filename missmatch:\n```fix\nFilename: "
 80 |                     + complete_name
 81 |                     + "\nExpected: "
 82 |                     + expected_release_name
 83 |                     + "```",
 84 |                     new_line=False,
 85 |                 )
 86 |                 reply += show_diff(complete_name, expected_release_name)
 87 |         else:
 88 |             reply += self.reporter.print_report("error", "Cannot validate filename")
 89 | 
 90 |         return reply
 91 | 
 92 |     def _construct_release_name(self, cut=None, hybird=False, repack=False):
 93 |         release_name = ""
 94 | 
 95 |         if not self.source_detector.is_dvd():
 96 |             # scan type must come from bdinfo
 97 |             bdinfo_video_parts = self.bdinfo["video"][0].split(" / ")
 98 |             scan_type = bdinfo_video_parts[2].strip()[-1].lower()
 99 | 
100 |         if has_many(self.mediainfo, "video.0", ["height", "title"]) and has(
101 |             self.mediainfo, "audio.0.title"
102 |         ):
103 |             # Name.S01E01 or Name.S01E01E02
104 |             tv_show_name_search = re.search(
105 |                 r"(.+)\s-\s(S\d{2}(E\d{2})+)",
106 |                 self.mediainfo["general"][0]["movie_name"],
107 |             )
108 |             # Name.Year
109 |             movie_name_search = re.search(
110 |                 r"(.+)\s\((\d{4})\)", self.mediainfo["general"][0]["movie_name"]
111 |             )
112 |             if tv_show_name_search:
113 |                 title = self._format_filename_title(tv_show_name_search.group(1))
114 |                 season_episode = tv_show_name_search.group(2).strip()
115 |                 release_name += title + "." + season_episode
116 |             elif movie_name_search:
117 |                 title = self._format_filename_title(movie_name_search.group(1))
118 |                 year = movie_name_search.group(2).strip()
119 |                 release_name += title + "." + year
120 |             else:
121 |                 release_name += self._format_filename_title(
122 |                     self.mediainfo["general"][0]["movie_name"]
123 |                 )
124 | 
125 |             # with or without hybrid
126 |             if hybird:
127 |                 release_name += ".Hybrid"
128 | 
129 |             # with or without repack
130 |             if repack:
131 |                 release_name += ".REPACK"
132 | 
133 |             # check cuts here
134 |             if cut is not None:
135 |                 release_name += "." + cut
136 | 
137 |             # resolution (ex. 1080p)
138 |             height = "".join(re.findall(r"[\d]+", self.mediainfo["video"][0]["height"]))
139 | 
140 |             if self.source_detector.is_dvd():
141 |                 # source DVD
142 |                 if "standard" in self.mediainfo["video"][0]:
143 |                     release_name += "." + self.mediainfo["video"][0]["standard"]
144 |                 release_name += ".DVD.REMUX"
145 |             elif self.source_detector.is_uhd():
146 |                 # source UHD BluRay
147 |                 release_name += "." + height
148 |                 release_name += scan_type
149 |                 release_name += ".UHD.BluRay.REMUX"
150 |                 # Dolby Vision (DV)
151 |                 if self.source_detector.is_dv():
152 |                     release_name += ".DV"
153 |                 # SDR/HDR
154 |                 if self.mediainfo["video"][0]["color_primaries"] == "BT.2020":
155 |                     release_name += ".HDR"
156 |                 else:
157 |                     release_name += ".SDR"
158 |             else:
159 |                 # source HD BluRay
160 |                 release_name += "." + height
161 |                 release_name += scan_type
162 |                 release_name += ".BluRay.REMUX"
163 | 
164 |             # video format (ex. AVC)
165 |             main_video_title = self.mediainfo["video"][0]["title"].split(" / ")
166 |             if len(main_video_title) >= 1:
167 |                 release_name += "." + self.codecs.get_video_codec_title_name(
168 |                     main_video_title[0].strip()
169 |                 )
170 | 
171 |             main_audio_title = self.mediainfo["audio"][0]["title"]
172 |             (
173 |                 main_audio_title,
174 |                 _,
175 |                 _,
176 |             ) = self.remove_until_first_codec.remove(main_audio_title)
177 |             main_audio_title_parts = main_audio_title.split(" / ")
178 | 
179 |             audio_codec_title, main_audio_channels = None, None
180 | 
181 |             # get main audio codec
182 |             if len(main_audio_title) > 0:
183 |                 main_audio_codec = main_audio_title_parts[0]
184 |                 if self.codecs.is_audio_title(main_audio_codec):
185 |                     audio_codec_title = self.codecs.get_audio_codec_title_name(
186 |                         main_audio_codec
187 |                     )
188 | 
189 |             # get main audio channels
190 |             if len(main_audio_title) > 1:
191 |                 main_audio_channels = main_audio_title_parts[1]
192 |                 search_channel_atmos = re.search(
193 |                     r"(\d.\d)\+\d+\sobjects", main_audio_channels
194 |                 )
195 |                 if search_channel_atmos:
196 |                     main_audio_channels = search_channel_atmos.group(1)
197 | 
198 |             if (
199 |                 audio_codec_title
200 |                 and main_audio_channels
201 |                 and is_float(main_audio_channels)
202 |             ):
203 |                 # have main audio codec and channels
204 |                 if audio_codec_title == "TrueHD.Atmos":
205 |                     # atmos channel
206 |                     release_name += ".TrueHD." + main_audio_channels + ".Atmos"
207 |                 else:
208 |                     release_name += "." + audio_codec_title + "." + main_audio_channels
209 | 
210 |             # release group
211 |             release_name += "-"
212 |             if self.channel_name in INTERNAL_CHANNELS:
213 |                 release_name += RELEASE_GROUP + ".mkv"
214 | 
215 |         # replace multiple dots with one
216 |         release_name = re.sub("\.+", ".", release_name)
217 | 
218 |         return release_name
219 | 
220 |     def _format_filename_title(self, title):
221 |         title = title.strip()
222 |         # remove accents
223 |         title = unidecode.unidecode(title)
224 |         # remove punctuation
225 |         title = title.replace("&", "and")
226 |         title = "".join([i for i in title if not i in string.punctuation or i == "."])
227 |         title = title.replace(":", ".")
228 |         # replace spaces with dots
229 |         title = title.replace(" ", ".")
230 |         # force single dots
231 |         title = re.sub(r"\.+", ".", title)
232 |         return title
233 | 
234 |     def _partial_match(self, possible_names, name):
235 |         for n in possible_names:
236 |             if n in name:
237 |                 return True
238 |         return False
239 | 


--------------------------------------------------------------------------------
/vdator/checks/flac_audio_tracks.py:
--------------------------------------------------------------------------------
 1 | from .check import *
 2 | from .mixins import SectionId
 3 | 
 4 | import re
 5 | 
 6 | 
 7 | class CheckFLACAudioTracks(Check, SectionId):
 8 |     def __init__(self, reporter, remove_until_first_codec, mediainfo):
 9 |         super().__init__(reporter, mediainfo, "Error checking FLAC audio tracks")
10 |         self.remove_until_first_codec = remove_until_first_codec
11 | 
12 |     # overriding abstract method
13 |     def get_reply(self):
14 |         # check FLAC Audio tracks using mediainfo
15 |         reply = ""
16 | 
17 |         if len(self.mediainfo["audio"]) > 0:
18 |             for i, audio_track in enumerate(self.mediainfo["audio"]):
19 |                 # skip if no title
20 |                 if "title" not in audio_track:
21 |                     continue
22 | 
23 |                 # skip if no codec info
24 |                 audio_title, _, found_codec = self.remove_until_first_codec.remove(
25 |                     audio_track["title"]
26 |                 )
27 |                 if not found_codec:
28 |                     continue
29 | 
30 |                 if "format" in audio_track and audio_track["format"] == "FLAC":
31 |                     channels = float(
32 |                         "".join(
33 |                             re.findall(
34 |                                 r"\d*\.\d+|\d+", audio_track["channels"].strip().lower()
35 |                             )
36 |                         )
37 |                     )
38 |                     sampling_rate = int(
39 |                         float(
40 |                             "".join(
41 |                                 re.findall(
42 |                                     r"\d*\.\d+|\d+",
43 |                                     audio_track["sampling_rate"].strip().lower(),
44 |                                 )
45 |                             )
46 |                         )
47 |                     )
48 |                     bit_rate = int(
49 |                         "".join(
50 |                             re.findall(r"\d+", audio_track["bit_rate"].strip().lower())
51 |                         )
52 |                     )
53 |                     bit_depth = (
54 |                         audio_track["bit_depth"]
55 |                         .strip()
56 |                         .lower()
57 |                         .replace(" bits", "-bit")
58 |                     )
59 |                     test_title = (
60 |                         "FLAC Audio / "
61 |                         + "{:.1f}".format(channels)
62 |                         + " / "
63 |                         + str(sampling_rate)
64 |                         + " kHz / "
65 |                         + str(bit_rate)
66 |                         + " kbps / "
67 |                         + bit_depth
68 |                     )
69 | 
70 |                     if test_title == audio_title:
71 |                         reply += self.reporter.print_report(
72 |                             "correct",
73 |                             "Audio "
74 |                             + self._section_id("audio", i)
75 |                             + ": FLAC Good track name (from MediaInfo)",
76 |                         )
77 |                     else:
78 |                         reply += self.reporter.print_report(
79 |                             "error",
80 |                             "Audio "
81 |                             + self._section_id("audio", i)
82 |                             + ": FLAC Bad track name (from MediaInfo):\n```fix\nActual: "
83 |                             + audio_title
84 |                             + "\nExpected: "
85 |                             + test_title
86 |                             + "```",
87 |                             new_line=False,
88 |                         )
89 | 
90 |         return reply
91 | 


--------------------------------------------------------------------------------
/vdator/checks/has_chapters.py:
--------------------------------------------------------------------------------
 1 | from .check import *
 2 | 
 3 | 
 4 | class CheckHasChapters(Check):
 5 |     def __init__(self, reporter, mediainfo, eac3to):
 6 |         super().__init__(reporter, mediainfo, "Error checking if should have chapters")
 7 |         self.eac3to = eac3to
 8 | 
 9 |     # overriding abstract method
10 |     def get_reply(self):
11 |         reply, should_have_chapters = "", False
12 |         for log in self.eac3to:
13 |             for l in log:
14 |                 if "chapters" in l:
15 |                     should_have_chapters = True
16 |         if should_have_chapters:
17 |             if len(self.mediainfo["menu"]) > 0:
18 |                 reply += self.reporter.print_report(
19 |                     "correct", "Has chapters (from eac3to log)"
20 |                 )
21 |             else:
22 |                 reply += self.reporter.print_report(
23 |                     "error", "Should have chapters (from eac3to log)"
24 |                 )
25 |         return reply
26 | 


--------------------------------------------------------------------------------
/vdator/checks/metadata_default_flag.py:
--------------------------------------------------------------------------------
 1 | from .check import *
 2 | 
 3 | 
 4 | class CheckMetadataDefaultFlag(Check):
 5 |     def __init__(self, reporter, mediainfo):
 6 |         super().__init__(reporter, mediainfo, "Error checking metadata default flag")
 7 | 
 8 |     # overriding abstract method
 9 |     def get_reply(self):
10 |         # only one track of each type should be default=yes
11 |         reply, default_yes_error = "", False
12 |         track_types = ["audio", "text"]
13 | 
14 |         for track_type in track_types:
15 |             default_yes_count = 0
16 |             for track in self.mediainfo[track_type]:
17 |                 if "default" in track and track["default"].lower() == "yes":
18 |                     default_yes_count += 1
19 |             if default_yes_count > 1:
20 |                 reply += self.reporter.print_report(
21 |                     "error",
22 |                     "Only 1 {} track should be `default=yes`".format(track_type),
23 |                 )
24 |                 default_yes_error = True
25 | 
26 |         if not default_yes_error:
27 |             reply += self.reporter.print_report(
28 |                 "correct",
29 |                 "Only 1 track of each type is `default=yes`",
30 |             )
31 |         return reply
32 | 


--------------------------------------------------------------------------------
/vdator/checks/metadata_ids.py:
--------------------------------------------------------------------------------
  1 | from .check import *
  2 | from .mixins import IsMovie
  3 | 
  4 | from dotenv import load_dotenv
  5 | import datetime, os, re
  6 | import imdb
  7 | 
  8 | # load environment variables
  9 | load_dotenv()
 10 | 
 11 | # how many years off the movie year can be. (default: 1)
 12 | MOVIE_YEAR_OFFSET = int(os.environ.get("MOVIE_YEAR_OFFSET", "1").strip())
 13 | 
 14 | 
 15 | class CheckMetadataIds(Check, IsMovie):
 16 |     def __init__(self, reporter, mediainfo, tmdb, ia):
 17 |         super().__init__(reporter, mediainfo, "Error parsing IMDb/TMDb ids")
 18 |         self.tmdb = tmdb
 19 |         self.ia = ia
 20 | 
 21 |     # overriding abstract method
 22 |     def get_reply(self):
 23 |         reply = ""
 24 | 
 25 |         imdb_movie, tmdb_info, tmdb_year = None, None, None
 26 | 
 27 |         movie_data = {"name": None, "year": None}
 28 | 
 29 |         matched = {
 30 |             "imdb_title": False,
 31 |             "imdb_year": False,
 32 |             "tmdb_title": False,
 33 |             "tmdb_year": False,
 34 |             # matched movie title/year with either imdb or tmdb
 35 |             "title": False,
 36 |             "year": False,
 37 |             "title_replied": False,
 38 |             "year_replied": False,
 39 |         }
 40 | 
 41 |         # is it a movie or tv show?
 42 |         is_movie = self._is_movie()
 43 | 
 44 |         # extract movie name and year or tv show name
 45 |         if has(self.mediainfo, "general.0.movie_name"):
 46 |             if is_movie:
 47 |                 # movie
 48 |                 movie_name = re.search(
 49 |                     r"^(.+)\((\d{4})\)", self.mediainfo["general"][0]["movie_name"]
 50 |                 )
 51 |                 if movie_name:
 52 |                     movie_data["name"] = movie_name.group(1).strip()
 53 |                     movie_data["year"] = movie_name.group(2).strip()
 54 |             else:
 55 |                 # tv show
 56 |                 tv_show_name = re.search(
 57 |                     r"^(.+)\s-\s.+\s-\s.+", self.mediainfo["general"][0]["movie_name"]
 58 |                 )
 59 |                 if tv_show_name:
 60 |                     movie_data["name"] = tv_show_name.group(1).strip()
 61 | 
 62 |         if has(self.mediainfo, "general.0.imdb"):
 63 |             imdb_id = "".join(
 64 |                 re.findall(r"[\d]+", self.mediainfo["general"][0]["imdb"])
 65 |             )
 66 |             try:
 67 |                 imdb_movie = self.ia.get_movie(imdb_id)
 68 |             except imdb._exceptions.IMDbParserError:
 69 |                 reply += self.reporter.print_report(
 70 |                     "error",
 71 |                     "Invalid IMDb id: `" + self.mediainfo["general"][0]["imdb"] + "`",
 72 |                 )
 73 |             except:
 74 |                 # imdb._exceptions.IMDbDataAccessError
 75 |                 reply += self.reporter.print_report(
 76 |                     "info",
 77 |                     "Failed to get IMDb movie data for id: `"
 78 |                     + self.mediainfo["general"][0]["imdb"]
 79 |                     + "`",
 80 |                 )
 81 |             else:
 82 |                 # force single space in movie name
 83 |                 imdb_movie["title"] = re.sub(r"\s+", " ", imdb_movie["title"])
 84 |                 matched["imdb_title"] = movie_data["name"] == imdb_movie["title"]
 85 |                 if is_movie:
 86 |                     matched["imdb_year"] = self._year_range(
 87 |                         imdb_movie["year"], movie_data["year"]
 88 |                     )
 89 | 
 90 |         if has(self.mediainfo, "general.0.tmdb"):
 91 |             tmdb_id = "".join(
 92 |                 re.findall(r"[\d]+", self.mediainfo["general"][0]["tmdb"])
 93 |             )
 94 |             # movie or tv show
 95 |             tmdb_data = self.tmdb.Movies(tmdb_id) if is_movie else self.tmdb.TV(tmdb_id)
 96 | 
 97 |             try:
 98 |                 tmdb_info = tmdb_data.info()
 99 |                 # force single space in movie name
100 |                 if "title" in tmdb_info:
101 |                     tmdb_info["title"] = re.sub(r"\s+", " ", tmdb_info["title"])
102 |             except:
103 |                 reply += self.reporter.print_report(
104 |                     "info",
105 |                     "Failed to get TMDb data for id: `"
106 |                     + self.mediainfo["general"][0]["tmdb"]
107 |                     + "`",
108 |                 )
109 |             else:
110 |                 if is_movie:
111 |                     # movie
112 |                     if "release_date" in tmdb_info and tmdb_info["release_date"]:
113 |                         datetime_obj = datetime.datetime.strptime(
114 |                             tmdb_info["release_date"], "%Y-%m-%d"
115 |                         )
116 |                         tmdb_year = str(datetime_obj.year)
117 |                     # tmdb_info["original_title"] is original title
118 |                     # tmdb_info["title"] is the translated title in whatever language you're requesting
119 |                     matched["tmdb_title"] = (
120 |                         "title" in tmdb_info
121 |                         and movie_data["name"] == tmdb_info["title"]
122 |                     )
123 |                     matched["tmdb_year"] = tmdb_year and self._year_range(
124 |                         tmdb_year, movie_data["year"]
125 |                     )
126 |                 else:
127 |                     # tv show
128 |                     matched["tmdb_title"] = (
129 |                         "title" in tmdb_info
130 |                         and movie_data["name"] == tmdb_info["title"]
131 |                     )
132 | 
133 |         # matched title/year with either imdb or tmdb
134 |         matched["title"] = matched["imdb_title"] or matched["tmdb_title"]
135 |         matched["year"] = matched["imdb_year"] or matched["tmdb_year"]
136 | 
137 |         if has(self.mediainfo, "general.0.imdb") or has(
138 |             self.mediainfo, "general.0.tmdb"
139 |         ):
140 |             if is_movie:
141 |                 # movie
142 |                 if matched["title"] and matched["year"]:
143 |                     reply += self.reporter.print_report(
144 |                         "correct", "Matched movie name and year with IMDb/TMDb"
145 |                     )
146 |                 else:
147 |                     if matched["title"]:
148 |                         reply += self.reporter.print_report(
149 |                             "correct", "Matched movie name with IMDb/TMDb"
150 |                         )
151 |                     else:
152 |                         if imdb_movie and "title" in imdb_movie and imdb_movie["title"]:
153 |                             reply += self.reporter.print_report(
154 |                                 "error", "IMDb: Name: `" + imdb_movie["title"] + "`"
155 |                             )
156 |                             if movie_data["name"]:
157 |                                 reply += show_diff(
158 |                                     movie_data["name"], imdb_movie["title"]
159 |                                 )
160 |                             matched["title_replied"] = True
161 |                         # tmdb_info["original_title"] is original title
162 |                         # tmdb_info["title"] is the translated title in whatever language you're requesting
163 |                         if tmdb_info and "title" in tmdb_info and tmdb_info["title"]:
164 |                             reply += self.reporter.print_report(
165 |                                 "error", "TMDb: Name: `" + tmdb_info["title"] + "`"
166 |                             )
167 |                             if movie_data["name"]:
168 |                                 reply += show_diff(
169 |                                     movie_data["name"], tmdb_info["title"]
170 |                                 )
171 |                             matched["title_replied"] = True
172 |                         if not matched["title_replied"]:
173 |                             reply += self.reporter.print_report(
174 |                                 "error", "Failed to match movie name with IMDb/TMDb"
175 |                             )
176 | 
177 |                     if matched["year"]:
178 |                         reply += self.reporter.print_report(
179 |                             "correct", "Matched movie year with IMDb/TMDb"
180 |                         )
181 |                     else:
182 |                         if imdb_movie and "year" in imdb_movie:
183 |                             reply += self.reporter.print_report(
184 |                                 "error", "IMDb: Year: `" + str(imdb_movie["year"]) + "`"
185 |                             )
186 |                             matched["year_replied"] = True
187 |                         if tmdb_year:
188 |                             reply += self.reporter.print_report(
189 |                                 "error", "TMDb: Year: `" + str(tmdb_year) + "`"
190 |                             )
191 |                             matched["year_replied"] = True
192 |                         if not matched["year_replied"]:
193 |                             reply += self.reporter.print_report(
194 |                                 "error", "Failed to match movie year with IMDb/TMDb"
195 |                             )
196 |             else:
197 |                 # tv show
198 |                 if matched["title"]:
199 |                     reply += self.reporter.print_report(
200 |                         "correct", "Matched tv show name with IMDb/TMDb"
201 |                     )
202 |                 else:
203 |                     if imdb_movie and "title" in imdb_movie:
204 |                         reply += self.reporter.print_report(
205 |                             "error", "IMDb: Name: `" + imdb_movie["title"] + "`"
206 |                         )
207 |                         matched["title_replied"] = True
208 |                     if tmdb_info and "name" in tmdb_info:
209 |                         reply += self.reporter.print_report(
210 |                             "error", "TMDb: Name: `" + tmdb_info["name"] + "`"
211 |                         )
212 |                         matched["title_replied"] = True
213 |                     if not matched["title_replied"]:
214 |                         reply += self.reporter.print_report(
215 |                             "error", "Failed to match tv show name with IMDb/TMDb"
216 |                         )
217 | 
218 |         return reply
219 | 
220 |     def _year_range(self, year, test_year, offset=MOVIE_YEAR_OFFSET):
221 |         # self._year_range(year, test_year)
222 |         # example: with offset = 1, and year = 2004, test_year can be between 2003 and 2005 inclusive
223 |         # 2002 in range(2004 - 1, (2004 + 1) + 1) False
224 |         # 2003 in range(2004 - 1, (2004 + 1) + 1) True
225 |         # 2004 in range(2004 - 1, (2004 + 1) + 1) True
226 |         # 2005 in range(2004 - 1, (2004 + 1) + 1) True
227 |         # 2006 in range(2004 - 1, (2004 + 1) + 1) False
228 |         if not (year and test_year):
229 |             return False
230 |         year = int(year)
231 |         test_year = int(test_year)
232 |         return test_year in range(year - offset, (year + offset) + 1)
233 | 


--------------------------------------------------------------------------------
/vdator/checks/mixins/__init__.py:
--------------------------------------------------------------------------------
1 | from .is_commentary_track import *
2 | from .is_movie import *
3 | from .print_header import *
4 | from .section_id import *
5 | 


--------------------------------------------------------------------------------
/vdator/checks/mixins/is_commentary_track.py:
--------------------------------------------------------------------------------
1 | class IsCommentaryTrack(object):
2 |     def _is_commentary_track(self, title):
3 |         return "commentary" in title.lower().split()
4 | 


--------------------------------------------------------------------------------
/vdator/checks/mixins/is_movie.py:
--------------------------------------------------------------------------------
 1 | from pydash import has
 2 | import re
 3 | 
 4 | 
 5 | class IsMovie(object):
 6 | 
 7 |     # returns True if its a movie, False if tv show
 8 |     def _is_movie(self):
 9 |         # is it a movie or tv show? assume movie
10 |         is_movie = True
11 |         determined_movie_or_tv = False
12 | 
13 |         if has(self.mediainfo, "general.0.tmdb"):
14 |             if self.mediainfo["general"][0]["tmdb"].startswith("movie/"):
15 |                 is_movie = True
16 |                 determined_movie_or_tv = True
17 |             elif self.mediainfo["general"][0]["tmdb"].startswith("tv/"):
18 |                 is_movie = False
19 |                 determined_movie_or_tv = True
20 | 
21 |         if not determined_movie_or_tv:
22 |             if has(self.mediainfo, "general.0.movie_name"):
23 |                 # tv show name in format "Name - S01E01" or "Name - S01E01E02"
24 |                 is_tv = re.search(
25 |                     r"^.+\s-\sS\d{2}(E\d{2})+.*$",
26 |                     self.mediainfo["general"][0]["movie_name"],
27 |                 )
28 |                 if is_tv:
29 |                     is_movie = not (is_tv)
30 |         return is_movie
31 | 


--------------------------------------------------------------------------------
/vdator/checks/mixins/print_header.py:
--------------------------------------------------------------------------------
1 | class PrintHeader(object):
2 |     def _print_header(self, heading):
3 |         return "> **{}**\n".format(heading)
4 | 


--------------------------------------------------------------------------------
/vdator/checks/mixins/section_id.py:
--------------------------------------------------------------------------------
1 | class SectionId(object):
2 |     def _section_id(self, section, i):
3 |         reply = ""
4 |         if "id" in self.mediainfo[section.lower()][i]:
5 |             reply += "#" + self.mediainfo[section.lower()][i]["id"]
6 |         else:
7 |             reply += str(i)
8 |         return reply
9 | 


--------------------------------------------------------------------------------
/vdator/checks/mkvmerge.py:
--------------------------------------------------------------------------------
  1 | from .check import *
  2 | 
  3 | from dotenv import load_dotenv
  4 | import os, re, requests
  5 | 
  6 | # load environment variables
  7 | load_dotenv()
  8 | 
  9 | MKVMERGE_VERSION = os.environ.get("MKVMERGE_VERSION")
 10 | 
 11 | 
 12 | class CheckMKVMerge(Check):
 13 |     def __init__(self, reporter, mediainfo):
 14 |         super().__init__(
 15 |             reporter,
 16 |             mediainfo,
 17 |             "Error checking mkvtoolnix version",
 18 |         )
 19 | 
 20 |     def run(self):
 21 |         """
 22 |         Runs the check and returns reply.
 23 |         Wraps check in try...except to prevent crashes
 24 | 
 25 |         Returns
 26 |         -------
 27 |         reply string
 28 |         """
 29 |         reply = ""
 30 |         try:
 31 |             reply += self.get_reply(MKVMERGE_VERSION)
 32 |         except:
 33 |             traceback.print_exc()
 34 |             reply += self.reporter.print_report("fail", self.run_fail_msg)
 35 |         return reply
 36 | 
 37 |     # overriding abstract method
 38 |     # force_version = "Version 57.0.0 \"Till The End\" 2021-05-22"
 39 |     # force_version = "Version 76.0 \"Celebration\" 2023-04-30"
 40 |     def get_reply(self, force_version=None):
 41 |         reply = ""
 42 | 
 43 |         version_name_regex_mkvtoolnix = r'"(.*)"'
 44 |         version_name_regex_mediainfo = r"\'(.*)\'"
 45 |         version_num_regex = r"(\d+\.\d+(\.\d+)?)"
 46 | 
 47 |         if not has(self.mediainfo, "general.0.writing_application"):
 48 |             reply += self.reporter.print_report("info", "Not using mkvtoolnix")
 49 |             return reply
 50 | 
 51 |         mediainfo_version_num = re.search(
 52 |             version_num_regex, self.mediainfo["general"][0]["writing_application"]
 53 |         )
 54 |         if mediainfo_version_num:
 55 |             mediainfo_version_num = mediainfo_version_num.group(1)
 56 | 
 57 |         mediainfo_version_name = re.search(
 58 |             version_name_regex_mediainfo,
 59 |             self.mediainfo["general"][0]["writing_application"],
 60 |         )
 61 |         if mediainfo_version_name:
 62 |             mediainfo_version_name = mediainfo_version_name.group(1)
 63 | 
 64 |         if not mediainfo_version_num or not mediainfo_version_name:
 65 |             reply += self.reporter.print_report("info", "Not using mkvtoolnix")
 66 |             return reply
 67 | 
 68 |         try:
 69 |             r = requests.get(os.environ.get("MKVTOOLNIX_NEWS"))
 70 |             if r.status_code == 200:
 71 |                 ## Version 32.0.0 "Astral Progressions" 2019-03-12
 72 |                 ## Version 76.0 "Celebration" 2023-04-30
 73 |                 mkvtoolnix_version_line = r.text.splitlines()[0]
 74 |                 if force_version:
 75 |                     mkvtoolnix_version_line = force_version
 76 | 
 77 |                 mkvtoolnix_version_num = re.search(
 78 |                     version_num_regex, mkvtoolnix_version_line
 79 |                 )
 80 |                 if mkvtoolnix_version_num:
 81 |                     mkvtoolnix_version_num = mkvtoolnix_version_num.group(1)
 82 | 
 83 |                 mkvtoolnix_version_name = re.search(
 84 |                     version_name_regex_mkvtoolnix, mkvtoolnix_version_line
 85 |                 )
 86 |                 if mkvtoolnix_version_name:
 87 |                     mkvtoolnix_version_name = mkvtoolnix_version_name.group(1)
 88 | 
 89 |                 if (
 90 |                     mkvtoolnix_version_num == mediainfo_version_num
 91 |                     and mkvtoolnix_version_name == mediainfo_version_name
 92 |                 ):
 93 |                     reply += self.reporter.print_report(
 94 |                         "correct",
 95 |                         "Uses latest mkvtoolnix: `"
 96 |                         + mediainfo_version_num
 97 |                         + ' "'
 98 |                         + mediainfo_version_name
 99 |                         + '"`',
100 |                     )
101 |                 else:
102 |                     reply += self.reporter.print_report(
103 |                         "warning",
104 |                         "Not using latest mkvtoolnix: `"
105 |                         + mediainfo_version_num
106 |                         + ' "'
107 |                         + mediainfo_version_name
108 |                         + '"` latest is: `'
109 |                         + mkvtoolnix_version_num
110 |                         + ' "'
111 |                         + mkvtoolnix_version_name
112 |                         + '"`',
113 |                     )
114 |         except:
115 |             reply += self.reporter.print_report(
116 |                 "info", "Could not fetch latest mkvtoolnix version"
117 |             )
118 |             return reply
119 | 
120 |         return reply
121 | 


--------------------------------------------------------------------------------
/vdator/checks/movie_name_format.py:
--------------------------------------------------------------------------------
 1 | from .check import *
 2 | from .mixins import IsMovie
 3 | 
 4 | import re
 5 | 
 6 | 
 7 | class CheckMovieNameFormat(Check, IsMovie):
 8 |     def __init__(self, reporter, mediainfo):
 9 |         super().__init__(reporter, mediainfo, "Error parsing movie name")
10 | 
11 |     # overriding abstract method
12 |     def get_reply(self):
13 |         reply = ""
14 | 
15 |         # is it a movie or tv show?
16 |         is_movie = self._is_movie()
17 | 
18 |         if has(self.mediainfo, "general.0.movie_name"):
19 |             if is_movie:
20 |                 # movie name in format "Name (Year)"
21 |                 if re.search(
22 |                     r"^.+\(\d{4}\)$", self.mediainfo["general"][0]["movie_name"]
23 |                 ):
24 |                     reply += self.reporter.print_report(
25 |                         "correct",
26 |                         "Movie name format `Name (Year)`: `"
27 |                         + self.mediainfo["general"][0]["movie_name"]
28 |                         + "`",
29 |                     )
30 |                 else:
31 |                     reply += self.reporter.print_report(
32 |                         "error",
33 |                         "Movie name does not match format `Name (Year)`: `"
34 |                         + self.mediainfo["general"][0]["movie_name"]
35 |                         + "`",
36 |                     )
37 |                     reply += self._movie_name_extra_space(
38 |                         self.mediainfo["general"][0]["movie_name"]
39 |                     )
40 |             else:
41 |                 # tv show name in format "Name - S01E01" or "Name - S01E01E02"
42 |                 if re.search(
43 |                     r"^.+\s-\sS\d{2}(E\d{2})+.*$",
44 |                     self.mediainfo["general"][0]["movie_name"],
45 |                 ):
46 |                     reply += self.reporter.print_report(
47 |                         "correct",
48 |                         "TV show name format `Name - S01E01`: `"
49 |                         + self.mediainfo["general"][0]["movie_name"]
50 |                         + "`",
51 |                     )
52 |                 else:
53 |                     reply += self.reporter.print_report(
54 |                         "error",
55 |                         "TV show name does not match format `Name - S01E01`: `"
56 |                         + self.mediainfo["general"][0]["movie_name"]
57 |                         + "`",
58 |                     )
59 |                     reply += self._movie_name_extra_space(
60 |                         self.mediainfo["general"][0]["movie_name"]
61 |                     )
62 |         else:
63 |             reply += self.reporter.print_report("error", "Missing movie name")
64 | 
65 |         return reply
66 | 
67 |     def _movie_name_extra_space(self, movie_name):
68 |         reply = ""
69 | 
70 |         if movie_name.startswith(" "):
71 |             reply += self.reporter.print_report(
72 |                 "error", "Movie name starts with an extra space!"
73 |             )
74 | 
75 |         if movie_name.endswith(" "):
76 |             reply += self.reporter.print_report(
77 |                 "error", "Movie name ends with an extra space!"
78 |             )
79 | 
80 |         return reply
81 | 


--------------------------------------------------------------------------------
/vdator/checks/muxing_mode.py:
--------------------------------------------------------------------------------
 1 | from .check import *
 2 | 
 3 | 
 4 | class CheckMuxingMode(Check):
 5 |     def __init__(self, reporter, mediainfo):
 6 |         super().__init__(
 7 |             reporter,
 8 |             mediainfo,
 9 |             "Error checking muxing mode",
10 |         )
11 | 
12 |     # overriding abstract method
13 |     def get_reply(self):
14 |         reply, is_valid = "", True
15 | 
16 |         for section in ["general", "video", "audio", "text"]:
17 |             for i, _ in enumerate(self.mediainfo[section]):
18 |                 if "muxing_mode" in self.mediainfo[section][i]:
19 |                     reply += self.reporter.print_report(
20 |                         "error",
21 |                         section.capitalize()
22 |                         + " #"
23 |                         + self.mediainfo[section][i]["id"]
24 |                         + " has muxing mode: `"
25 |                         + self.mediainfo[section][i]["muxing_mode"]
26 |                         + "`",
27 |                     )
28 |                     is_valid = False
29 | 
30 |         if is_valid:
31 |             reply += self.reporter.print_report(
32 |                 "correct", "All tracks do not have a muxing mode"
33 |             )
34 | 
35 |         return reply
36 | 


--------------------------------------------------------------------------------
/vdator/checks/print_audio_track_names.py:
--------------------------------------------------------------------------------
 1 | from .check import *
 2 | from .mixins import SectionId
 3 | 
 4 | 
 5 | class CheckPrintAudioTrackNames(Check, SectionId):
 6 |     def __init__(self, reporter, mediainfo):
 7 |         super().__init__(reporter, mediainfo, "Error printing audio track names")
 8 | 
 9 |     # overriding abstract method
10 |     def get_reply(self):
11 |         reply = ""
12 | 
13 |         if len(self.mediainfo["audio"]) > 0:
14 |             reply += "Audio Track Names:\n"
15 |             reply += "```"
16 |             for i, _ in enumerate(self.mediainfo["audio"]):
17 |                 reply += self._section_id("audio", i) + ": "
18 |                 if "title" in self.mediainfo["audio"][i]:
19 |                     reply += self.mediainfo["audio"][i]["title"] + "\n"
20 |             reply += "```"
21 |         else:
22 |             reply = self.reporter.print_report("error", "No audio tracks")
23 | 
24 |         return reply
25 | 


--------------------------------------------------------------------------------
/vdator/checks/print_chapters.py:
--------------------------------------------------------------------------------
 1 | from .check import *
 2 | 
 3 | import re
 4 | 
 5 | 
 6 | class CheckPrintChapters(Check):
 7 |     def __init__(self, reporter, mediainfo):
 8 |         super().__init__(reporter, mediainfo, "Error printing chapters")
 9 | 
10 |     # overriding abstract method
11 |     def get_reply(self):
12 |         reply = ""
13 | 
14 |         if len(self.mediainfo["menu"]) > 0:
15 |             for i, menu in enumerate(self.mediainfo["menu"]):
16 |                 reply += f"> **Chapters {i + 1}**\n"
17 |                 numbered_chapters = True
18 |                 for ch in menu:
19 |                     for title in ch["titles"]:
20 |                         if not re.search(
21 |                             r"^chapter\s\d+", title["title"], re.IGNORECASE
22 |                         ):
23 |                             numbered_chapters = False
24 | 
25 |                 if not numbered_chapters:
26 |                     reply += "```"
27 |                     for ch in menu:
28 |                         if ch["time"]:
29 |                             reply += ch["time"] + " :"
30 |                         for title in ch["titles"]:
31 |                             if title["language"]:
32 |                                 reply += " lang: " + title["language"]
33 |                             if title["title"]:
34 |                                 reply += " title: " + title["title"]
35 |                         reply += "\n"
36 |                     reply += "```"
37 |                 else:
38 |                     reply += self.reporter.print_report("info", "Chapters are numbered")
39 |                 if len(menu[0]["languages"]) > 0 and menu[0]["languages"][0] != "":
40 |                     reply += (
41 |                         "Chapter languages: `" + ", ".join(menu[0]["languages"]) + "`\n"
42 |                     )
43 |         else:
44 |             reply += self.reporter.print_report("info", "No chapters")
45 | 
46 |         return reply
47 | 


--------------------------------------------------------------------------------
/vdator/checks/print_text_tracks.py:
--------------------------------------------------------------------------------
 1 | from .check import *
 2 | from .mixins import SectionId
 3 | 
 4 | 
 5 | class CheckPrintTextTracks(Check, SectionId):
 6 |     def __init__(self, reporter, mediainfo):
 7 |         super().__init__(reporter, mediainfo, "Error printing text tracks")
 8 | 
 9 |     # overriding abstract method
10 |     def get_reply(self):
11 |         reply = ""
12 |         if len(self.mediainfo["text"]) > 0:
13 |             reply += "```"
14 |             for i, _ in enumerate(self.mediainfo["text"]):
15 |                 reply += self._section_id("text", i) + ":"
16 |                 if "default" in self.mediainfo["text"][i]:
17 |                     reply += " default:" + self.mediainfo["text"][i]["default"]
18 |                 if "forced" in self.mediainfo["text"][i]:
19 |                     reply += " forced:" + self.mediainfo["text"][i]["forced"]
20 |                 if "language" in self.mediainfo["text"][i]:
21 |                     reply += " language:" + self.mediainfo["text"][i]["language"]
22 |                 if "title" in self.mediainfo["text"][i]:
23 |                     reply += " title: " + self.mediainfo["text"][i]["title"]
24 |                 reply += "\n"
25 |             reply += "```"
26 |         else:
27 |             reply += self.reporter.print_report("info", "No text tracks")
28 |         return reply
29 | 


--------------------------------------------------------------------------------
/vdator/checks/remove_until_first_codec.py:
--------------------------------------------------------------------------------
 1 | class RemoveUntilFirstCodec(object):
 2 |     def __init__(self, codecs):
 3 |         self.codecs = codecs
 4 | 
 5 |     def remove(self, title):
 6 |         title2, title_parts, found = title, list(), False
 7 |         if " / " in title:
 8 |             for part in title.split(" / "):
 9 |                 if self.codecs.is_audio_title(part):
10 |                     # stop when we get first codec
11 |                     found = True
12 |                     break
13 |                 else:
14 |                     title2_split = title2.split(" / ")
15 |                     # remove part since its not a codec
16 |                     title2 = " / ".join(title2_split[1:]).strip()
17 |                     # save part in list
18 |                     title_parts.append(title2_split[0])
19 |         return title2, title_parts, found
20 | 


--------------------------------------------------------------------------------
/vdator/checks/text_default_flag.py:
--------------------------------------------------------------------------------
 1 | from .check import *
 2 | 
 3 | 
 4 | class CheckTextDefaultFlag(Check):
 5 |     def __init__(self, reporter, mediainfo):
 6 |         super().__init__(reporter, mediainfo, "Error checking text track default flag")
 7 | 
 8 |     # overriding abstract method
 9 |     def get_reply(self):
10 |         # english subs for foreign films should be default=yes
11 |         reply = ""
12 | 
13 |         if len(self.mediainfo["text"]) > 0:
14 |             first_audio_language, has_english_subs, english_subs_default_yes = (
15 |                 False,
16 |                 False,
17 |                 False,
18 |             )
19 | 
20 |             if has(self.mediainfo, "audio.0.language"):
21 |                 first_audio_language = self.mediainfo["audio"][0]["language"].lower()
22 | 
23 |             if first_audio_language != "english":
24 |                 # text tracks with language and default keys
25 |                 text_with_properties = [
26 |                     item
27 |                     for item in self.mediainfo["text"]
28 |                     if ("language" in item and "default" in item)
29 |                 ]
30 |                 for item in text_with_properties:
31 |                     if item["language"].lower() == "english":
32 |                         has_english_subs = True
33 |                     if item["default"].lower() == "yes":
34 |                         english_subs_default_yes = True
35 |                     if has_english_subs and english_subs_default_yes:
36 |                         break
37 | 
38 |                 if has_english_subs:
39 |                     # foreign audio and has english subs. english subs should be default=yes
40 |                     if english_subs_default_yes:
41 |                         reply += self.reporter.print_report(
42 |                             "correct",
43 |                             "Foreign film, one of the English subtitles are `default=yes`",
44 |                         )
45 |                     else:
46 |                         reply += self.reporter.print_report(
47 |                             "error",
48 |                             "Foreign film, one of the English subtitles should be `default=yes`",
49 |                         )
50 | 
51 |         return reply
52 | 


--------------------------------------------------------------------------------
/vdator/checks/text_order.py:
--------------------------------------------------------------------------------
  1 | from .check import *
  2 | from .mixins import IsCommentaryTrack, SectionId
  3 | 
  4 | from collections import OrderedDict
  5 | import re
  6 | 
  7 | 
  8 | class CheckTextOrder(Check, IsCommentaryTrack, SectionId):
  9 |     """
 10 |     Checks text track order:
 11 |     Languages are in alphabetical order with English first
 12 |     Within language: No title, SDH, alphabetical
 13 |     Commentary subtitles after regular subtitles
 14 |     """
 15 | 
 16 |     def __init__(self, reporter, mediainfo):
 17 |         super().__init__(
 18 |             reporter,
 19 |             mediainfo,
 20 |             "Error checking text track order",
 21 |         )
 22 | 
 23 |     # overriding abstract method
 24 |     def get_reply(self):
 25 |         reply = ""
 26 | 
 27 |         if len(self.mediainfo["text"]) == 0:
 28 |             return reply
 29 | 
 30 |         # text_langs = ['German', 'English', ...]
 31 |         text_langs = [
 32 |             self._format_lang(text["language"]) for text in self.mediainfo["text"]
 33 |         ]
 34 |         # remove duplicates from list and preserve order
 35 |         text_langs = list(dict.fromkeys(text_langs))
 36 | 
 37 |         # list of text tracks by language
 38 |         #
 39 |         # OrderedDict([
 40 |         #   ('English', [{}, ...]), ('German', [{}, ...])
 41 |         # ])
 42 |         #
 43 |         text_tracks_by_lang = OrderedDict((k, list()) for k in text_langs)
 44 |         commentary_tracks_by_lang = OrderedDict((k, list()) for k in text_langs)
 45 |         has_commentary = False
 46 | 
 47 |         # get tracks by language, and separate commentary tracks
 48 |         for i, text in enumerate(self.mediainfo["text"]):
 49 |             text["title"] = text["title"] if "title" in text else ""
 50 |             if self._is_commentary_track(text["title"]):
 51 |                 commentary_tracks_by_lang[self._format_lang(text["language"])].append(
 52 |                     text
 53 |                 )
 54 |                 has_commentary = True
 55 |             else:
 56 |                 text_tracks_by_lang[self._format_lang(text["language"])].append(text)
 57 |             # forced english track should be first
 58 |             reply += self._forced_english_track_first(i, text)
 59 | 
 60 |         # languages should be in alphabetical order with English first
 61 |         reply += self._languages_in_order(text_tracks_by_lang, "Regular subs: ")
 62 |         if has_commentary:
 63 |             reply += self._languages_in_order(
 64 |                 commentary_tracks_by_lang, "Commentary subs: "
 65 |             )
 66 | 
 67 |         # subtitles in order within language: no title, SDH, rest in alphabetical order
 68 |         reply += "**Expected order within language:** No title, SDH, alphabetical\n"
 69 |         reply += self._subs_in_order_within_language(
 70 |             text_tracks_by_lang, "Regular subs: "
 71 |         )
 72 |         if has_commentary:
 73 |             reply += self._subs_in_order_within_language(
 74 |                 commentary_tracks_by_lang, "Commentary subs: "
 75 |             )
 76 | 
 77 |         # commentary tracks should be after regular subs
 78 |         if has_commentary:
 79 |             reply += self._commentary_last(
 80 |                 text_tracks_by_lang, commentary_tracks_by_lang
 81 |             )
 82 | 
 83 |         return reply
 84 | 
 85 |     def _format_lang(self, lang):
 86 |         """
 87 |         Format a text language to remove parenthesis
 88 |         English (US) becomes English
 89 |         """
 90 |         return re.sub(r"\([^)]*\)", "", lang).strip()
 91 | 
 92 |     def _forced_english_track_first(self, i, text_track):
 93 |         """
 94 |         Forced english track should be first
 95 |         Only checks tracks without titles, since titles have a predefined order: No title, SDH, alphabetical
 96 |         """
 97 |         reply = ""
 98 | 
 99 |         is_forced_track = (
100 |             text_track["forced"].lower() == "yes" if "forced" in text_track else False
101 |         )
102 |         is_english_track = text_track["language"].lower() == "english"
103 |         # only checks tracks without titles
104 |         title_is_blank = text_track["title"] == ""
105 |         is_first_track = i == 0
106 | 
107 |         if (
108 |             is_forced_track
109 |             and is_english_track
110 |             and title_is_blank
111 |             and not is_first_track
112 |         ):
113 |             # forced english track should be first
114 |             reply += self.reporter.print_report(
115 |                 "error",
116 |                 "Text {} is a forced English track, it should be first".format(
117 |                     self._section_id("text", i)
118 |                 ),
119 |             )
120 | 
121 |         return reply
122 | 
123 |     def _languages_in_order(self, text_tracks_by_lang, prefix=""):
124 |         """Languages should be in alphabetical order with English first"""
125 |         reply = ""
126 |         text_track_langs_order = list(text_tracks_by_lang.keys())
127 |         text_track_langs_expected_order = self._sort_sub_langs(text_track_langs_order)
128 | 
129 |         if text_track_langs_expected_order == text_track_langs_order:
130 |             reply += self.reporter.print_report(
131 |                 "correct",
132 |                 prefix + "Languages are in alphabetical order with English first",
133 |             )
134 |         else:
135 |             reply += self.reporter.print_report(
136 |                 "error",
137 |                 prefix
138 |                 + "Languages should be in alphabetical order with English first. Expected: `"
139 |                 + ", ".join(text_track_langs_expected_order)
140 |                 + "`",
141 |             )
142 | 
143 |         return reply
144 | 
145 |     def _commentary_last(self, text_tracks_by_lang, commentary_tracks_by_lang):
146 |         """Commentary tracks should be last"""
147 |         reply = ""
148 | 
149 |         if len(commentary_tracks_by_lang) > 0:
150 |             last_text_id = self._get_last_text_id(text_tracks_by_lang)
151 |             if last_text_id != -1:
152 |                 try:
153 |                     first_commentary = next(iter(commentary_tracks_by_lang.values()))
154 |                     if first_commentary and has(first_commentary, "0.id"):
155 |                         try:
156 |                             if last_text_id > int(first_commentary[0]["id"]):
157 |                                 # commentary tracks should be after regular subs
158 |                                 reply += self.reporter.print_report(
159 |                                     "error",
160 |                                     "Commentary subs should be after regular subs",
161 |                                 )
162 |                             else:
163 |                                 # commentary tracks are after regular subs
164 |                                 reply += self.reporter.print_report(
165 |                                     "correct",
166 |                                     "Commentary subs are after regular subs",
167 |                                 )
168 |                         except ValueError:
169 |                             pass
170 |                 except StopIteration:
171 |                     pass
172 | 
173 |         return reply
174 | 
175 |     def _subs_in_order_within_language(self, text_tracks_by_lang, prefix=""):
176 |         """
177 |         Subtitles in order within language
178 |         No title, SDH, rest in alphabetical order
179 |         """
180 |         reply = ""
181 |         for k, v in text_tracks_by_lang.items():
182 |             # k = 'English'
183 |             # v = tracks list [{}, ...]
184 | 
185 |             v_ids = [track["id"] for track in v]
186 |             expected_order = self._sort_subs_within_lang(v)
187 |             expected_order_ids = [track["id"] for track in expected_order]
188 | 
189 |             if v_ids != expected_order_ids:
190 |                 # subs for language are out of order
191 |                 reply += self.reporter.print_report(
192 |                     "warning",
193 |                     prefix
194 |                     + "Language: `{}`: Subtitles should be in order: `{}`".format(
195 |                         k, ", ".join(expected_order_ids)
196 |                     ),
197 |                 )
198 | 
199 |         return reply
200 | 
201 |     def _get_last_text_id(self, text_tracks_by_lang):
202 |         """Get track id of last subtitle"""
203 |         last_text_id = -1
204 |         for _, tracks in text_tracks_by_lang.items():
205 |             for track in tracks:
206 |                 try:
207 |                     curr_text_id = int(track["id"])
208 |                     if curr_text_id > last_text_id:
209 |                         last_text_id = curr_text_id
210 |                 except ValueError:
211 |                     continue
212 |         return last_text_id
213 | 
214 |     def _sort_sub_langs(self, languages):
215 |         """
216 |         Sort subs by language
217 |         English first, rest in alphabetical order
218 |         """
219 |         # English tracks first
220 |         tracks = [lang for lang in languages if lang.lower() == "english"]
221 |         # rest of the tracks by language in alphabetical order
222 |         rest = sorted([lang for lang in languages if lang.lower() != "english"])
223 | 
224 |         # add the rest of the tracks
225 |         if rest:
226 |             tracks.extend(rest)
227 | 
228 |         return tracks
229 | 
230 |     def _sort_subs_within_lang(self, text_tracks):
231 |         """
232 |         Sort subtitles within languages
233 |         No title, SDH, rest in alphabetical order
234 |         """
235 |         # ['', 'SDH', '...']
236 |         unparsed = text_tracks.copy()
237 |         parsed = []
238 | 
239 |         # add tracks with no title
240 |         for track in unparsed:
241 |             if track["title"] == "":
242 |                 parsed.append(track)
243 |         unparsed = [track for track in unparsed if track["title"] != ""]
244 | 
245 |         # add tracks with SDH
246 |         tracks_with_SDH = []
247 |         for track in unparsed:
248 |             if "SDH" in track["title"].split():
249 |                 tracks_with_SDH.append(track)
250 |         tracks_with_SDH = sorted(tracks_with_SDH, key=lambda track: track["title"])
251 |         if tracks_with_SDH:
252 |             parsed.extend(tracks_with_SDH)
253 |             unparsed = [
254 |                 track for track in unparsed if ("SDH" not in track["title"].split())
255 |             ]
256 | 
257 |         # sort rest of the tracks in alphabetical order
258 |         unparsed = sorted(unparsed, key=lambda track: track["title"])
259 | 
260 |         # add the rest of the tracks
261 |         if unparsed:
262 |             parsed.extend(unparsed)
263 | 
264 |         return parsed
265 | 


--------------------------------------------------------------------------------
/vdator/checks/tracks_have_language.py:
--------------------------------------------------------------------------------
 1 | from .check import *
 2 | from .mixins import SectionId
 3 | 
 4 | 
 5 | class CheckTracksHaveLanguage(Check, SectionId):
 6 |     def __init__(self, reporter, mediainfo):
 7 |         super().__init__(reporter, mediainfo, "Error checking if tracks have language")
 8 | 
 9 |     # overriding abstract method
10 |     def get_reply(self):
11 |         reply, is_valid = "", True
12 | 
13 |         for section in ["video", "audio", "text"]:
14 |             for i, _ in enumerate(self.mediainfo[section]):
15 |                 if "language" not in self.mediainfo[section][i]:
16 |                     reply += self.reporter.print_report(
17 |                         "error",
18 |                         section.capitalize()
19 |                         + " "
20 |                         + self._section_id(section, i)
21 |                         + ": Does not have a language chosen",
22 |                     )
23 |                     is_valid = False
24 | 
25 |         if is_valid:
26 |             reply += self.reporter.print_report(
27 |                 "correct", "All tracks have a language chosen"
28 |             )
29 | 
30 |         return reply
31 | 


--------------------------------------------------------------------------------
/vdator/checks/video_language_matches_first_audio_language.py:
--------------------------------------------------------------------------------
 1 | from .check import *
 2 | 
 3 | 
 4 | class CheckVideoLanguageMatchesFirstAudioLanguage(Check):
 5 |     def __init__(self, reporter, mediainfo):
 6 |         super().__init__(
 7 |             reporter,
 8 |             mediainfo,
 9 |             "Error checking that video language matches first audio language",
10 |         )
11 | 
12 |     # overriding abstract method
13 |     def get_reply(self):
14 |         reply = ""
15 | 
16 |         if not has(self.mediainfo, "video.0.language"):
17 |             reply += self.reporter.print_report("error", "Video language not set")
18 |             return reply
19 |         if not has(self.mediainfo, "audio.0.language"):
20 |             reply += self.reporter.print_report("error", "First audio language not set")
21 |             return reply
22 |         if (
23 |             self.mediainfo["video"][0]["language"]
24 |             == self.mediainfo["audio"][0]["language"]
25 |         ):
26 |             reply += self.reporter.print_report(
27 |                 "correct",
28 |                 "Video language matches first audio language: `"
29 |                 + self.mediainfo["video"][0]["language"]
30 |                 + "`",
31 |             )
32 |         else:
33 |             reply += self.reporter.print_report(
34 |                 "error",
35 |                 "Video language does not match first audio language: `"
36 |                 + self.mediainfo["video"][0]["language"]
37 |                 + "` vs `"
38 |                 + self.mediainfo["audio"][0]["language"]
39 |                 + "`",
40 |             )
41 |         return reply
42 | 


--------------------------------------------------------------------------------
/vdator/checks/video_track.py:
--------------------------------------------------------------------------------
  1 | from .check import *
  2 | 
  3 | import re
  4 | 
  5 | 
  6 | class CheckVideoTrack(Check):
  7 |     def __init__(self, reporter, source_detector, codecs, mediainfo, bdinfo):
  8 |         super().__init__(reporter, mediainfo, "Error checking video track name")
  9 |         self.source_detector = source_detector
 10 |         self.codecs = codecs
 11 |         self.bdinfo = bdinfo
 12 | 
 13 |     # overriding abstract method
 14 |     def get_reply(self):
 15 |         reply = ""
 16 | 
 17 |         if (
 18 |             has_many(
 19 |                 self.mediainfo,
 20 |                 "video.0",
 21 |                 [
 22 |                     "format",
 23 |                     "format_version",
 24 |                     "bit_rate",
 25 |                     "height",
 26 |                     "scan_type",
 27 |                     "frame_rate",
 28 |                     "display_aspect_ratio",
 29 |                     "title",
 30 |                 ],
 31 |             )
 32 |             and self.source_detector.is_dvd()
 33 |         ):
 34 |             # dvd video title from mediainfo
 35 |             video_title = self._dvd_video_title_from_mediainfo()
 36 |             mediainfo_title = self.mediainfo["video"][0]["title"]
 37 | 
 38 |             if mediainfo_title == video_title:
 39 |                 reply += self.reporter.print_report(
 40 |                     "correct",
 41 |                     "Video track names match: ```" + mediainfo_title + "```",
 42 |                     new_line=False,
 43 |                 )
 44 |             else:
 45 |                 reply += self.reporter.print_report(
 46 |                     "error",
 47 |                     "Video track names missmatch:\n```fix\nExpected: "
 48 |                     + video_title
 49 |                     + "\nMediaInfo: "
 50 |                     + mediainfo_title
 51 |                     + "```",
 52 |                     new_line=False,
 53 |                 )
 54 |                 reply += show_diff(mediainfo_title, video_title)
 55 | 
 56 |         elif has(self.bdinfo, "video") and has(self.mediainfo, "video"):
 57 |             if len(self.bdinfo["video"]) < 1:
 58 |                 reply += self.reporter.print_report(
 59 |                     "error", "Missing bdinfo video track"
 60 |                 )
 61 |                 return reply
 62 |             elif len(self.mediainfo["video"]) < 1:
 63 |                 reply += self.reporter.print_report(
 64 |                     "error", "Missing mediainfo video track"
 65 |                 )
 66 |                 return reply
 67 | 
 68 |             if has(self.mediainfo, "video.0.title") and has(self.bdinfo, "video.0"):
 69 |                 mediainfo_video_title = self.mediainfo["video"][0]["title"]
 70 |                 bdinfo_video_title = self.bdinfo["video"][0]
 71 | 
 72 |                 # 1080i @ 25fps is actually progressive
 73 |                 reply += self._actually_progressive()
 74 | 
 75 |                 bitrate_search = re.search(r"(\d+\.\d+)\skbps", mediainfo_video_title)
 76 |                 if bitrate_search:
 77 |                     # if mediainfo has a decimal kbps bitrate, use it in the bdinfo for comparison
 78 |                     percise_bitrate = bitrate_search.group(1)
 79 |                     percise_kbps = percise_bitrate + " kbps"
 80 |                     bdinfo_video_title = re.sub(
 81 |                         r"(\d+)\skbps", percise_kbps, bdinfo_video_title
 82 |                     )
 83 |                 if self.source_detector.is_dv() and mediainfo_video_title.startswith(
 84 |                     bdinfo_video_title
 85 |                 ):
 86 |                     # if source is dolby vision, only check that the first part of mediainfo video title
 87 |                     # matches bdinfo video title. Up to BT.2020, i.e. Dolby Vision FEL is not checked
 88 |                     reply += self.reporter.print_report(
 89 |                         "correct",
 90 |                         "Video track names match: ```" + mediainfo_video_title + "```",
 91 |                         new_line=False,
 92 |                     )
 93 |                 elif bdinfo_video_title == mediainfo_video_title:
 94 |                     reply += self.reporter.print_report(
 95 |                         "correct",
 96 |                         "Video track names match: ```" + bdinfo_video_title + "```",
 97 |                         new_line=False,
 98 |                     )
 99 |                 else:
100 |                     reply += self.reporter.print_report(
101 |                         "error",
102 |                         "Video track names missmatch:\n```fix\nBDInfo: "
103 |                         + bdinfo_video_title
104 |                         + "\nMediaInfo: "
105 |                         + mediainfo_video_title
106 |                         + "```",
107 |                         new_line=False,
108 |                     )
109 |                     reply += show_diff(mediainfo_video_title, bdinfo_video_title)
110 |             else:
111 |                 reply += self.reporter.print_report(
112 |                     "error", "Missing mediainfo video track"
113 |                 )
114 |                 return reply
115 |         else:
116 |             reply += self.reporter.print_report("error", "Could not verify video track")
117 | 
118 |         return reply
119 | 
120 |     def _dvd_video_title_from_mediainfo(self):
121 |         # dictionary existence already checked
122 | 
123 |         video_title = ""
124 |         # MPEG-
125 |         video_title += self.mediainfo["video"][0]["format"].split()[0] + "-"
126 | 
127 |         # 1
128 |         video_title += "".join(
129 |             re.findall(r"[\d]+", self.mediainfo["video"][0]["format_version"])
130 |         )
131 |         video_title += " Video / "
132 | 
133 |         # bitrate
134 |         video_title += (
135 |             "".join(re.findall(r"[\d]+", self.mediainfo["video"][0]["bit_rate"]))
136 |             + " kbps"
137 |         )
138 |         video_title += " / "
139 | 
140 |         # height
141 |         video_title += "".join(
142 |             re.findall(r"[\d]+", self.mediainfo["video"][0]["height"])
143 |         )
144 | 
145 |         # scan type
146 |         (scan_type, _) = self.codecs.get_scan_type_title_name(
147 |             self.mediainfo["video"][0]["scan_type"].lower(), 0
148 |         )
149 |         video_title += scan_type
150 |         video_title += " / "
151 | 
152 |         # fps
153 |         video_fps = float(
154 |             "".join(re.findall(r"\d+\.\d+", self.mediainfo["video"][0]["frame_rate"]))
155 |         )
156 |         if video_fps.is_integer():
157 |             video_fps = int(video_fps)
158 |         video_title += str(video_fps) + " fps / "
159 | 
160 |         # aspect ratio
161 |         video_title += self.mediainfo["video"][0]["display_aspect_ratio"]
162 | 
163 |         return video_title
164 | 
165 |     def _actually_progressive(self):
166 |         # dictionary existence already checked
167 | 
168 |         reply = ""
169 | 
170 |         bdinfo_video_title = self.bdinfo["video"][0]
171 |         bdinfo_video_parts = bdinfo_video_title.split(" / ")
172 | 
173 |         if len(bdinfo_video_parts) >= 3:
174 |             scan_type = bdinfo_video_parts[2].strip()[-1].lower()
175 |             video_fps = float(
176 |                 "".join(
177 |                     re.findall(r"\d*\.\d+|\d+", bdinfo_video_parts[3].strip().lower())
178 |                 )
179 |             )
180 |             (_, actually_progressive) = self.codecs.get_scan_type_title_name(
181 |                 scan_type, video_fps
182 |             )
183 |             if actually_progressive:
184 |                 reply += self.reporter.print_report(
185 |                     "info", "Note: 1080i @ 25fps is actually progressive"
186 |                 )
187 | 
188 |         return reply
189 | 


--------------------------------------------------------------------------------
/vdator/data/codecs.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "codecs": {
 3 |         "video": {
 4 |             "h264/AVC": ".h264",
 5 |             "h264/MVC": ".mvc",
 6 |             "h265/HEVC": ".h265",
 7 |             "MPEG1": ".m1v",
 8 |             "MPEG2": ".m2v",
 9 |             "VC-1": ".vc1"
10 |         },
11 |         "video_3d": {
12 |             "h264/MVC": ".mvc"
13 |         },
14 |         "audio": {
15 |             "AC3": ".ac3",
16 |             "AC3 EX": ".ac3",
17 |             "AC3 Surround": ".ac3",
18 |             "DTS Hi-Res": ".dtshr",
19 |             "DTS Master Audio": ".dtsma",
20 |             "DTS": ".dts",
21 |             "FLAC Audio": ".flac",
22 |             "RAW/PCM": ".pcm",
23 |             "TrueHD/AC3": ".thd",
24 |             "TrueHD/AC3 (Atmos)": ".thd"
25 |         },
26 |         "subtitles": {
27 |             "Subtitle (PGS)": ".sup",
28 |             "Subtitle (DVD)": ".sup"
29 |         },
30 |         "chapters": {
31 |             "Chapters": ".txt"
32 |         }
33 |     },
34 |     "track_titles": {
35 |         "video": {
36 |             "MPEG-1 Video": "MPEG-1",
37 |             "MPEG-2 Video": "MPEG-2",
38 |             "MPEG-4 AVC Video": "AVC",
39 |             "MPEG-H HEVC Video": "HEVC",
40 |             "VC-1 Video": "VC-1"
41 |         },
42 |         "audio": {
43 |             "DTS Audio": "DTS",
44 |             "DTS-HD High-Res Audio": "DTS-HD.HR",
45 |             "DTS-HD Master Audio": "DTS-HD.MA",
46 |             "DTS:X Master Audio": "DTS-X",
47 |             "Dolby Digital Audio": "DD",
48 |             "Dolby Digital EX Audio": "DD-EX",
49 |             "Dolby Digital Plus Audio": "DDP",
50 |             "Dolby TrueHD Audio": "TrueHD",
51 |             "Dolby TrueHD/Atmos Audio": "TrueHD.Atmos",
52 |             "FLAC Audio": "FLAC"
53 |         }
54 |     },
55 |     "scan_types": {
56 |         "interlaced" : "i",
57 |         "mbaff" : "i",
58 |         "progressive" : "p"
59 |     }
60 | }


--------------------------------------------------------------------------------
/vdator/data/urls.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "urls": {
 3 |         "dpaste.com": {
 4 |             "slug_regex": "https?://dpaste.com/(.*)",
 5 |             "raw_url_regex": "https?://dpaste.com/{}.txt",
 6 |             "raw_url": "https://dpaste.com/{}.txt"
 7 |         },
 8 |         "dpaste.org": {
 9 |             "slug_regex": "https?://dpaste.org/(.*)",
10 |             "raw_url_regex": "https?://dpaste.org/{}/raw",
11 |             "raw_url": "https://dpaste.org/{}/raw"
12 |         },
13 |         "hastebin.com": {
14 |             "slug_regex": "https?://hastebin.com/(.*)",
15 |             "raw_url_regex": "https?://hastebin.com/raw/{}",
16 |             "raw_url": "https://hastebin.com/raw/{}"
17 |         },
18 |         "www.heypasteit.com": {
19 |             "slug_regex": "https?://www.heypasteit.com/clip/(.*)",
20 |             "raw_url_regex": "https?://www.heypasteit.com/download/{}",
21 |             "raw_url": "https://www.heypasteit.com/download/{}"
22 |         },
23 |         "paste.centos.org": {
24 |             "slug_regex": "https?://paste.centos.org/view/(.*)",
25 |             "raw_url_regex": "https?://paste.centos.org/view/raw/{}",
26 |             "raw_url": "https://paste.centos.org/view/raw/{}"
27 |         },
28 |         "paste.ee": {
29 |             "slug_regex": "https?://paste.ee/p/(.*)",
30 |             "raw_url_regex": "https?://paste.ee/d/{}",
31 |             "raw_url": "https://paste.ee/d/{}"
32 |         },
33 |         "paste.opensuse.org": {
34 |             "slug_regex": "https?://paste.opensuse.org/(.*)",
35 |             "raw_url_regex": "https?://paste.opensuse.org/view/raw/{}",
36 |             "raw_url": "https://paste.opensuse.org/view/raw/{}"
37 |         },
38 |         "pastebin.com": {
39 |             "slug_regex": "https?://pastebin.com/(.*)",
40 |             "raw_url_regex": "https?://pastebin.com/raw/{}",
41 |             "raw_url": "https://pastebin.com/raw/{}"
42 |         },
43 |         "rentry.co": {
44 |             "slug_regex": "https?://rentry.co/(.*)",
45 |             "raw_url_regex": "https?://rentry.co/{}/raw",
46 |             "raw_url": "https://rentry.co/{}/raw"
47 |         },
48 |         "termbin.com": {
49 |             "slug_regex": "https?://termbin.com/(.*)",
50 |             "raw_url_regex": "https?://termbin.com/{}",
51 |             "raw_url": "https://termbin.com/{}"
52 |         },
53 |         "textbin.net": {
54 |             "slug_regex": "https?://textbin.net/(.*)",
55 |             "raw_url_regex": "https?://textbin.net/raw/{}",
56 |             "raw_url": "https://textbin.net/raw/{}"
57 |         }
58 |     }
59 | }


--------------------------------------------------------------------------------
/vdator/helpers.py:
--------------------------------------------------------------------------------
  1 | from pydash import has
  2 | import difflib
  3 | 
  4 | 
  5 | def balanced_blockquotes(str):
  6 |     """
  7 |     Check if blockquotes are balanced
  8 | 
  9 |     Parameters
 10 |     ----------
 11 |     str : str
 12 |         text
 13 | 
 14 |     Returns
 15 |     -------
 16 |     True if blockquotes are balanced, False otherwise
 17 |     """
 18 |     num_blockquotes = str.count("```")
 19 |     # balanced if even number of blockquotes
 20 |     return (num_blockquotes % 2) == 0
 21 | 
 22 | 
 23 | def split_string(str, limit, sep="\n"):
 24 |     """
 25 |     Split string
 26 | 
 27 |     Parameters
 28 |     ----------
 29 |     str : str
 30 |         string to split
 31 | 
 32 |     limit : int
 33 |         string length limit
 34 | 
 35 |     sep : str
 36 |         separator
 37 |         default: "\n"
 38 | 
 39 |     Returns
 40 |     -------
 41 |     True if blockquotes are balanced, False otherwise
 42 |     """
 43 |     limit = int(limit)
 44 |     words = str.split(sep)
 45 | 
 46 |     if max(map(len, words)) > limit:
 47 |         # limit is too small, return original string
 48 |         return str
 49 | 
 50 |     res, part, others = [], words[0], words[1:]
 51 |     for word in others:
 52 |         if (len(sep) + len(word)) > (limit - len(part)):
 53 |             res.append(part)
 54 |             part = word
 55 |         else:
 56 |             part += sep + word
 57 |     if part:
 58 |         res.append(part)
 59 | 
 60 |     return res
 61 | 
 62 | 
 63 | def has_many(obj, base, keys):
 64 |     """
 65 |     Check if object has many keys
 66 | 
 67 |     Parameters
 68 |     ----------
 69 |     obj : object
 70 |         object to test
 71 | 
 72 |     base : str
 73 |         base key path
 74 | 
 75 |     keys : list
 76 |         keys to test
 77 | 
 78 |     Returns
 79 |     -------
 80 |     True if all keys exist, False otherwise
 81 |     """
 82 |     for key in keys:
 83 |         lookup = ""
 84 |         if base:
 85 |             lookup += base + "."
 86 |         lookup += key
 87 |         if not has(obj, lookup):
 88 |             return False
 89 |     return True
 90 | 
 91 | 
 92 | def num_to_emoji(n):
 93 |     """
 94 |     Convert number to discord emoji
 95 | 
 96 |     Parameters
 97 |     ----------
 98 |     n : str
 99 |         string number
100 | 
101 |     Returns
102 |     -------
103 |     str discord emoji if valid, False otherwise
104 |     """
105 |     num_emoji_map = {
106 |         "1": ":one:",
107 |         "2": ":two:",
108 |         "3": ":three:",
109 |         "4": ":four:",
110 |         "5": ":five:",
111 |         "6": ":six:",
112 |         "7": ":seven:",
113 |         "8": ":eight:",
114 |         "9": ":nine:",
115 |         "10": ":ten:",
116 |     }
117 | 
118 |     n = str(n)
119 |     if n in num_emoji_map:
120 |         return num_emoji_map[n]
121 |     return False
122 | 
123 | 
def show_diff(actual, expected):
    """
    Render the edits that turn the actual result into the expected one,
    using discord markdown

    Parameters
    ----------
    actual : str
        actual result

    expected : str
        expected result

    Returns
    -------
    str "Hint: ...\n" where text to add is in bold and text to remove is
    struck through
    """
    matcher = difflib.SequenceMatcher(None, actual, expected)

    pieces = []
    for tag, a_lo, a_hi, b_lo, b_hi in matcher.get_opcodes():
        removed = matcher.a[a_lo:a_hi]
        added = matcher.b[b_lo:b_hi]
        if tag == "equal":
            pieces.append(removed)
        elif tag == "insert":
            pieces.append("**" + added + "**")
        elif tag == "delete":
            pieces.append("~~" + removed + "~~")
        elif tag == "replace":
            pieces.append("~~" + removed + "~~**" + added + "**")
        # any other opcode is unexpected and ignored

    return "Hint: " + "".join(pieces) + "\n"
157 | 
158 | 
def is_float(value):
    """
    Check if a value can be converted to a float

    Parameters
    ----------
    value : object
        value to test

    Returns
    -------
    True if float(value) succeeds, False otherwise
    """
    try:
        float(value)
        return True
    except (TypeError, ValueError):
        # TypeError covers values such as None or a list
        return False
165 | 


--------------------------------------------------------------------------------
/vdator/main.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | 
  3 | from dotenv import load_dotenv
  4 | import json, os, requests, traceback
  5 | 
  6 | # APIs
  7 | import discord
  8 | from discord.utils import get
  9 | 
 10 | # parsers
 11 | from helpers import balanced_blockquotes, split_string
 12 | from parsers import *
 13 | from source_detector import SourceDetector
 14 | from reporter import Reporter, add_status_reactions
 15 | from checker import Checker
 16 | from checks.remove_until_first_codec import RemoveUntilFirstCodec
 17 | 
 18 | 
# script location
# directory containing this file; the data files below are opened relative to it
__location__ = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__)))

# initialize parsers
# urls.json: the "urls" mapping is handed to URLParser
with open(os.path.join(__location__, "data/urls.json")) as f:
    urls = json.load(f)["urls"]
    url_parser = URLParser(urls)

bdinfo_parser = BDInfoParser()
paste_parser = PasteParser(bdinfo_parser)
mediainfo_parser = MediaInfoParser()

# codecs.json: the whole document is handed to CodecsParser
with open(os.path.join(__location__, "data/codecs.json")) as f:
    codecs = json.load(f)
    codecs_parser = CodecsParser(codecs)

source_detector = SourceDetector()
reporter = Reporter()
checker = Checker(codecs_parser, source_detector, reporter)

# load environment variables
load_dotenv()

# environment variables
# NOTE: every variable read below is required; os.environ.get returns None
# for an unset variable, so the .strip()/.split() call raises AttributeError
# at import time
IN_GAME = os.environ.get("IN_GAME").strip()
IGNORE_AFTER_LINE = os.environ.get("IGNORE_AFTER_LINE").strip()

# channels to listen in and add reactions
# comma-separated channel names
REVIEW_CHANNELS = [x.strip() for x in os.environ.get("REVIEW_CHANNELS").split(",")]

# channels to send full summary to if from review channel
# comma-separated channel names
REVIEW_REPLY_CHANNELS = [
    x.strip() for x in os.environ.get("REVIEW_REPLY_CHANNELS").split(",")
]

# channels to listen in and post full summaries
# comma-separated channel names
BOT_CHANNELS = [x.strip() for x in os.environ.get("BOT_CHANNELS").split(",")]

# bot version, shown by print_help and print_version
VERSION = "1.4.0"
 58 | 
 59 | 
 60 | def print_help():
 61 |     return (
 62 |         "vdator " + VERSION + " help: "
 63 |         "I take a Pastebin link with BDInfo and MediaInfo dump."
 64 |         " I ignore all input after the line `" + IGNORE_AFTER_LINE + "`."
 65 |         " I add reactions in the following review channels: `"
 66 |         + ", ".join(REVIEW_CHANNELS)
 67 |         + "`,"
 68 |         + " I reply with full summary from review channels to: `"
 69 |         + ", ".join(REVIEW_REPLY_CHANNELS)
 70 |         + "`"
 71 |         + " and post full summaries in: `"
 72 |         + ", ".join(BOT_CHANNELS)
 73 |         + "`."
 74 |         " Add a minus (-) sign in front of unused audio tracks in BDInfo."
 75 |         " I check:```"
 76 |         "Movie/TV name format\n"
 77 |         "IMDB/TMDB ids\n"
 78 |         "Filename\n"
 79 |         "Video language matches first audio language\n"
 80 |         "No muxing mode\n"
 81 |         "Uses latest mkvtoolnix\n"
 82 |         "Video and audio track names match\n"
 83 |         "DTS-HD MA 1.0/2.0 optionally to FLAC, LPCM 1.0/2.0 to FLAC, LPCM > 2.0 to DTS-HD MA\n"
 84 |         "Commentary to AC-3 @ 224 kbps\n"
 85 |         "Commentary track people and spellcheck\n"
 86 |         "Subtitle order\n"
 87 |         "Subtitle default flag\n"
 88 |         "Should have chapters\n"
 89 |         "Chapter languages\n"
 90 |         "Chapter padding```"
 91 |         "**Commands:** !help, !version"
 92 |     )
 93 | 
 94 | 
 95 | def print_version():
 96 |     return "vdator " + VERSION
 97 | 
 98 | 
# on_message reads message.content, so the message content intent
# is switched on in addition to the default intents
intents = discord.Intents.default()
intents.message_content = True
# client that the event handlers below are registered on
client = discord.Client(intents=intents)
102 | 
103 | 
@client.event
async def on_ready():
    """
    Discord client is ready

    Prints the bot user and sets the "playing" presence to IN_GAME
    """
    print("I'm in", client.user, sep="\n")
    presence = discord.Game(name=IN_GAME)
    await client.change_presence(activity=presence)
112 | 
113 | 
# seconds to wait for a paste host before treating the download as failed
PASTE_REQUEST_TIMEOUT = 10


def _check_paste(url, channel_name):
    """
    Download a paste and run all checks on it

    The stages run in order and the first one that fails ends the run:
    its traceback is printed and a failure line is recorded with the reporter.

    Parameters
    ----------
    url : str
        paste url
    channel_name : str
        name of channel the paste was posted in

    Returns
    -------
    str output of the checks, or the failure/error line of the stage that stopped
    """
    try:
        # setup/reset reporter
        reporter.setup()
        # get paste
        # the timeout keeps an unresponsive host from stalling the handler forever
        response = requests.get(url, timeout=PASTE_REQUEST_TIMEOUT)
        response.raise_for_status()
        paste = response.text
    except Exception:
        traceback.print_exc()
        return reporter.print_report("fail", "Failed to get paste")

    try:
        (bdinfo, mediainfo, eac3to) = paste_parser.parse(paste)
    except Exception:
        traceback.print_exc()
        return reporter.print_report("fail", "Paste parser failed")

    if not mediainfo:
        return reporter.print_report(
            "error", "No mediainfo. Are you missing the `General` heading?"
        )

    try:
        # parse mediainfo
        mediainfo = mediainfo_parser.parse(mediainfo)
    except Exception:
        traceback.print_exc()
        return reporter.print_report("fail", "Mediainfo parser failed")

    try:
        # reorder bdinfo audio tracks to follow mediainfo, then expand compatibility tracks
        remove_until_first_codec = RemoveUntilFirstCodec(codecs_parser)
        matcher = MatchBDInfoAudioToMediaInfo(
            remove_until_first_codec, bdinfo, mediainfo
        )
        bdinfo["audio"] = matcher.match_bdinfo_audio_to_mediainfo()
        bdinfo["audio"] = bdinfo_parser.expand_compat_tracks(bdinfo["audio"])
    except Exception:
        traceback.print_exc()
        return reporter.print_report(
            "fail", "Matching bdinfo audio tracks to mediainfo"
        )

    try:
        # setup checker
        checker.setup(bdinfo, mediainfo, eac3to, channel_name)
    except Exception:
        traceback.print_exc()
        return reporter.print_report("fail", "vdator failed to setup checker")

    try:
        return checker.run_checks()
    except Exception:
        traceback.print_exc()
        return reporter.print_report("fail", "vdator failed to parse")


@client.event
async def on_message(message):
    """
    Discord message event

    Answers !help and !version, adds status reactions to the bot's own
    messages, and checks every supported paste url found in the message.
    In bot channels the full report is posted in the channel; in review
    channels reactions are added to the message and the full report is sent
    to the review reply channels.

    Parameters
    ----------
    message : discord.Message class
        discord message
    """
    # direct messages have no guild to look channels up in
    if message.guild is None:
        return

    # get name of channel message was sent in
    # if message is in a thread, the channel name is in message.channel.parent, otherwise its in message.channel.name
    channel_name = (
        str(message.channel.parent)
        if hasattr(message.channel, "parent")
        else str(message.channel.name)
    )
    channel = get(
        message.guild.channels,
        name=channel_name,
        type=discord.ChannelType.text,
    )

    # only listens in bot and review channels
    if not (channel_name in BOT_CHANNELS or channel_name in REVIEW_CHANNELS):
        return

    # help command
    if message.content == "!help":
        await channel.send(print_help())
        return

    # version command
    if message.content == "!version":
        await channel.send(print_version())
        return

    # self
    if message.author == client.user:
        # add status reactions to own messages
        await add_status_reactions(message, message.content)
        return

    supported_urls = url_parser.extract_supported_urls(message.content)

    for url in supported_urls:
        reply = "<" + url + ">" + "\n"
        reply += _check_paste(url, channel_name)

        # report
        reply += "> **Report**\n"
        reply += reporter.display_report()

        # split into multiple messages based on reply length
        # leave room for the block quotes that may be added to both ends of a part
        BLOCK_QUOTES = "```"
        len_limit = (
            int(os.environ.get("DISCORD_MSG_CHAR_LIMIT")) - len(BLOCK_QUOTES) * 2
        )
        replies = split_string(reply, len_limit, "\n")

        # preserve blockquotes
        # a part that ends inside a code block is closed, and the block is reopened in the next part
        for i in range(len(replies) - 1):
            if not balanced_blockquotes(replies[i]):
                replies[i] += BLOCK_QUOTES
                replies[i + 1] = BLOCK_QUOTES + replies[i + 1]

        # fix blockquotes
        replies = [part.replace("``````", "```") for part in replies]

        if channel_name in BOT_CHANNELS:
            # reply in bot channel
            for part in replies:
                await channel.send(part)
        elif channel_name in REVIEW_CHANNELS:
            # add reactions in review channel
            await add_status_reactions(message, reply)

            # and send reply to
            for ch in REVIEW_REPLY_CHANNELS:
                review_reply_channel = get(
                    message.guild.channels, name=ch, type=discord.ChannelType.text
                )
                if review_reply_channel is None:
                    # blank or unknown channel name: skip it and keep serving the others
                    print("Review reply channel not found: " + ch)
                    continue
                for part in replies:
                    await review_reply_channel.send(part)
273 | 
274 | 
# start the bot with the token from the DISCORD_BOT_SECRET environment variable
token = os.environ.get("DISCORD_BOT_SECRET")
client.run(token)
277 | 


--------------------------------------------------------------------------------
/vdator/nltk_people.py:
--------------------------------------------------------------------------------
 1 | import nltk
 2 | from nltk.corpus import stopwords
 3 | 
 4 | 
 5 | def download_nltk_data():
 6 |     # download nltk data
 7 |     ntlk_list = [
 8 |         "stopwords",
 9 |         "punkt",
10 |         "averaged_perceptron_tagger",
11 |         "maxent_ne_chunker",
12 |         "words",
13 |     ]
14 |     for t in ntlk_list:
15 |         nltk.download(t)
16 | 
17 | 
def ie_preprocess(document):
    """
    nltk preprocess text

    Removes english stopwords, splits the text into sentences,
    tokenizes each sentence into words and part-of-speech tags them.

    Parameters
    ----------
    document : str
        text to pre process

    Returns
    -------
    list sentences, each one the nltk.pos_tag output for that sentence
    """
    # a set keeps the stopword filter linear in the number of words
    stop = set(stopwords.words("english"))
    document = " ".join(word for word in document.split() if word not in stop)
    sentences = nltk.sent_tokenize(document)
    return [nltk.pos_tag(nltk.word_tokenize(sent)) for sent in sentences]
37 | 
38 | 
def extract_names(document):
    """
    nltk extract person names

    Parameters
    ----------
    document : str
        text

    Returns
    -------
    list person names, one string per chunk labelled PERSON
    """
    return [
        # join the first item of every leaf in the chunk into one name
        " ".join(leaf[0] for leaf in chunk)
        for tagged_sentence in ie_preprocess(document)
        for chunk in nltk.ne_chunk(tagged_sentence)
        # only subtrees carry a label
        if type(chunk) == nltk.tree.Tree and chunk.label() == "PERSON"
    ]
60 | 


--------------------------------------------------------------------------------
/vdator/parsers/__init__.py:
--------------------------------------------------------------------------------
1 | from .bdinfo_parser import BDInfoParser
2 | from .codecs_parser import CodecsParser
3 | from .match_bdinfo_audio_to_mediainfo import MatchBDInfoAudioToMediaInfo
4 | from .media_info_parser import MediaInfoParser
5 | from .paste_parser import PasteParser
6 | from .url_parser import URLParser
7 | 


--------------------------------------------------------------------------------
/vdator/parsers/bdinfo_parser.py:
--------------------------------------------------------------------------------
  1 | import re
  2 | 
  3 | 
  4 | class BDInfoParser(object):
  5 |     """
  6 |     Parse BDInfo
  7 |     """
  8 | 
  9 |     def __init__(self):
 10 |         self.embedded_track_types = ["ac3 core", "ac3 embedded"]
 11 |         # ['-ac3 core', '-ac3 embedded']
 12 |         self.embedded_track_types_excluded = [
 13 |             "-" + t for t in self.embedded_track_types
 14 |         ]
 15 |         # ['\\(ac3 core:', '\\(ac3 embedded:']
 16 |         self.embedded_track_types_regex = [
 17 |             r"\(" + a + ":" for a in self.embedded_track_types
 18 |         ]
 19 |         # ['\\(-ac3 core:', '\\(-ac3 embedded:']
 20 |         self.embedded_track_types_excluded_regex = [
 21 |             r"\(-" + a + ":.*\)" for a in self.embedded_track_types
 22 |         ]
 23 | 
 24 |     def format_track_name(self, name):
 25 |         """
 26 |         Format track name
 27 | 
 28 |         Parameters
 29 |         ----------
 30 |         name : str
 31 |             track name
 32 | 
 33 |         Returns
 34 |         -------
 35 |         str formatted track name
 36 |         """
 37 |         # remove multiple and trailing spaces
 38 |         name = " ".join(name.split()).strip()
 39 |         return name
 40 | 
 41 |     def format_video_track_name(self, name):
 42 |         """
 43 |         Format video track name
 44 | 
 45 |         Parameters
 46 |         ----------
 47 |         name : str
 48 |             track name
 49 | 
 50 |         Returns
 51 |         -------
 52 |         str formatted video track name
 53 |         """
 54 |         name = self.format_track_name(name)
 55 | 
 56 |         # remove 3d
 57 |         name = name.replace(" / Left Eye", "")
 58 |         name = name.replace(" / Right Eye", "")
 59 | 
 60 |         # force decimal instead of comma in fps
 61 |         name2 = name.split("/")
 62 |         if len(name2) >= 4:
 63 |             name2[3] = name2[3].replace(",", ".")
 64 |         name = "/".join(name2)
 65 | 
 66 |         return name
 67 | 
 68 |     def format_audio_track_name(self, name):
 69 |         """
 70 |         Format track name
 71 | 
 72 |         Parameters
 73 |         ----------
 74 |         name : str
 75 |             track name
 76 | 
 77 |         Returns
 78 |         -------
 79 |         str formatted audio track name
 80 |         """
 81 |         # remove (DTS Core:...)
 82 |         name = re.sub(r"\(DTS Core:.*\)", "", name).strip()
 83 | 
 84 |         # remove excluded (-AC3 Core...) and (-AC3 Embedded...)
 85 |         for ending in self.embedded_track_types_excluded_regex:
 86 |             name = re.sub(ending, "", name, flags=re.IGNORECASE).strip()
 87 | 
 88 |         # remove dialog normalization
 89 |         # needs to be after removing (DTS Core:...)
 90 |         # since the dts core track can have dialog normalization which will break its regex
 91 |         if "DN" in name.upper() and " / " in name:
 92 |             name = name.rpartition(" / ")[0]
 93 | 
 94 |         name = self.format_track_name(name)
 95 | 
 96 |         return name
 97 | 
 98 |     def has_compat_track(self, audio_track_name):
 99 |         audio_track_name = audio_track_name.lower()
100 |         for track_type in self.embedded_track_types:
101 |             if track_type in audio_track_name:
102 |                 return True
103 |         return False
104 | 
105 |     def format_audio_compatibility_track(self, audio_track):
106 |         """
107 |         Format audio compatibility track
108 | 
109 |         Parameters
110 |         ----------
111 |         audio_track : dict
112 |             audio track
113 |             dict{'name':'...', 'language':'...'}
114 | 
115 |         Returns
116 |         -------
117 |         audio track, compatibility track
118 |         [dict{'name':'...', 'language':'...'}, dict{'name':'...', 'language':'...'}]
119 |         """
120 |         audio_track_name_lower = audio_track["name"].lower()
121 |         for i, track_type in enumerate(self.embedded_track_types):
122 |             if track_type in audio_track_name_lower:
123 |                 embedded_track_type_index = i
124 |                 break
125 | 
126 |         audio_parts = re.split(
127 |             self.embedded_track_types_regex[embedded_track_type_index],
128 |             audio_track["name"],
129 |             flags=re.IGNORECASE,
130 |         )
131 |         audio_track["name"] = self.format_track_name(audio_parts[0])
132 | 
133 |         compat_track = {
134 |             "name": self.format_track_name(
135 |                 "Compatibility Track / Dolby Digital Audio / "
136 |                 + audio_parts[1].strip().rstrip(")")
137 |             ),
138 |             "language": audio_track["language"],
139 |         }
140 |         return audio_track, compat_track
141 | 
142 |     def format_audio_track(self, name):
143 |         """
144 |         Split audio track with name and language
145 | 
146 |         Parameters
147 |         ----------
148 |         name : str
149 |             track name
150 | 
151 |         Returns
152 |         -------
153 |         dict{'name':'...', 'language':'...'}
154 |         """
155 |         track = {"name": None, "language": None}
156 |         name = name.strip()
157 |         if " / " in name:
158 |             name_parts = name.split(" / ", 1)
159 |             track["name"] = self.format_audio_track_name(name_parts[1])
160 |             track["language"] = name_parts[0]
161 |         return track
162 | 
163 |     def format_subtitle_track(self, name):
164 |         """
165 |         Format subtitle track with language and bitrate
166 | 
167 |         Parameters
168 |         ----------
169 |         name : str
170 |             track name
171 | 
172 |         Returns
173 |         -------
174 |         dict{'language':'...', 'bitrate':'...'}
175 |         """
176 |         track = {"language": None, "bitrate": None}
177 |         name = name.strip()
178 |         if " / " in name:
179 |             name_parts = name.split(" / ", 1)
180 |             track["language"] = name_parts[0].strip()
181 |             track["bitrate"] = name_parts[1].strip()
182 |         return track
183 | 
184 |     def playlist_report_format_video_track_name(self, name):
185 |         """
186 |         Format playlist report video track name
187 | 
188 |         Parameters
189 |         ----------
190 |         name : str
191 |             track name
192 | 
193 |         Returns
194 |         -------
195 |         str formatted track name
196 |         """
197 |         try:
198 |             parts = name.split()
199 |             kbps_i = parts.index("kbps")
200 |             before = " ".join(parts[: kbps_i - 1]).strip()
201 |             after = " ".join(parts[kbps_i + 1 :]).strip()
202 |             track_name = (
203 |                 before + " / " + parts[kbps_i - 1] + " " + parts[kbps_i] + " / " + after
204 |             )
205 |             track_name = self.format_video_track_name(track_name)
206 |             return track_name
207 |         except ValueError:
208 |             return False
209 | 
210 |     def playlist_report_format_audio_track(self, name):
211 |         """
212 |         Format playlist report audio track
213 | 
214 |         Parameters
215 |         ----------
216 |         name : str
217 |             track name
218 | 
219 |         Returns
220 |         -------
221 |         dict{'name':'...', 'language':'...'}
222 |         """
223 |         track = {"name": None, "language": None, "compat_track": None}
224 |         try:
225 |             name = name.strip()
226 |             name_parts = name.split(" / ")
227 |             name_parts0 = name_parts[0].strip().split()
228 |             name = (
229 |                 " ".join(name_parts0[:-4])
230 |                 + " / "
231 |                 + name_parts0[-1]
232 |                 + " / "
233 |                 + " / ".join(name_parts[1:]).strip()
234 |             )
235 |             track["name"] = self.format_audio_track_name(name)
236 |             track["language"] = name_parts0[3]
237 |             return track
238 |         except ValueError:
239 |             return False
240 | 
241 |     def parse_quick_summary_line(self, bdinfo, l):
242 |         """
243 |         Parse quick summary line
244 | 
245 |         Parameters
246 |         ----------
247 |         bdinfo : dict
248 |             bdinfo dict
249 |         l : str
250 |             quick summary line
251 | 
252 |         Returns
253 |         -------
254 |         bdinfo dict
255 |         """
256 |         l2 = l.strip().lower()
257 |         # parse hidden tracks
258 |         l2 = l2.lstrip("* ")
259 |         if (
260 |             l2.startswith("video:")
261 |             or l2.startswith("audio:")
262 |             or l2.startswith("subtitle:")
263 |         ):
264 |             track_name = l.split(":", 1)[1].strip()
265 |         if l2.startswith("video:"):
266 |             track_name = self.format_video_track_name(track_name)
267 |             bdinfo["video"].append(track_name)
268 |         elif l2.startswith("audio:"):
269 |             audio_track = self.format_audio_track(track_name)
270 |             if self.has_compat_track(audio_track["name"]):
271 |                 (
272 |                     audio_track,
273 |                     compat_track,
274 |                 ) = self.format_audio_compatibility_track(audio_track)
275 |                 audio_track["compat_track"] = compat_track
276 |             bdinfo["audio"].append(audio_track)
277 |         elif l2.startswith("subtitle:"):
278 |             bdinfo["subtitle"].append(self.format_subtitle_track(track_name))
279 |         else:
280 |             # get all other bdinfo entries
281 |             l = l.split(":", 1)
282 |             if len(l) >= 2:
283 |                 bdinfo[l[0].strip().lower()] = l[1].strip()
284 |         return bdinfo
285 | 
286 |     def expand_compat_tracks(self, bdinfo_audio):
287 |         """
288 |         Expand audio compatibility tracks into two tracks and keep order
289 | 
290 |         Returns
291 |         -------
292 |         audio_tracks list
293 |         """
294 |         audio_tracks = list()
295 |         for audio_track in bdinfo_audio:
296 |             audio_tracks.append(audio_track)
297 |             if "compat_track" in audio_track:
298 |                 audio_tracks.append(audio_track["compat_track"])
299 | 
300 |         return audio_tracks
301 | 


--------------------------------------------------------------------------------
/vdator/parsers/codecs_parser.py:
--------------------------------------------------------------------------------
  1 | class CodecsParser(object):
  2 |     """
  3 |     Define codecs
  4 |     """
  5 | 
  6 |     def __init__(self, codecs):
  7 |         """
  8 |         Define codecs
  9 | 
 10 |         Parameters
 11 |         ----------
 12 |         codecs : dict
 13 |           codec definitions
 14 |         """
 15 | 
 16 |         """
 17 |         {
 18 |           "codecs": {
 19 |             "video": {...},
 20 |             "audio": {...},
 21 |             "subtitles": {...},
 22 |             "chapters": {...}
 23 |           },
 24 |           "track_titles": {
 25 |             "video": {...},
 26 |             "audio": {...}
 27 |           },
 28 |           "scan_types": {...}
 29 |         }
 30 |         """
 31 |         self.codecs = codecs
 32 | 
 33 |         # map of all codec names to extensions
 34 |         self.codec_ext = {
 35 |             **self.codecs["codecs"]["video"],
 36 |             **self.codecs["codecs"]["audio"],
 37 |             **self.codecs["codecs"]["subtitles"],
 38 |             **self.codecs["codecs"]["chapters"],
 39 |         }
 40 | 
 41 |     def is_video(self, codec):
 42 |         """
 43 |         Is this a video codec?
 44 | 
 45 |         Parameters
 46 |         ----------
 47 |         codec : str
 48 |           codec
 49 | 
 50 |         Returns
 51 |         -------
 52 |         True if codec is a video codec, False otherwise.
 53 |         """
 54 |         if codec in self.codecs["codecs"]["video"]:
 55 |             return True
 56 |         return False
 57 | 
 58 |     def is_video_title(self, codec):
 59 |         """
 60 |         Is this a video title codec?
 61 | 
 62 |         Parameters
 63 |         ----------
 64 |         codec : str
 65 |           codec
 66 | 
 67 |         Returns
 68 |         -------
 69 |         True if codec is a video title codec, False otherwise.
 70 |         """
 71 |         if codec in self.codecs["track_titles"]["video"]:
 72 |             return True
 73 |         return False
 74 | 
 75 |     def is_video_3d(self, codec):
 76 |         """
 77 |         Is this a 3d video codec?
 78 | 
 79 |         Parameters
 80 |         ----------
 81 |         codec : str
 82 |           codec
 83 | 
 84 |         Returns
 85 |         -------
 86 |         True if codec is a 3d video, False otherwise.
 87 |         """
 88 |         if codec in self.codecs["codecs"]["video_3d"]:
 89 |             return True
 90 |         return False
 91 | 
 92 |     def is_audio(self, codec):
 93 |         """
 94 |         Is this an audio codec?
 95 | 
 96 |         Parameters
 97 |         ----------
 98 |         codec : str
 99 |           codec
100 | 
101 |         Returns
102 |         -------
103 |         True if codec is an audio codec, False otherwise.
104 |         """
105 |         if codec in self.codecs["codecs"]["audio"]:
106 |             return True
107 |         return False
108 | 
109 |     def is_audio_title(self, codec):
110 |         """
111 |         Is this an audio title codec?
112 | 
113 |         Parameters
114 |         ----------
115 |         codec : str
116 |           codec
117 | 
118 |         Returns
119 |         -------
120 |         True if codec is an audio title codec, False otherwise.
121 |         """
122 |         if codec in self.codecs["track_titles"]["audio"]:
123 |             return True
124 |         return False
125 | 
126 |     def is_sub(self, codec):
127 |         """
128 |         Is this a subtitle codec?
129 | 
130 |         Parameters
131 |         ----------
132 |         codec : str
133 |           codec
134 | 
135 |         Returns
136 |         -------
137 |         True if codec is a subtitle codec, False otherwise.
138 |         """
139 |         if codec in self.codecs["codecs"]["subtitles"]:
140 |             return True
141 |         return False
142 | 
143 |     def is_chapter(self, codec):
144 |         """
145 |         Is this a chapter codec?
146 | 
147 |         Parameters
148 |         ----------
149 |         codec : str
150 |           codec
151 | 
152 |         Returns
153 |         -------
154 |         True if codec is a chapter codec, False otherwise.
155 |         """
156 |         if codec in self.codecs["codecs"]["chapters"]:
157 |             return True
158 |         return False
159 | 
160 |     def is_codec(self, codec):
161 |         """
162 |         Is this a valid codec?
163 | 
164 |         Parameters
165 |         ----------
166 |         codec : str
167 |           codec
168 | 
169 |         Returns
170 |         -------
171 |         True if valid codec, False otherwise.
172 |         """
173 |         return codec in self.codec_ext
174 | 
175 |     def get_codec_ext(self, codec):
176 |         """
177 |         Get codec extension. Checks if codec is valid.
178 | 
179 |         Parameters
180 |         ----------
181 |         codec : str
182 |           codec
183 | 
184 |         Returns
185 |         -------
186 |         str codec extension
187 |         """
188 |         if codec not in self.codec_ext:
189 |             return ""
190 |         return self.codec_ext[codec]
191 | 
192 |     def get_video_codec_title_name(self, codec):
193 |         """
194 |         Get name of video codec for title. Checks if video codec is valid.
195 | 
196 |         Parameters
197 |         ----------
198 |         codec : str
199 |           codec
200 | 
201 |         Returns
202 |         -------
203 |         str codec title name
204 |         """
205 |         if codec not in self.codecs["track_titles"]["video"]:
206 |             return ""
207 |         return self.codecs["track_titles"]["video"][codec]
208 | 
209 |     def get_audio_codec_title_name(self, codec):
210 |         """
211 |         Get name of audio codec for title. Checks if audio codec is valid.
212 | 
213 |         Parameters
214 |         ----------
215 |         codec : str
216 |           codec
217 | 
218 |         Returns
219 |         -------
220 |         str codec title name
221 |         """
222 |         if codec not in self.codecs["track_titles"]["audio"]:
223 |             return ""
224 |         return self.codecs["track_titles"]["audio"][codec]
225 | 
226 |     def get_scan_type_title_name(self, scan_type, video_fps):
227 |         """
228 |         Get name of video scan type for title. Checks if scan type is valid.
229 | 
230 |         Parameters
231 |         ----------
232 |         scan_type : str
233 |           scan type
234 | 
235 |         video_fps : str
236 |           frame rate
237 | 
238 |         Returns
239 |         -------
240 |         str scan type title name, boolean if actually progressive
241 |         """
242 |         actually_progressive = False
243 |         scan_type = scan_type.strip().lower()
244 | 
245 |         if len(scan_type) >= 1:
246 |             scan_type = "progressive" if scan_type[0] == "p" else "interlaced"
247 | 
248 |         # interlaced @ 25fps is actually progressive
249 |         # but it's still called interlaced
250 |         if scan_type == "interlaced" and int(video_fps) == 25:
251 |             actually_progressive = True
252 | 
253 |         if scan_type not in self.codecs["scan_types"]:
254 |             return "", actually_progressive
255 |         return self.codecs["scan_types"][scan_type], actually_progressive
256 | 


--------------------------------------------------------------------------------
/vdator/parsers/match_bdinfo_audio_to_mediainfo.py:
--------------------------------------------------------------------------------
 1 | import copy
 2 | 
 3 | 
 4 | class MatchBDInfoAudioToMediaInfo(object):
 5 |     def __init__(self, remove_until_first_codec, bdinfo, mediainfo):
 6 |         self.remove_until_first_codec = remove_until_first_codec
 7 |         self.bdinfo = bdinfo
 8 |         self.mediainfo = mediainfo
 9 | 
10 |     def match_bdinfo_audio_to_mediainfo(self):
11 |         # tries to match bdinfo audio tracks to mediainfo by codec and channels
12 |         # for every mediainfo track, pick first matching bdinfo track
13 |         # returns a sorted list of bdinfo audio tracks
14 |         sorted_bdinfo_audio_tracks = list()
15 | 
16 |         bdinfo_audio_tracks = copy.deepcopy(self.bdinfo["audio"])
17 |         mediainfo_audio_tracks = copy.deepcopy(self.mediainfo["audio"])
18 | 
19 |         for mediainfo_audio_track in mediainfo_audio_tracks:
20 |             # go through every mediainfo audio track
21 |             mediainfo_audio_title, mediainfo_audio_track_parts = None, []
22 | 
23 |             if "title" in mediainfo_audio_track:
24 |                 (
25 |                     mediainfo_audio_title,
26 |                     _,
27 |                     _,
28 |                 ) = self.remove_until_first_codec.remove(mediainfo_audio_track["title"])
29 |                 if mediainfo_audio_title:
30 |                     mediainfo_audio_track_parts = mediainfo_audio_title.split(" / ")
31 | 
32 |             # find the next matching bdinfo audio track
33 |             for i, bdinfo_audio_track in enumerate(bdinfo_audio_tracks):
34 |                 bdinfo_audio_title = None
35 |                 if "name" in bdinfo_audio_track:
36 |                     (
37 |                         bdinfo_audio_title,
38 |                         _,
39 |                         _,
40 |                     ) = self.remove_until_first_codec.remove(bdinfo_audio_track["name"])
41 | 
42 |                 if len(mediainfo_audio_track_parts) > 1 and bdinfo_audio_title:
43 |                     bdinfo_audio_track_parts = bdinfo_audio_title.split(" / ")
44 |                     if len(bdinfo_audio_track_parts) > 1:
45 |                         if (
46 |                             bdinfo_audio_track_parts[0]
47 |                             == mediainfo_audio_track_parts[0]
48 |                             and bdinfo_audio_track_parts[1]
49 |                             == mediainfo_audio_track_parts[1]
50 |                         ):
51 |                             # codecs and channel match
52 |                             sorted_bdinfo_audio_tracks.append(bdinfo_audio_track)
53 |                             del bdinfo_audio_tracks[i]
54 |                             break
55 | 
56 |             if len(bdinfo_audio_tracks) == 0:
57 |                 break
58 | 
59 |         if len(bdinfo_audio_tracks) > 0:
60 |             # add leftover bdinfo audio tracks
61 |             sorted_bdinfo_audio_tracks.extend(bdinfo_audio_tracks)
62 | 
63 |         return sorted_bdinfo_audio_tracks
64 | 


--------------------------------------------------------------------------------
/vdator/parsers/media_info_parser.py:
--------------------------------------------------------------------------------
  1 | class MediaInfoParser(object):
  2 |     """
  3 |     Parse MediaInfo
  4 |     """
  5 | 
  6 |     def parse(self, text):
  7 |         """
  8 |         Parse mediainfo
  9 | 
 10 |         Parameters
 11 |         ----------
 12 |         text : list
 13 |             list of mediainfo lines
 14 | 
 15 |         Returns
 16 |         -------
 17 |         dict mediainfo with 'general', 'video', 'audio', 'text', and 'menu' keys
 18 |         """
 19 |         mediainfo_sections = ["general", "video", "audio", "text", "menu"]
 20 |         # dictionary of lists for mediainfo data
 21 |         mediainfo = dict((k, list()) for k in mediainfo_sections)
 22 |         # starts at 0 on first loop
 23 |         section_index = dict((k, -1) for k in mediainfo_sections)
 24 |         # current mediainfo section
 25 |         curr_sect = None
 26 | 
 27 |         # skip blank lines
 28 |         text_list = list(filter(None, text))
 29 | 
 30 |         for l in text_list:
 31 |             # new section of mediainfo
 32 |             section_word = l.strip().split()[0].strip().lower()
 33 |             if section_word in mediainfo_sections:
 34 |                 # track current section
 35 |                 curr_sect = section_word
 36 |                 # increment index
 37 |                 section_index[section_word] += 1
 38 |                 # store new list for chapters, and new dictionary for other sections
 39 |                 mediainfo[section_word].append(
 40 |                     list() if section_word == "menu" else dict()
 41 |                 )
 42 |                 continue
 43 | 
 44 |             # split mediainfo data line
 45 |             curr = l.split(" : ", 1)
 46 | 
 47 |             if curr_sect in ["general", "video", "audio", "text"] and len(curr) >= 2:
 48 |                 # assign section to dictionary
 49 |                 mediainfo[curr_sect][section_index[curr_sect]][
 50 |                     self.format_key(curr[0])
 51 |                 ] = curr[1]
 52 |             elif curr_sect == "menu":
 53 |                 mediainfo["menu"][section_index[curr_sect]].append(
 54 |                     self.parse_chapter(curr)
 55 |                 )
 56 | 
 57 |         return mediainfo
 58 | 
 59 |     def format_key(self, key):
 60 |         """
 61 |         Format keys into abc_def_ghi
 62 | 
 63 |         Parameters
 64 |         ----------
 65 |         key : str
 66 |             mediainfo key
 67 | 
 68 |         Returns
 69 |         -------
 70 |         str formatted mediainfo key
 71 |         """
 72 |         return (
 73 |             key.strip()
 74 |             .replace(" ", "_")
 75 |             .replace("/", "_")
 76 |             .replace("(", "")
 77 |             .replace(")", "")
 78 |             .replace("*", "_")
 79 |             .replace(",", "")
 80 |             .lower()
 81 |         )
 82 | 
 83 |     def parse_chapter(self, curr):
 84 |         """
 85 |         Parse a single chapter
 86 | 
 87 |         Parameters
 88 |         ----------
 89 |         curr : list
 90 |             current line
 91 | 
 92 |         Returns
 93 |         -------
 94 |         dict chapter
 95 |         {"time": "...", "titles": [...], "languages": [...]}
 96 |         languages list has unique elements
 97 |         """
 98 |         chapter = {"time": None, "titles": list(), "languages": set()}
 99 |         if len(curr) >= 1:
100 |             chapter["time"] = curr[0].strip()
101 |         if len(curr) >= 2:
102 |             chapter_title = {
103 |                 "language": None,
104 |                 "title": None,
105 |             }
106 |             if " - " in curr[1]:
107 |                 langs = curr[1].split(" - ")
108 |                 for lang in langs:
109 |                     if ":" in lang:
110 |                         # chapter has a language
111 |                         ch = self.format_chapter(lang)
112 |                         chapter["titles"].append(ch)
113 |                         chapter["languages"].add(ch["language"])
114 |             elif ":" in curr[1]:
115 |                 # chapter has a language
116 |                 ch = self.format_chapter(curr[1])
117 |                 chapter["titles"].append(ch)
118 |                 chapter["languages"].add(ch["language"])
119 |             else:
120 |                 # no language, just store title
121 |                 chapter_title["title"] = curr[1]
122 |                 chapter["titles"].append(chapter_title)
123 |         chapter["languages"] = list(chapter["languages"])
124 |         return chapter
125 | 
126 |     def format_chapter(self, text):
127 |         """
128 |         Format chapter language and title
129 | 
130 |         Parameters
131 |         ----------
132 |         text : str
133 |             chapter text
134 | 
135 |         Returns
136 |         -------
137 |         dict chapter with 'language', 'title' keys
138 |         """
139 |         l = text.split(":", 1)
140 |         chapter = {"language": l[0].strip(), "title": l[1]}
141 |         return chapter
142 | 


--------------------------------------------------------------------------------
/vdator/parsers/paste_parser.py:
--------------------------------------------------------------------------------
  1 | from dotenv import load_dotenv
  2 | from enum import Enum
  3 | import os
  4 | 
  5 | # load environment variables
  6 | load_dotenv()
  7 | 
  8 | # environment variables
  9 | IGNORE_AFTER_LINE = os.environ.get("IGNORE_AFTER_LINE").strip()
 10 | IGNORE_AFTER_LINE_METHOD = os.environ.get("IGNORE_AFTER_LINE_METHOD").strip()
 11 | IGNORE_UNTIL_BLANK_LINE_PREFIXES = [
 12 |     x.strip()
 13 |     for x in os.getenv("IGNORE_UNTIL_BLANK_LINE_PREFIXES", "").strip().split(",")
 14 | ]
 15 | 
 16 | 
 17 | class BDInfoType(Enum):
 18 |     QUICK_SUMMARY = 1
 19 |     PLAYLIST_REPORT = 2
 20 | 
 21 | 
 22 | class PasteParser(object):
 23 |     def __init__(self, bdinfo_parser):
 24 |         self.bdinfo_parser = bdinfo_parser
 25 | 
 26 |     class Section(Enum):
 27 |         QUICK_SUMMARY = 1
 28 |         MEDIAINFO = 2
 29 |         PLAYLIST_REPORT = 3
 30 |         EAC3TO_LOG = 4
 31 | 
 32 |     class Section2(Enum):
 33 |         PLAYLIST_VIDEO = 1
 34 |         PLAYLIST_AUDIO = 2
 35 |         PLAYLIST_SUBTITLES = 3
 36 | 
 37 |     class Section3(Enum):
 38 |         PLAYLIST_INNER_VIDEO = 1
 39 |         PLAYLIST_INNER_AUDIO = 2
 40 | 
 41 |     def parse(self, text):
 42 |         """
 43 |         Parse text to extract bdinfo, mediainfo and eac3to log
 44 | 
 45 |         Parameters
 46 |         ----------
 47 |         text : str
 48 |             text to parse
 49 | 
 50 |         Returns
 51 |         -------
 52 |         bdinfo, mediainfo, and eac3to lists
 53 |         """
 54 |         bdinfo = {"video": list(), "audio": list(), "subtitle": list()}
 55 |         mediainfo = list()
 56 |         eac3to = list()
 57 |         eac3to_index = -1
 58 | 
 59 |         sect = None
 60 |         sect2 = None
 61 |         sect3 = None
 62 | 
 63 |         # parse bdinfo
 64 |         lines = text.splitlines()
 65 |         ignore_next_lines, did_first_mediainfo = False, False
 66 |         for l in lines:
 67 |             # break after ignore line
 68 |             if self._isIgnoreAfterLine(l):
 69 |                 break
 70 | 
 71 |             if not l.strip():
 72 |                 # don't ignore input after blank line
 73 |                 ignore_next_lines = False
 74 |                 # skip blank lines
 75 |                 continue
 76 | 
 77 |             if ignore_next_lines:
 78 |                 continue
 79 | 
 80 |             if (
 81 |                 IGNORE_UNTIL_BLANK_LINE_PREFIXES
 82 |                 and IGNORE_UNTIL_BLANK_LINE_PREFIXES[0] != ""
 83 |             ):
 84 |                 l3 = l.strip().lower()
 85 |                 for x in IGNORE_UNTIL_BLANK_LINE_PREFIXES:
 86 |                     if l3.startswith(x):
 87 |                         ignore_next_lines = True
 88 |                         break
 89 | 
 90 |             l = l.strip()
 91 |             l2 = l.strip().lower()
 92 | 
 93 |             # determine current section
 94 |             # limit to first mediainfo
 95 |             if (
 96 |                 l2.startswith("quick summary")
 97 |                 or l2.startswith("disc title")
 98 |                 or l2.startswith("disc label")
 99 |             ):
100 |                 sect = self.Section.QUICK_SUMMARY
101 |                 bdinfo["type"] = BDInfoType.QUICK_SUMMARY
102 |             elif l2.startswith("playlist report"):
103 |                 sect = self.Section.PLAYLIST_REPORT
104 |                 bdinfo["type"] = BDInfoType.PLAYLIST_REPORT
105 |             elif l2.startswith("eac3to v"):
106 |                 sect = self.Section.EAC3TO_LOG
107 |                 eac3to.append(list())
108 |                 eac3to_index += 1
109 |             elif l2.startswith("general"):
110 |                 if did_first_mediainfo:
111 |                     sect = None
112 |                 else:
113 |                     sect = self.Section.MEDIAINFO
114 |                     did_first_mediainfo = True
115 | 
116 |             if sect == self.Section.QUICK_SUMMARY:
117 |                 # parse quick summary into bdinfo dict
118 |                 self.bdinfo_parser.parse_quick_summary_line(bdinfo, l)
119 |             elif sect == self.Section.PLAYLIST_REPORT:
120 | 
121 |                 if l2.startswith("video:"):
122 |                     sect2 = self.Section2.PLAYLIST_VIDEO
123 |                 elif l2.startswith("audio:"):
124 |                     sect2 = self.Section2.PLAYLIST_AUDIO
125 |                 elif l2.startswith("subtitles:"):
126 |                     sect2 = self.Section2.PLAYLIST_SUBTITLES
127 | 
128 |                 if l2.startswith("-----"):
129 |                     if sect2 == self.Section2.PLAYLIST_VIDEO:
130 |                         sect3 = self.Section3.PLAYLIST_INNER_VIDEO
131 |                     elif sect2 == self.Section2.PLAYLIST_AUDIO:
132 |                         sect3 = self.Section3.PLAYLIST_INNER_AUDIO
133 |                 else:
134 |                     # skip tracks that start with minus sign
135 |                     if l.startswith("-"):
136 |                         continue
137 |                     # parse hidden tracks
138 |                     l = l.lstrip("* ")
139 | 
140 |                     if (
141 |                         sect2 == self.Section2.PLAYLIST_VIDEO
142 |                         and sect3 == self.Section3.PLAYLIST_INNER_VIDEO
143 |                     ):
144 |                         # format video track name with slashes
145 |                         track_name = (
146 |                             self.bdinfo_parser.playlist_report_format_video_track_name(
147 |                                 l
148 |                             )
149 |                         )
150 |                         if track_name:
151 |                             bdinfo["video"].append(track_name)
152 | 
153 |                     elif (
154 |                         sect2 == self.Section2.PLAYLIST_AUDIO
155 |                         and sect3 == self.Section3.PLAYLIST_INNER_AUDIO
156 |                     ):
157 |                         audio_track = (
158 |                             self.bdinfo_parser.playlist_report_format_audio_track(l)
159 |                         )
160 |                         if self.bdinfo_parser.has_compat_track(l):
161 |                             (
162 |                                 audio_track,
163 |                                 compat_track,
164 |                             ) = self.bdinfo_parser.format_audio_compatibility_track(
165 |                                 audio_track
166 |                             )
167 |                             audio_track["compat_track"] = compat_track
168 |                         bdinfo["audio"].append(audio_track)
169 | 
170 |             elif sect == self.Section.MEDIAINFO:
171 |                 mediainfo.append(l)
172 | 
173 |             elif sect == self.Section.EAC3TO_LOG:
174 |                 if l.startswith("Done."):
175 |                     sect = None
176 |                 else:
177 |                     eac3to[eac3to_index].append(l)
178 | 
179 |         return bdinfo, mediainfo, eac3to
180 | 
181 |     def _isIgnoreAfterLine(self, l):
182 |         """
183 |         Check if we should ignore all input after the current line
184 | 
185 |         Parameters
186 |         ----------
187 |         l : str
188 |             current line
189 | 
190 |         Returns
191 |         -------
192 |         True if should ignore further input, False otherwise
193 |         """
194 |         if IGNORE_AFTER_LINE_METHOD == "equals":
195 |             if IGNORE_AFTER_LINE == l:
196 |                 return True
197 |         elif IGNORE_AFTER_LINE_METHOD == "contains":
198 |             if IGNORE_AFTER_LINE in l:
199 |                 return True
200 |         return False
201 | 


--------------------------------------------------------------------------------
/vdator/parsers/url_parser.py:
--------------------------------------------------------------------------------
 1 | from urllib.parse import urlparse
 2 | import re
 3 | 
 4 | 
 5 | class URLParser(object):
 6 |     def __init__(self, urls):
 7 |         # regex used to extract urls from message
 8 |         self.urls_regex = r"(?P<url>https?://[^\s]+)"
 9 | 
10 |         """
11 |         # hostname
12 |         'example.com': {
13 |             # regex to get paste's unique identifier
14 |             'slug_regex': 'https://example.com/(.*)',
15 |             
16 |             # link to raw text using {} in place of the unique identifier
17 |             'raw_url': 'https://example.com/raw/{}'
18 |         }
19 |         """
20 |         self.urls = urls
21 | 
22 |     def extract_supported_urls(self, text):
23 |         # list of urls
24 |         urls = re.findall(self.urls_regex, text)
25 |         raw_urls = list()
26 |         for url in urls:
27 |             o = urlparse(url)
28 |             # check if url is supported
29 |             if o.hostname in self.urls:
30 |                 raw_url = self.get_raw_url(url, o.hostname, o.path)
31 |                 raw_urls.append(raw_url)
32 |         return raw_urls
33 | 
34 |     def get_raw_url(self, url, hostname, path):
35 |         # get url to raw content
36 |         raw_url = url
37 | 
38 |         # check if its not already a raw url
39 |         is_already_raw_url = re.search(
40 |             self.urls[hostname]["raw_url_regex"].format("(.*)"), url
41 |         )
42 | 
43 |         if not is_already_raw_url:
44 |             slug = re.search(self.urls[hostname]["slug_regex"], url)
45 |             if slug:
46 |                 raw_url = self.urls[hostname]["raw_url"].format(slug.group(1))
47 | 
48 |         return raw_url
49 | 
50 |     def get_urls(self):
51 |         return self.urls
52 | 


--------------------------------------------------------------------------------
/vdator/reporter.py:
--------------------------------------------------------------------------------
  1 | import re
  2 | 
  3 | # APIs
  4 | import emoji
  5 | from helpers import num_to_emoji
  6 | 
  7 | 
  8 | class Reporter(object):
  9 |     """
 10 |     Keep track of types of responses
 11 |     """
 12 | 
 13 |     def __init__(self):
 14 |         self.setup()
 15 | 
 16 |     def setup(self):
 17 |         """
 18 |         Setup/Reset the reporter
 19 |         """
 20 |         self.report = {"correct": 0, "warning": 0, "error": 0, "info": 0, "fail": 0}
 21 | 
 22 |     def print_report(self, type, message, record=True, new_line=True):
 23 |         """
 24 |         Display report
 25 | 
 26 |         Parameters
 27 |         ----------
 28 |         type : ReportType
 29 |             type of report: 'correct', 'warning', 'error', or 'info'
 30 | 
 31 |         message : str
 32 |             reply message
 33 | 
 34 |         record : bool
 35 |             should this report be kept track of in total
 36 | 
 37 |         new_line : bool
 38 |             print a new line after message
 39 |             default: True
 40 |         """
 41 |         if record:
 42 |             self.report[type.lower()] += 1
 43 | 
 44 |         msg_type = {
 45 |             "correct": emoji.emojize(":ballot_box_with_check:", language="alias"),
 46 |             "warning": emoji.emojize(":warning:", language="alias"),
 47 |             "error": emoji.emojize(":x:", language="alias"),
 48 |             "info": emoji.emojize(":information_source:", language="alias"),
 49 |             "fail": emoji.emojize(":interrobang:", language="alias"),
 50 |         }
 51 | 
 52 |         if type.lower() in msg_type:
 53 |             type = msg_type[type.lower()] + " "
 54 |         else:
 55 |             type = "[" + type.upper() + "] "
 56 | 
 57 |         return type + message + ("\n" if new_line else "")
 58 | 
 59 |     def get_report(self):
 60 |         """
 61 |         Get the report results
 62 | 
 63 |         Returns
 64 |         -------
 65 |         report dict: {'correct' : int, 'warning' : int, 'error' : int, 'info' : int, 'fail' : int}
 66 |         """
 67 |         return self.report
 68 | 
 69 |     def display_report(self):
 70 |         """
 71 |         Get the report reply
 72 | 
 73 |         Returns
 74 |         -------
 75 |         str reply
 76 |         """
 77 |         reply = str(self.report["correct"]) + " correct, "
 78 | 
 79 |         reply += str(self.report["warning"]) + " warning"
 80 |         reply += "" if self.report["warning"] == 1 else "s"
 81 | 
 82 |         reply += ", " + str(self.report["error"]) + " error"
 83 |         reply += "" if self.report["error"] == 1 else "s"
 84 | 
 85 |         reply += ", " + str(self.report["fail"]) + " failure"
 86 |         reply += "" if self.report["fail"] == 1 else "s"
 87 | 
 88 |         reply += ", and " + str(self.report["info"]) + " info"
 89 |         return reply
 90 | 
 91 | 
 92 | async def react_num_errors(message, num_errors):
 93 |     """
 94 |     Add status reactions to discord message with number of errors
 95 |     Adds a plus sign if more than 10 errors
 96 | 
 97 |     Parameters
 98 |     ----------
 99 |     message : discord.Message
100 |         discord message to react to
101 | 
102 |     num_errors : int
103 |         number of errors
104 |     """
105 |     if num_errors in range(1, 11):
106 |         # errors between 1 and 10
107 |         em = num_to_emoji(num_errors)
108 |         if em:
109 |             await message.add_reaction(emoji.emojize(em, language="alias"))
110 |     elif num_errors > 10:
111 |         # more than 10 errors
112 |         await message.add_reaction(emoji.emojize(num_to_emoji(10), language="alias"))
113 |         await message.add_reaction(emoji.emojize(":heavy_plus_sign:", language="alias"))
114 | 
115 | 
async def add_status_reactions(message, content):
    """
    React to a discord message according to the report summary in content

    Nothing is added when content holds no summary of the form
    'N correct, N warning(s), N error(s), N failure(s), and N info'.

    Parameters
    ----------
    message : discord.Message
        discord message to react to

    content : str
        content to parse to determine reactions
    """
    summary = re.search(
        r"(\d+)\scorrect,\s(\d+)\swarnings?,\s(\d+)\serrors?,\s(\d+)\sfailures?,\sand\s(\d+)\sinfo",
        content,
    )
    if not summary:
        return

    # groups are ordered: correct, warning, error, fail, info
    _, warnings, errors, failures, _ = (int(count) for count in summary.groups())

    if warnings == 0 and errors == 0 and failures == 0:
        # clean report
        await message.add_reaction(
            emoji.emojize(":ballot_box_with_check:", language="alias")
        )
        return

    if warnings > 0:
        await message.add_reaction(emoji.emojize(":warning:", language="alias"))
    if errors > 0:
        await message.add_reaction(emoji.emojize(":x:", language="alias"))

    # warnings and errors share one number reaction
    combined = warnings + errors
    if combined > 0:
        await react_num_errors(message, combined)

    # failures get their own marker and number
    if failures > 0:
        await message.add_reaction(emoji.emojize(":interrobang:", language="alias"))
        await react_num_errors(message, failures)
161 | 


--------------------------------------------------------------------------------
/vdator/requirements.txt:
--------------------------------------------------------------------------------
 1 | aiohttp==3.8.3
 2 | aiosignal==1.3.1
 3 | async-timeout==4.0.2
 4 | attrs==22.2.0
 5 | black==22.12.0
 6 | certifi==2022.12.7
 7 | charset-normalizer==2.1.1
 8 | cinemagoer==2022.12.27
 9 | click==8.1.3
10 | discord-markdown==0.4.0
11 | discord.py==2.1.0
12 | emoji==2.2.0
13 | Flask==2.2.2
14 | frozenlist==1.3.3
15 | greenlet==2.0.2
16 | hunspell==0.5.5
17 | idna==3.4
18 | iso-639==0.4.5
19 | itsdangerous==2.1.2
20 | Jinja2==3.1.2
21 | joblib==1.2.0
22 | langdetect==1.0.9
23 | lxml==4.9.2
24 | MarkupSafe==2.1.2
25 | multidict==6.0.3
26 | mypy-extensions==0.4.3
27 | nltk==3.8.1
28 | numpy==1.23.5
29 | pathspec==0.10.3
30 | platformdirs==2.6.0
31 | pydash==5.1.2
32 | python-dotenv==0.21.0
33 | regex==2022.10.31
34 | requests==2.31.0
35 | six==1.16.0
36 | SQLAlchemy==1.4.46
37 | tmdbsimple==2.9.1
38 | tomli==2.0.1
39 | tqdm==4.64.1
40 | Unidecode==1.3.6
41 | urllib3==1.26.13
42 | Werkzeug==2.2.3
43 | yarl==1.8.2
44 | 


--------------------------------------------------------------------------------
/vdator/source_detector.py:
--------------------------------------------------------------------------------
  1 | import os, re
  2 | 
  3 | # 'mediainfo' to use mediainfo fields
  4 | # 'nobdinfo' to assume DVD if no bdinfo given
  5 | DVD_CHECK_MODE = os.environ.get("DVD_CHECK_MODE").strip()
  6 | 
  7 | # detect if DVD, 1080p BluRay or UHD BluRay
  8 | class SourceDetector(object):
  9 |     """
 10 |     Define ways to detect source
 11 |     """
 12 | 
 13 |     def setup(self, bdinfo, mediainfo):
 14 |         """
 15 |         Parameters
 16 |         ----------
 17 |         bdinfo : dict
 18 |             bdinfo
 19 | 
 20 |         mediainfo : dict
 21 |             mediainfo
 22 |         """
 23 |         self.bdinfo = bdinfo
 24 |         self.mediainfo = mediainfo
 25 | 
 26 |     def is_dvd(self):
 27 |         """
 28 |         Is this source a DVD?
 29 | 
 30 |         Returns
 31 |         -------
 32 |         boolean True if DVD, False otherwise
 33 |         """
 34 |         is_dvd = False
 35 | 
 36 |         if DVD_CHECK_MODE == "nobdinfo":
 37 |             if not self._has_bdinfo():
 38 |                 # no bdinfo given, assume dvds
 39 |                 is_dvd = True
 40 |         elif DVD_CHECK_MODE == "mediainfo":
 41 |             if (
 42 |                 "video" in self.mediainfo
 43 |                 and len(self.mediainfo["video"]) >= 1
 44 |                 and "height" in self.mediainfo["video"][0]
 45 |             ):
 46 |                 height = int(
 47 |                     "".join(re.findall(r"[\d]+", self.mediainfo["video"][0]["height"]))
 48 |                 )
 49 |                 if height <= 576:
 50 |                     # height is 480p or 576p for dvds
 51 |                     # Note: checking standard is NTSC or PAL won't work, as some BDs are NTSC
 52 |                     is_dvd = True
 53 | 
 54 |         return is_dvd
 55 | 
 56 |     def is_ntsc_dvd(self):
 57 |         """
 58 |         Is this source an NTSC DVD?
 59 | 
 60 |         Returns
 61 |         -------
 62 |         boolean True if NTSC DVD, False otherwise
 63 |         """
 64 |         is_ntsc = False
 65 | 
 66 |         if self.is_dvd():
 67 |             if (
 68 |                 "video" in self.mediainfo
 69 |                 and len(self.mediainfo["video"]) >= 1
 70 |                 and "standard" in self.mediainfo["video"][0]
 71 |             ):
 72 |                 if self.mediainfo["video"][0]["standard"].upper() == "NTSC":
 73 |                     is_ntsc = True
 74 | 
 75 |         return is_ntsc
 76 | 
 77 |     def is_pal_dvd(self):
 78 |         """
 79 |         Is this source a PAL DVD?
 80 | 
 81 |         Returns
 82 |         -------
 83 |         boolean True if PAL DVD, False otherwise
 84 |         """
 85 |         is_pal = False
 86 | 
 87 |         if self.is_dvd():
 88 |             if (
 89 |                 "video" in self.mediainfo
 90 |                 and len(self.mediainfo["video"]) >= 1
 91 |                 and "standard" in self.mediainfo["video"][0]
 92 |             ):
 93 |                 if self.mediainfo["video"][0]["standard"].upper() == "PAL":
 94 |                     is_pal = True
 95 | 
 96 |         return is_pal
 97 | 
 98 |     def is_uhd(self):
 99 |         """
100 |         Is this source a UHD BluRay?
101 | 
102 |         Returns
103 |         -------
104 |         boolean True if UHD, False otherwise
105 |         """
106 |         is_uhd = False
107 | 
108 |         if (
109 |             "video" in self.mediainfo
110 |             and len(self.mediainfo["video"]) >= 1
111 |             and "height" in self.mediainfo["video"][0]
112 |         ):
113 |             height = int(
114 |                 "".join(re.findall(r"[\d]+", self.mediainfo["video"][0]["height"]))
115 |             )
116 |             if height == 2160:
117 |                 is_uhd = True
118 | 
119 |         return is_uhd
120 | 
121 |     def is_dv(self):
122 |         """
123 |         Does this source have dolby vision?
124 | 
125 |         Returns
126 |         -------
127 |         boolean True if DV, False otherwise
128 |         """
129 |         is_dv = False
130 | 
131 |         if (
132 |             "video" in self.mediainfo
133 |             and len(self.mediainfo["video"]) >= 1
134 |             and "hdr_format" in self.mediainfo["video"][0]
135 |         ):
136 |             if "Dolby Vision" in self.mediainfo["video"][0]["hdr_format"]:
137 |                 is_dv = True
138 | 
139 |         return is_dv
140 | 
141 |     def _has_bdinfo(self):
142 |         """
143 |         Does the paste include bdinfo?
144 | 
145 |         Returns
146 |         -------
147 |         boolean True if has bdinfo, False otherwise
148 |         """
149 |         has_bdinfo = False
150 | 
151 |         if (
152 |             len(self.bdinfo["video"]) == 0
153 |             and len(self.bdinfo["audio"]) == 0
154 |             and len(self.bdinfo["subtitle"]) == 0
155 |         ):
156 |             has_bdinfo = False
157 |         else:
158 |             has_bdinfo = True
159 | 
160 |         return has_bdinfo
161 | 


--------------------------------------------------------------------------------