├── .gitattributes ├── .github ├── dependabot.yml └── workflows │ ├── integration-tests.yml │ └── lint.yml ├── .gitignore ├── LICENSE ├── README.md ├── example_html_viewer.html ├── test ├── discord-bot-test-expect.sh ├── discord-bot-test.sh ├── test1.ans ├── test1.in └── test1.sh └── vdator ├── .env.EXAMPLE ├── api.py ├── checker.py ├── checks ├── __init__.py ├── audio_track_conversions.py ├── audio_track_people.py ├── audio_track_spellcheck.py ├── chapter_language.py ├── chapter_padding.py ├── check.py ├── filename.py ├── flac_audio_tracks.py ├── has_chapters.py ├── metadata_default_flag.py ├── metadata_ids.py ├── mixins │ ├── __init__.py │ ├── is_commentary_track.py │ ├── is_movie.py │ ├── print_header.py │ └── section_id.py ├── mkvmerge.py ├── movie_name_format.py ├── muxing_mode.py ├── print_audio_track_names.py ├── print_chapters.py ├── print_text_tracks.py ├── remove_until_first_codec.py ├── text_default_flag.py ├── text_order.py ├── tracks_have_language.py ├── video_language_matches_first_audio_language.py └── video_track.py ├── data ├── codecs.json └── urls.json ├── helpers.py ├── main.py ├── nltk_people.py ├── parsers ├── __init__.py ├── bdinfo_parser.py ├── codecs_parser.py ├── match_bdinfo_audio_to_mediainfo.py ├── media_info_parser.py ├── paste_parser.py └── url_parser.py ├── reporter.py ├── requirements.txt └── source_detector.py /.gitattributes: -------------------------------------------------------------------------------- 1 | # Binary files that should not be normalized or diffed 2 | *.png binary 3 | *.jpg binary 4 | *.gif binary 5 | *.ico binary 6 | 7 | # Catch all for anything we forgot. Add rules if you get CRLF -> LF warnings. 
8 | * eol=lf 9 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # To get started with Dependabot version updates, you'll need to specify which 2 | # package ecosystems to update and where the package manifests are located. 3 | # Please see the documentation for all configuration options: 4 | # https://help.github.com/github/administering-a-repository/configuration-options-for-dependency-updates 5 | 6 | version: 2 7 | updates: 8 | - package-ecosystem: "pip" # See documentation for possible values 9 | directory: "/vdator" # Location of package manifests 10 | schedule: 11 | interval: "daily" 12 | -------------------------------------------------------------------------------- /.github/workflows/integration-tests.yml: -------------------------------------------------------------------------------- 1 | name: integration tests 2 | 3 | on: [push, pull_request] 4 | 5 | defaults: 6 | run: 7 | working-directory: ./vdator 8 | 9 | jobs: 10 | dependencies: 11 | 12 | runs-on: ubuntu-latest 13 | strategy: 14 | fail-fast: false 15 | matrix: 16 | python-version: ["3.8", "3.9", "3.10"] 17 | 18 | steps: 19 | - uses: actions/checkout@v2 20 | - name: Set up Python ${{ matrix.python-version }} 21 | uses: actions/setup-python@v2 22 | with: 23 | python-version: ${{ matrix.python-version }} 24 | - name: Install packages 25 | run: sudo apt-get install -y expect libhunspell-dev 26 | - name: Install python dependencies 27 | run: | 28 | python -m pip install --upgrade pip 29 | if [ -f requirements.txt ]; then pip install -r requirements.txt; fi 30 | - name: Setup .env 31 | run: cp .env.EXAMPLE .env 32 | 33 | api: 34 | 35 | runs-on: ubuntu-latest 36 | strategy: 37 | fail-fast: false 38 | matrix: 39 | python-version: ["3.8", "3.9", "3.10"] 40 | 41 | steps: 42 | - uses: actions/checkout@v2 43 | - name: Set up Python ${{ matrix.python-version }} 44 | uses: 
actions/setup-python@v2 45 | with: 46 | python-version: ${{ matrix.python-version }} 47 | - name: Install packages 48 | run: sudo apt-get install -y libhunspell-dev 49 | - name: Install python dependencies 50 | run: | 51 | python -m pip install --upgrade pip 52 | if [ -f requirements.txt ]; then pip install -r requirements.txt; fi 53 | - name: Setup .env 54 | run: cp .env.EXAMPLE .env 55 | - name: Run API 56 | env: 57 | MKVMERGE_VERSION: "Version 57.0.0 \"Till The End\" 2021-05-22" 58 | run: python3 api.py & 59 | - name: Test blank input to API 60 | run: ./test1.sh 61 | working-directory: ./test 62 | 63 | discord-bot: 64 | 65 | if: ${{ github.ref == 'refs/heads/main' }} 66 | 67 | runs-on: ubuntu-latest 68 | 69 | steps: 70 | - uses: actions/checkout@v2 71 | - name: Set up Python 3.10 72 | uses: actions/setup-python@v2 73 | with: 74 | python-version: "3.10" 75 | - name: Install packages 76 | run: sudo apt-get install -y expect libhunspell-dev 77 | - name: Install python dependencies 78 | run: | 79 | python -m pip install --upgrade pip 80 | if [ -f requirements.txt ]; then pip install -r requirements.txt; fi 81 | - name: Setup .env 82 | run: cp .env.EXAMPLE .env 83 | - name: Discord bot can join server 84 | env: 85 | DISCORD_BOT_SECRET: ${{ secrets.DISCORD_BOT_SECRET }} 86 | run: ./discord-bot-test-expect.sh 87 | working-directory: ./test 88 | -------------------------------------------------------------------------------- /.github/workflows/lint.yml: -------------------------------------------------------------------------------- 1 | name: lint 2 | 3 | on: [push, pull_request] 4 | 5 | defaults: 6 | run: 7 | working-directory: ./vdator 8 | 9 | jobs: 10 | build: 11 | 12 | runs-on: ubuntu-latest 13 | strategy: 14 | matrix: 15 | python-version: ["3.8", "3.9", "3.10"] 16 | 17 | steps: 18 | - uses: actions/checkout@v2 19 | - name: Set up Python ${{ matrix.python-version }} 20 | uses: actions/setup-python@v2 21 | with: 22 | python-version: ${{ matrix.python-version }} 23 | 
- name: Install PyHunSpell 24 | run: sudo apt-get install libhunspell-dev 25 | - name: Install dependencies 26 | run: | 27 | python -m pip install --upgrade pip 28 | if [ -f requirements.txt ]; then pip install -r requirements.txt; fi 29 | - name: Setup .env 30 | run: cp .env.EXAMPLE .env 31 | - name: Lint with black 32 | run: black . --check 33 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | vdator/bin/ 94 | vdator/include 95 | vdator/lib64 96 | vdator/pyvenv.cfg 97 | vdator/.env 98 | 99 | # Spyder project settings 100 | .spyderproject 101 | .spyproject 102 | 103 | # Rope project settings 104 | .ropeproject 105 | 106 | # mkdocs documentation 107 | /site 108 | 109 | # mypy 110 | .mypy_cache/ 111 | 112 | test/*.out 113 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 werrpy 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this 
permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # vdator 2 | > Remux validator Discord bot 3 | 4 | [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) 5 | [![Lint](https://github.com/werrpy/vdator/actions/workflows/lint.yml/badge.svg)](https://github.com/werrpy/vdator/actions/workflows/lint.yml) 6 | [![Integration Tests](https://github.com/werrpy/vdator/actions/workflows/integration-tests.yml/badge.svg)](https://github.com/werrpy/vdator/actions/workflows/integration-tests.yml) 7 | 8 | Takes a Pastebin link with BDInfo and MediaInfo dump, and validates the remux. 
9 | 10 | Checks: 11 | ``` 12 | Video track names 13 | Movie/TV name format 14 | IMDB/TMDB ids 15 | Filename 16 | Video language matches first audio language 17 | No muxing mode 18 | Uses latest mkvtoolnix 19 | Video and audio track names match 20 | DTS-HD MA 1.0/2.0 optionally to FLAC, LPCM 1.0/2.0 to FLAC, LPCM > 2.0 to DTS-HD MA 21 | Commentary to AC-3 @ 224 kbps 22 | Commentary track people and spellcheck 23 | Subtitle order 24 | Subtitle default flag 25 | Should have chapters 26 | Chapter languages 27 | Chapter padding 28 | ``` 29 | 30 | ### Table of Contents 31 | - [Supported pastebin sites](#supported-pastebin-sites) 32 | - [Setup](#setup) 33 | * [Create a python3 virtual environment](#create-a-python3-virtual-environment) 34 | * [Installing dependencies](#installing-dependencies) 35 | * [Updating dependencies](#updating-dependencies) 36 | * [Running manually](#running-manually) 37 | * [Running with systemd](#running-with-systemd) 38 | - [Lint](#lint) 39 | - [Using](#using) 40 | - [Adding a pastebin site](#adding-a-pastebin-site) 41 | - [Adding a new check](#adding-a-new-check) 42 | - [API](#api) 43 | 44 | ### Supported pastebin sites 45 | 46 | - [{d}paste](https://dpaste.com/) 47 | - [dpaste](https://dpaste.org/) 48 | - [Hastebin](https://hastebin.com/) 49 | - [Hey! Paste it](https://www.heypasteit.com/) 50 | - [CentOS Pastebin Service](https://paste.centos.org/) 51 | - [Paste.ee](https://paste.ee/) 52 | - [openSUSE Paste](https://paste.opensuse.org/) 53 | - [Pastebin](https://pastebin.com/) 54 | - [Rentry.co - Markdown Pastebin](https://rentry.co/) 55 | - [termbin](https://termbin.com/) 56 | - [TextBin](https://textbin.net/) 57 | 58 | ### Setup 59 | 60 | Requires Python >= 3.8 61 | 62 | Create a [Discord bot](https://discordapp.com/developers/docs/intro) and add it to a server. In the bot settings enable "MESSAGE CONTENT INTENT". 
63 | 64 | Copy the environment variables template `vdator/.env.EXAMPLE` to `vdator/.env` 65 | Edit `vdator/.env` and set `DISCORD_BOT_SECRET` to your bot's token. 66 | 67 | Request a [TMDB API Key](https://developers.themoviedb.org/3/getting-started/introduction) and set `TMDB_API_KEY`. 68 | 69 | Don't forget to create channels on the server and set them in `vdator/.env` for `REVIEW_CHANNELS`, `REVIEW_REPLY_CHANNELS`, and `BOT_CHANNELS`. 70 | 71 | To prevent overwriting the `vdator/.env` file when pulling changes from git, do `git update-index --skip-worktree vdator/.env`. When you want to pull a new `.env` file, do `git update-index --no-skip-worktree vdator/.env`. 72 | 73 | #### Create a python3 virtual environment: 74 | 75 | Use [pip and virtual env](https://packaging.python.org/guides/installing-using-pip-and-virtualenv/) to run vdator. 76 | 77 | In the `vdator` directory run: 78 | ```bash 79 | python3 -m venv . 80 | ``` 81 | 82 | If the command fails to install pip, you will see an error similar to: 83 | ``` 84 | Error: Command '['python3', '-Im', 'ensurepip', '--upgrade', '--default-pip']' returned non-zero exit status 1. 85 | ``` 86 | Start over by creating a virtual environment without pip, and then install pip manually inside it: 87 | ```bash 88 | python3 -m venv --without-pip . 
89 | source bin/activate 90 | curl https://bootstrap.pypa.io/get-pip.py | python3 91 | deactivate 92 | ``` 93 | 94 | #### Installing dependencies 95 | 96 | Install [PyHunSpell](https://github.com/blatinier/pyhunspell#installation) 97 | 98 | ```bash 99 | sudo apt install python3-dev libhunspell-dev 100 | ``` 101 | 102 | Install dependencies 103 | 104 | ```bash 105 | source bin/activate 106 | pip3 install -r requirements.txt 107 | deactivate 108 | ``` 109 | 110 | #### Updating dependencies 111 | 112 | ```bash 113 | source bin/activate 114 | pip3 install -r requirements.txt --upgrade 115 | pip3 freeze > requirements.txt 116 | deactivate 117 | ``` 118 | 119 | #### Running manually 120 | 121 | Run the bot manually for testing, exceptions will get printed: 122 | ```bash 123 | source bin/activate 124 | python3 main.py 125 | ``` 126 | 127 | #### Running with systemd 128 | 129 | Create a systemd service to run vdator, `/etc/systemd/system/vdator.service` 130 | 131 | ``` 132 | [Unit] 133 | Description=vdator 134 | After=multi-user.target 135 | 136 | [Service] 137 | WorkingDirectory=/home/USER/vdator/venv/vdator 138 | User= 139 | Group= 140 | ExecStart=/home/USER/vdator/venv/bin/python3 /home/USER/vdator/venv/vdator/main.py 141 | Type=idle 142 | Restart=always 143 | RestartSec=15 144 | 145 | [Install] 146 | WantedBy=multi-user.target 147 | ``` 148 | 149 | Set `User` to the user to run vdator as, and `Group` to the user's group (list with `groups`), usually both are the username. 150 | Replace `/home/USER/vdator/venv/` with the full path to your venv. 151 | 152 | Run `systemctl enable vdator` to start on boot. Use systemctl to start/stop vdator, `systemctl start vdator`, `systemctl stop vdator`, `systemctl restart vdator` 153 | 154 | ### Lint 155 | ```bash 156 | black . 157 | ``` 158 | 159 | ### Using 160 | 161 | Type `!help` in one of the bot channels for more information. 
162 | 163 | ### Adding a pastebin site 164 | 165 | Edit `vdator/data/urls.json` and add your pastebin site. 166 | 167 | ``` 168 | # hostname 169 | "example.com": { 170 | # regex to get paste's unique identifier 171 | "slug_regex": "https://example.com/(.*)", 172 | 173 | # regex to check if paste links directly to raw text, using {} in place of the unique identifier 174 | "raw_url_regex": "https?://example.com/raw/{}", 175 | 176 | # link to raw text, using {} in place of the unique identifier 177 | "raw_url": "https://example.com/raw/{}" 178 | } 179 | ``` 180 | 181 | ### Adding a new check 182 | 183 | Edit `vdator/checker.py`. 184 | 185 | In the `run_checks()` method add: 186 | ```python 187 | reply += MyNewCheck(self.reporter, self.mediainfo).run() 188 | ``` 189 | 190 | Edit `vdator/checks/__init__.py` and add: 191 | ```python 192 | from .my_check import * 193 | ``` 194 | 195 | Create `vdator/checks/my_check.py`: 196 | ```python 197 | from .check import * 198 | 199 | 200 | class MyNewCheck(Check): 201 | def __init__(self, reporter, mediainfo): 202 | super().__init__(reporter, mediainfo, "Error running my check") 203 | 204 | # overriding abstract method 205 | def get_reply(self): 206 | reply = "" 207 | # use self.mediainfo here 208 | # use has() and has_many() to check if the mediainfo keys you need exist, for example: 209 | # if has_many(self.mediainfo, "video.0", ["height"]): 210 | # safe to use self.mediainfo["video"][0]["height"] here 211 | # use self.reporter.print_report() to print status messages 212 | reply += self.reporter.print_report("info", "Some info message") 213 | # lastly return the string result of the check which is appended to the bot reply in run_checks() 214 | return reply 215 | ``` 216 | 217 | ### API 218 | 219 | Run with `python api.py` 220 | 221 | The default port is 5000. To use a different port, set the `PORT` environment variable, for example `export PORT=8080 && python api.py` 222 | 223 | Example using Postman: 224 | ``` 225 | POST 
http://127.0.0.1:5000/text 226 | Body, raw 227 | [INSERT TEXT HERE] 228 | ``` 229 | 230 | Gives back json: 231 | ```json 232 | { 233 | "discord_reply":"...", 234 | "html_reply":"..." 235 | } 236 | ``` 237 | **discord_reply** - the text that the bot usually sends to discord 238 | **html_reply** - discord text formatted as html 239 | 240 | Insert the `html_reply` text into the `example_html_viewer.html` to see it formatted similar to discord. 241 | 242 | For testing, force a specific version of mkvmerge with 243 | 244 | ````bash 245 | export MKVMERGE_VERSION="Version 54.0.0 \"Hill The End\" 2021-05-22" && python api.py 246 | ```` 247 | 248 | -------------------------------------------------------------------------------- /example_html_viewer.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 13 | 14 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 |
36 | 37 | 38 | 39 | 40 |
41 | 42 |
43 | 44 | 45 | 46 | 47 | -------------------------------------------------------------------------------- /test/discord-bot-test-expect.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/expect -f 2 | 3 | # timeout after 60 seconds 4 | set timeout 60 5 | 6 | spawn ./discord-bot-test.sh 7 | 8 | expect "I'm in\r" 9 | expect "vdator-github-actions#7018\r" 10 | -------------------------------------------------------------------------------- /test/discord-bot-test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | python ../vdator/main.py 4 | -------------------------------------------------------------------------------- /test/test1.ans: -------------------------------------------------------------------------------- 1 | {"discord_reply":"\u274c No mediainfo. Are you missing the `General` heading?\n> **Report**\n0 correct, 0 warnings, 1 error, 0 failures, and 0 info","html_reply":"

No mediainfo. Are you missing the General heading?

Report

0 correct, 0 warnings, 1 error, 0 failures, and 0 info

"} 2 | -------------------------------------------------------------------------------- /test/test1.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/werrpy/vdator/a6be476623551b797c94a3f5944c1d7c921bfb94/test/test1.in -------------------------------------------------------------------------------- /test/test1.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | curl -s -X POST --data-binary "$(cat test1.in)" http://127.0.0.1:5000/text > test1.out 4 | diff test1.out test1.ans 5 | -------------------------------------------------------------------------------- /vdator/.env.EXAMPLE: -------------------------------------------------------------------------------- 1 | # channels to listen in and add reactions 2 | REVIEW_CHANNELS=upload-review, remux 3 | 4 | # channels to send full summary to if from review channel 5 | REVIEW_REPLY_CHANNELS=remux-bot 6 | 7 | # channels to listen in and post full summaries 8 | BOT_CHANNELS=remux-bot 9 | 10 | # trainee channels 11 | TRAINEE_CHANNELS=upload-review 12 | 13 | # internal channels 14 | INTERNAL_CHANNELS=remux 15 | 16 | # release group 17 | RELEASE_GROUP=GROUP 18 | 19 | # in-game, Now Playing... 
20 | IN_GAME=Remux n00b 21 | 22 | # stop parsing after encountering this line 23 | IGNORE_AFTER_LINE=%%% 24 | 25 | # method to check for line to ignore after 26 | # 'equals' or 'contains' 27 | IGNORE_AFTER_LINE_METHOD=contains 28 | 29 | # ignore input until blank line if current line starts with one of these 30 | #IGNORE_UNTIL_BLANK_LINE_PREFIXES= 31 | 32 | # DVD check mode 33 | # 'mediainfo' to use mediainfo fields 34 | # 'nobdinfo' to assume DVD if no bdinfo given 35 | DVD_CHECK_MODE=nobdinfo 36 | 37 | DISCORD_BOT_SECRET= 38 | DISCORD_MSG_CHAR_LIMIT=2000 39 | 40 | TMDB_API_KEY= 41 | HUNSPELL_LANG=/usr/share/hunspell/en_US.dic, /usr/share/hunspell/en_US.aff 42 | MISSPELLED_IGNORE_LIST=upmix 43 | 44 | MKVTOOLNIX_NEWS=https://mkvtoolnix.download/doc/NEWS.md 45 | 46 | FILENAME_CUTS=Directors.Cut, Extended.Cut, Final.Cut, Theatrical, Uncut, Unrated 47 | 48 | # how many years off the movie year can be. (default: 1) 49 | #MOVIE_YEAR_OFFSET=1 50 | -------------------------------------------------------------------------------- /vdator/api.py: -------------------------------------------------------------------------------- 1 | """ 2 | Experimental REST API 3 | 4 | > python3 api.py 5 | POST http://127.0.0.1:5000/text 6 | Body, raw 7 | [INSERT TEXT HERE] 8 | 9 | {"discord_reply":"...", "html_reply":"..."} 10 | """ 11 | 12 | import json, os, traceback 13 | from flask import Flask, jsonify, request 14 | 15 | from discord_markdown.discord_markdown import ( 16 | Compiler, 17 | convert_to_html as discord_markdown_convert_to_html, 18 | ) 19 | 20 | # Override discord_markdown.discord_markdown.Compiler.compile method to disable printing 21 | # https://github.com/bitjockey42/discord-markdown/blob/9b8d267e3bf1b333bccaae5619a3f2af0a5a54a1/discord_markdown/compiler.py#L29-L37 22 | def compile(self, markdown=False): 23 | if not self._parser.tree: 24 | self._parser.parse() 25 | self._code = "" 26 | for node in self._parser.tree: 27 | self._code = self._code + node.eval(markdown=markdown) 
28 | self._code = self._code.strip() 29 | return self._code 30 | 31 | 32 | Compiler.compile = compile 33 | 34 | # parsers 35 | from parsers import * 36 | from source_detector import SourceDetector 37 | from reporter import Reporter 38 | from checker import Checker 39 | 40 | # script location 41 | __location__ = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__))) 42 | 43 | # initialize parsers 44 | bdinfo_parser = BDInfoParser() 45 | paste_parser = PasteParser(bdinfo_parser) 46 | mediainfo_parser = MediaInfoParser() 47 | 48 | with open(os.path.join(__location__, "data/codecs.json")) as f: 49 | codecs = json.load(f) 50 | codecs_parser = CodecsParser(codecs) 51 | 52 | source_detector = SourceDetector() 53 | reporter = Reporter() 54 | checker = Checker(codecs_parser, source_detector, reporter) 55 | 56 | app = Flask(__name__) 57 | 58 | 59 | @app.route("/text", methods=["POST"]) 60 | def parse_text(): 61 | """ 62 | POST http://127.0.0.1:5000/text 63 | Body, raw 64 | [INSERT TEXT HERE] 65 | """ 66 | 67 | reply = "" 68 | 69 | try: 70 | # setup/reset reporter 71 | reporter.setup() 72 | text = request.get_data().decode("utf-8") 73 | bdinfo, mediainfo, eac3to = paste_parser.parse(text) 74 | except: 75 | traceback.print_exc() 76 | reply += reporter.print_report("fail", "Failed to get paste") 77 | else: 78 | if mediainfo: 79 | try: 80 | # parse mediainfo 81 | mediainfo = mediainfo_parser.parse(mediainfo) 82 | except: 83 | traceback.print_exc() 84 | reply += reporter.print_report("fail", "Mediainfo parser failed") 85 | else: 86 | try: 87 | # setup checker 88 | checker.setup(bdinfo, mediainfo, eac3to, "remux-bot") 89 | except: 90 | traceback.print_exc() 91 | reply += reporter.print_report( 92 | "fail", "vdator failed to setup checker" 93 | ) 94 | else: 95 | try: 96 | reply += checker.run_checks() 97 | except: 98 | traceback.print_exc() 99 | reply += reporter.print_report("fail", "vdator failed to parse") 100 | else: 101 | reply += reporter.print_report( 102 | 
"error", "No mediainfo. Are you missing the `General` heading?" 103 | ) 104 | 105 | # report 106 | reply += "> **Report**\n" 107 | reply += reporter.display_report() 108 | 109 | # prevent infinite loop with 2 multi-line code blocks 110 | # https://github.com/bitjockey42/discord-markdown/issues/6 111 | reply_to_convert = reply.replace("```", "===") 112 | # remove quotes around sections 113 | reply_to_convert = reply_to_convert.replace("> **", "**") 114 | 115 | # convert to html 116 | reply_html = discord_markdown_convert_to_html(reply_to_convert) 117 | 118 | # format html 119 | reply_html = reply_html.replace("===", "
") 120 | # emojis 121 | reply_html = reply_html.replace( 122 | "☑", 123 | "", 124 | ) 125 | reply_html = reply_html.replace( 126 | "⚠", 127 | "", 128 | ) 129 | reply_html = reply_html.replace( 130 | "❌", 131 | "", 132 | ) 133 | 134 | data = {"discord_reply": reply, "html_reply": reply_html} 135 | 136 | return jsonify(data) 137 | 138 | 139 | PORT = os.environ.get("PORT", "5000") 140 | app.run(port=PORT) 141 | -------------------------------------------------------------------------------- /vdator/checker.py: -------------------------------------------------------------------------------- 1 | from dotenv import load_dotenv 2 | import logging, os 3 | 4 | # load environment variables 5 | load_dotenv() 6 | 7 | # TMDb API 8 | import tmdbsimple as tmdb 9 | 10 | tmdb.API_KEY = os.environ.get("TMDB_API_KEY") 11 | 12 | # IMDb API 13 | from imdb import Cinemagoer 14 | 15 | ia = Cinemagoer() 16 | logger = logging.getLogger("imdbpy") 17 | logger.disabled = True 18 | 19 | # checks 20 | from checks.mixins import PrintHeader, SectionId, IsCommentaryTrack 21 | from checks.remove_until_first_codec import RemoveUntilFirstCodec 22 | from checks import * 23 | 24 | # nltk data 25 | from nltk_people import download_nltk_data 26 | 27 | download_nltk_data() 28 | 29 | 30 | class Checker(PrintHeader, SectionId, IsCommentaryTrack): 31 | def __init__(self, codecs_parser, source_detector, reporter): 32 | self.codecs = codecs_parser 33 | self.remove_until_first_codec = RemoveUntilFirstCodec(codecs_parser) 34 | self.source_detector = source_detector 35 | self.reporter = reporter 36 | 37 | def setup(self, bdinfo, mediainfo, eac3to, channel_name): 38 | self.bdinfo = bdinfo 39 | self.mediainfo = mediainfo 40 | self.eac3to = eac3to 41 | self.channel_name = channel_name 42 | self.source_detector.setup(bdinfo, mediainfo) 43 | 44 | def run_checks(self): 45 | reply = "" 46 | 47 | # check metadata 48 | reply += self._print_header("Metadata") 49 | reply += CheckMovieNameFormat(self.reporter, 
self.mediainfo).run() 50 | reply += CheckMetadataIds(self.reporter, self.mediainfo, tmdb, ia).run() 51 | reply += CheckFilename( 52 | self.reporter, 53 | self.source_detector, 54 | self.codecs, 55 | self.remove_until_first_codec, 56 | self.mediainfo, 57 | self.bdinfo, 58 | self.channel_name, 59 | ).run() 60 | reply += CheckTracksHaveLanguage(self.reporter, self.mediainfo).run() 61 | reply += CheckVideoLanguageMatchesFirstAudioLanguage( 62 | self.reporter, self.mediainfo 63 | ).run() 64 | reply += CheckMuxingMode(self.reporter, self.mediainfo).run() 65 | reply += CheckMKVMerge(self.reporter, self.mediainfo).run() 66 | reply += CheckMetadataDefaultFlag(self.reporter, self.mediainfo).run() 67 | 68 | # check video 69 | reply += self._print_header("Video & Audio Tracks") 70 | reply += CheckVideoTrack( 71 | self.reporter, 72 | self.source_detector, 73 | self.codecs, 74 | self.mediainfo, 75 | self.bdinfo, 76 | ).run() 77 | 78 | # check audio 79 | reply += CheckPrintAudioTrackNames(self.reporter, self.mediainfo).run() 80 | reply += CheckAudioTrackConversions( 81 | self.reporter, 82 | self.source_detector, 83 | self.codecs, 84 | self.remove_until_first_codec, 85 | self.mediainfo, 86 | self.bdinfo, 87 | self.eac3to, 88 | ).run() 89 | # check FLAC audio using mediainfo 90 | reply += CheckFLACAudioTracks( 91 | self.reporter, self.remove_until_first_codec, self.mediainfo 92 | ).run() 93 | 94 | # TMDb and IMDb People API 95 | reply += CheckAudioTrackPeople( 96 | self.reporter, self.remove_until_first_codec, self.mediainfo, tmdb, ia 97 | ).run() 98 | reply += CheckAudioTrackSpellCheck( 99 | self.reporter, self.remove_until_first_codec, self.mediainfo 100 | ).run() 101 | 102 | # check text 103 | reply += self._print_header("Text Tracks") 104 | reply += CheckPrintTextTracks(self.reporter, self.mediainfo).run() 105 | reply += CheckTextOrder(self.reporter, self.mediainfo).run() 106 | reply += CheckTextDefaultFlag(self.reporter, self.mediainfo).run() 107 | 108 | # check chapters 109 | 
reply += CheckPrintChapters(self.reporter, self.mediainfo).run() 110 | reply += CheckHasChapters(self.reporter, self.mediainfo, self.eac3to).run() 111 | reply += CheckChapterLanguage(self.reporter, self.mediainfo).run() 112 | reply += CheckChapterPadding(self.reporter, self.mediainfo).run() 113 | 114 | return reply 115 | -------------------------------------------------------------------------------- /vdator/checks/__init__.py: -------------------------------------------------------------------------------- 1 | from .check import * 2 | 3 | # metadata 4 | from .movie_name_format import CheckMovieNameFormat 5 | from .metadata_ids import CheckMetadataIds 6 | from .filename import CheckFilename 7 | from .tracks_have_language import CheckTracksHaveLanguage 8 | from .video_language_matches_first_audio_language import ( 9 | CheckVideoLanguageMatchesFirstAudioLanguage, 10 | ) 11 | from .muxing_mode import CheckMuxingMode 12 | from .mkvmerge import CheckMKVMerge 13 | from .metadata_default_flag import CheckMetadataDefaultFlag 14 | 15 | # video 16 | from .video_track import CheckVideoTrack 17 | 18 | # audio 19 | from .print_audio_track_names import CheckPrintAudioTrackNames 20 | from .audio_track_conversions import CheckAudioTrackConversions 21 | from .flac_audio_tracks import CheckFLACAudioTracks 22 | from .audio_track_people import CheckAudioTrackPeople 23 | from .audio_track_spellcheck import CheckAudioTrackSpellCheck 24 | 25 | # text 26 | from .print_text_tracks import CheckPrintTextTracks 27 | from .text_order import CheckTextOrder 28 | from .text_default_flag import CheckTextDefaultFlag 29 | 30 | # chapters 31 | from .print_chapters import CheckPrintChapters 32 | from .has_chapters import CheckHasChapters 33 | from .chapter_language import CheckChapterLanguage 34 | from .chapter_padding import CheckChapterPadding 35 | -------------------------------------------------------------------------------- /vdator/checks/audio_track_conversions.py: 
class CheckAudioTrackConversions(Check, SectionId, IsCommentaryTrack):
    """Check that BDInfo audio tracks were converted to the expected codecs.

    BDInfo and MediaInfo audio tracks are paired by index. For each pair the
    check validates commentary handling, the codec conversion, and that the
    MediaInfo track title matches a title derivable from the BDInfo entry.
    """

    def __init__(
        self,
        reporter,
        source_detector,
        codecs,
        remove_until_first_codec,
        mediainfo,
        bdinfo,
        eac3to,
    ):
        """Store the collaborators and parsed inputs used by this check."""
        super().__init__(reporter, mediainfo, "Error checking audio track conversions")
        self.source_detector = source_detector
        self.codecs = codecs
        self.remove_until_first_codec = remove_until_first_codec
        self.bdinfo = bdinfo
        self.eac3to = eac3to

    # overriding abstract method
    def get_reply(self):
        """Build the report for all audio tracks present in both inputs.

        Returns
        -------
        reply string
        """
        reply = ""

        if self.source_detector.is_dvd():
            # no audio track conversions for dvds
            reply += self.reporter.print_report(
                "info", "No audio track conversions to check for DVDs"
            )
            return reply

        len_bdinfo = len(self.bdinfo["audio"])
        len_mediainfo = len(self.mediainfo["audio"])
        min_len = min(len_bdinfo, len_mediainfo)

        for i in range(min_len):
            # audio = dict{'name':'...', 'language':'...'}
            bdinfo_audio_title = re.sub(r"\s+", " ", self.bdinfo["audio"][i]["name"])
            bdinfo_audio_parts = bdinfo_audio_title.split(" / ")
            bdinfo_audio_parts_converted = bdinfo_audio_parts.copy()

            # check audio commentary
            (is_commentary, commentary_reply) = self._check_commentary(i)
            if is_commentary:
                reply += commentary_reply
                continue

            # Validate the channel field BEFORE converting it: the name may
            # have a single part or a non-numeric second part, and an
            # unguarded float() would abort the check for every track.
            below_three_channels = (
                len(bdinfo_audio_parts) > 1
                and is_float(bdinfo_audio_parts[1])
                and float(bdinfo_audio_parts[1]) < 3
            )
            optionally_flac = False

            # check audio conversions
            if bdinfo_audio_parts[0] == "DTS-HD Master Audio":
                if below_three_channels:
                    # can be DTS-HD MA 1.0, DTS-HD MA 2.0, FLAC 1.0, and FLAC 2.0
                    optionally_flac = True
                    reply += self._check_audio_conversion(
                        i,
                        "DTS-HD Master Audio",
                        ["DTS-HD Master Audio", "FLAC Audio"],
                    )
                else:
                    reply += self._check_audio_conversion(
                        i, "DTS-HD Master Audio", ["DTS-HD Master Audio"]
                    )
            elif bdinfo_audio_parts[0] == "LPCM Audio":
                if below_three_channels:
                    # LPCM 1.0 or 2.0 to FLAC
                    reply += self._check_audio_conversion(
                        i, "LPCM Audio", ["FLAC Audio"]
                    )
                    bdinfo_audio_parts_converted[0] = "FLAC Audio"
                else:
                    # LPCM > 2.0 to DTS-HD MA
                    reply += self._check_audio_conversion(
                        i, "LPCM Audio", ["DTS-HD Master Audio"]
                    )
                    bdinfo_audio_parts_converted[0] = "DTS-HD Master Audio"

            # check track names match
            reply += self._check_track_name(
                i, bdinfo_audio_parts_converted, optionally_flac
            )

        if min_len < len_mediainfo:
            reply += self.reporter.print_report(
                "warning",
                "Checked first `{}/{}` audio tracks".format(min_len, len_mediainfo),
            )

        return reply

    def _check_track_name(self, i, bdinfo_audio_parts_converted, optionally_flac):
        """Compare the MediaInfo title of track ``i`` with the expected titles.

        ``bdinfo_audio_parts_converted`` holds the " / "-separated BDInfo name
        parts after the expected codec substitution. When ``optionally_flac``
        is set, FLAC and DTS-HD MA variants (including 1.0) are also accepted.
        """
        mediainfo_track = self.mediainfo["audio"][i]
        section = self._section_id("audio", i)

        if "title" not in mediainfo_track:
            return self.reporter.print_report(
                "error", "Audio " + section + ": Missing track name"
            )

        mediainfo_audio_title = mediainfo_track["title"].strip()
        (
            mediainfo_audio_title,
            _,
            _,
        ) = self.remove_until_first_codec.remove(mediainfo_audio_title)

        # work on a copy so the caller's list is never mutated
        parts = list(bdinfo_audio_parts_converted)
        bdinfo_audio_title = " / ".join(parts)
        bdinfo_audio_titles = [bdinfo_audio_title]
        if optionally_flac:
            # May be converted to FLAC
            # Add DTS-HD MA 1.0/2.0/2.1 and FLAC 1.0/2.0/2.1 as options
            old_parts = parts.copy()

            parts[0] = "FLAC Audio"
            # FLAC 2.0/2.1
            bdinfo_audio_titles.append(" / ".join(parts))
            parts[1] = "1.0"
            # FLAC 1.0
            bdinfo_audio_titles.append(" / ".join(parts))

            # DTS-HD MA 2.0/2.1
            bdinfo_audio_titles.append(" / ".join(old_parts))
            old_parts[1] = "1.0"
            # DTS-HD MA 1.0
            bdinfo_audio_titles.append(" / ".join(old_parts))

        if mediainfo_track["title"] in bdinfo_audio_titles:
            return self.reporter.print_report(
                "correct", "Audio " + section + ": Track names match"
            )

        # use bitrate from mediainfo audio title
        m_bit_rate = re.search(r"(\d+)\skbps", mediainfo_audio_title)
        if m_bit_rate:
            m_bit_rate = m_bit_rate.group(1)
            for j, title in enumerate(bdinfo_audio_titles):
                bdinfo_audio_titles[j] = re.sub(
                    r"(.*\s)\d+(\skbps.*)",
                    r"\g<1>{}\g<2>".format(m_bit_rate),
                    title,
                )

        # if it has TrueHD objects, add them to the audio channel
        if "number_of_dynamic_objects" in mediainfo_track:
            bdinfo_audio_title = re.sub(
                r"(.*\d\.\d)(.*)",
                r"\g<1>+{} objects\g<2>".format(
                    mediainfo_track["number_of_dynamic_objects"]
                ),
                bdinfo_audio_title,
            )
            bdinfo_audio_titles.append(bdinfo_audio_title)

        # bdinfo_audio_titles has list of possible titles
        if mediainfo_audio_title in bdinfo_audio_titles:
            return self.reporter.print_report(
                "correct", "Audio " + section + ": Track names match"
            )

        reply = self.reporter.print_report(
            "error",
            "Audio "
            + section
            + ": Bad conversion:\n```fix\nBDInfo: "
            + bdinfo_audio_title
            + "\nMediaInfo: "
            + mediainfo_track["title"]
            + "```",
            new_line=False,
        )
        reply += show_diff(mediainfo_track["title"], bdinfo_audio_title)
        return reply

    def _check_commentary(self, i):
        """Check commentary handling for audio track ``i``.

        Returns
        -------
        (is_commentary, reply) tuple; ``reply`` is empty when the track is
        not a commentary track.
        """
        reply, is_commentary = "", False
        mediainfo_track = self.mediainfo["audio"][i]

        # A track without a title cannot be classified as commentary; the
        # caller reports the missing title instead of crashing here.
        if "title" not in mediainfo_track or not self._is_commentary_track(
            mediainfo_track["title"]
        ):
            return is_commentary, reply

        is_commentary = True
        section = self._section_id("audio", i)

        # audio = dict{'name':'...', 'language':'...'}
        bdinfo_name = self.bdinfo["audio"][i]["name"]
        if bdinfo_name.count("/") < 1:
            reply += self.reporter.print_report(
                "warning",
                "Audio " + section + ": Cannot verify commentary audio conversion",
            )
            return is_commentary, reply

        bdinfo_audio_format = bdinfo_name.split("/")[0].strip()
        if bdinfo_audio_format == "Dolby Digital Audio":
            # source is already Dolby Digital: the track must stay AC-3
            if "format" not in mediainfo_track:
                reply += self.reporter.print_report(
                    "error",
                    "Audio " + section + ": Commentary does not have a format",
                )
            elif mediainfo_track["format"] == "AC-3":
                reply += self.reporter.print_report(
                    "correct",
                    "Audio " + section + ": Commentary already AC-3",
                )
            else:
                reply += self.reporter.print_report(
                    "error",
                    "Audio "
                    + section
                    + ": Commentary should be AC-3 instead of "
                    + mediainfo_track["format"],
                )
            return is_commentary, reply

        if "format" in mediainfo_track and mediainfo_track["format"] == "AC-3":
            if "bit_rate" in mediainfo_track:
                bit_rate = "".join(re.findall(r"[\d]+", mediainfo_track["bit_rate"]))
                if bit_rate == "224":
                    reply += self.reporter.print_report(
                        "correct",
                        "Audio "
                        + section
                        + ": Commentary converted to `AC-3 @ 224 kbps`",
                    )
                else:
                    reply += self.reporter.print_report(
                        "error",
                        "Audio "
                        + section
                        + ": Commentary AC-3 bitrate should be `224 kbps` instead of `"
                        + mediainfo_track["bit_rate"]
                        + "`",
                    )
            else:
                reply += self.reporter.print_report(
                    "error",
                    "Audio " + section + ": Commentary AC-3 does not have a bitrate",
                )
        else:
            reply += self.reporter.print_report(
                "info",
                "Audio " + section + ": Commentary may be converted to AC-3",
            )

        return is_commentary, reply

    def _check_audio_conversion(self, i, audio_from, audio_to):
        """Check that track ``i`` was converted to one of ``audio_to``.

        ``audio_from`` names the source codec; it is not read by this method.
        Both titles must split into more than four " / "-separated parts,
        otherwise a "Could not verify" warning is returned.
        """
        reply = ""
        section = self._section_id("audio", i)
        bdinfo_name = self.bdinfo["audio"][i]["name"]
        mediainfo_track = self.mediainfo["audio"][i]

        # verify audio track titles
        if (
            " / " not in bdinfo_name
            or "title" not in mediainfo_track
            or " / " not in mediainfo_track["title"]
        ):
            reply += self.reporter.print_report(
                "warning", "Could not verify audio " + section
            )
            return reply

        # [codec, channel, sampling rate, bit rate, bit depth]
        bdinfo_audio_parts = bdinfo_name.split(" / ")
        if len(bdinfo_audio_parts) <= 4:
            reply += self.reporter.print_report(
                "warning", "Could not verify audio " + section
            )
            return reply

        (mediainfo_audio_title, _, _) = self.remove_until_first_codec.remove(
            mediainfo_track["title"]
        )

        # [codec, channel, sampling rate, bit rate, bit depth]
        mediainfo_parts = mediainfo_audio_title.split(" / ")
        if len(mediainfo_parts) <= 4:
            reply += self.reporter.print_report(
                "warning", "Could not verify audio " + section
            )
            return reply

        # verify audio conversions
        if mediainfo_parts[0] not in audio_to:
            reply += self.reporter.print_report(
                "error",
                "Audio "
                + section
                + " should be converted to one of ["
                + ", ".join(audio_to)
                + "]",
            )
            return reply

        # a "-mono" eac3to command line disables the channel comparison
        disable_channels_check = self._eac3to_log_has_mono()

        if not disable_channels_check and mediainfo_parts[1] != bdinfo_audio_parts[1]:
            reply += self.reporter.print_report(
                "error",
                "Audio "
                + section
                + ": Channels should be `"
                + bdinfo_audio_parts[1]
                + "` instead of `"
                + mediainfo_parts[1]
                + "`",
            )

        # mediainfo bitrate should be less than bdinfo bitrate
        try:
            m_bit_rate = int("".join(re.findall(r"\d+", mediainfo_parts[3].strip())))
            bd_bit_rate = int(
                "".join(re.findall(r"\d+", bdinfo_audio_parts[3].strip()))
            )
        except ValueError:
            # best effort: a bitrate field without digits is simply not compared
            return reply

        if m_bit_rate > bd_bit_rate:
            reply += self.reporter.print_report(
                "error",
                "Audio "
                + section
                + ": MediaInfo bitrate is greater than BDInfo bitrate: `"
                + str(m_bit_rate)
                + " kbps > "
                + str(bd_bit_rate)
                + " kbps`",
            )

        return reply

    def _eac3to_log_has_mono(self):
        """Return True if any eac3to command line contains a ``-mono`` token."""
        return any(
            line.lower().startswith("command line:")
            and "-mono" in line.lower().split()
            for log in self.eac3to
            for line in log
        )
def __init__(self, reporter, remove_until_first_codec, mediainfo, tmdb, ia):
    """Store the TMDb and IMDb clients used to look up people names."""
    super().__init__(reporter, mediainfo, "Error checking IMDb/TMDb people")
    self.remove_until_first_codec = remove_until_first_codec
    self.tmdb = tmdb
    self.ia = ia

# overriding abstract method
def get_reply(self):
    """Report which names found in audio track titles exist on TMDb/IMDb.

    Only titles in which no audio codec is found are inspected. A name is
    matched when either service returns a person with exactly that name.

    Returns
    -------
    reply string
    """
    reply = ""

    # check people in audio track names
    for i, track in enumerate(self.mediainfo["audio"]):
        if "title" not in track:
            continue
        title = track["title"]

        # skip if has an audio codec
        _, _, found_codec = self.remove_until_first_codec.remove(title)
        if found_codec:
            continue

        # try to match names
        matched_names = set()
        names = extract_names(title)
        search = self.tmdb.Search()
        for n in names:
            # TMDb API
            # `except Exception` keeps the lookup best-effort without
            # swallowing KeyboardInterrupt/SystemExit like a bare except.
            try:
                search.person(query=n)
                for s in search.results:
                    if n == s["name"]:
                        matched_names.add(n)
            except Exception:
                reply += self.reporter.print_report(
                    "info",
                    "Audio "
                    + self._section_id("audio", i)
                    + ": Failed to get TMDb people data",
                )
            # IMDb API
            try:
                for person in self.ia.search_person(n):
                    if n == person["name"]:
                        matched_names.add(n)
            except Exception:
                reply += self.reporter.print_report(
                    "info",
                    "Audio "
                    + self._section_id("audio", i)
                    + ": Failed to get IMDb people data",
                )

        # sorted() makes the report order stable between runs
        if matched_names:
            reply += self.reporter.print_report(
                "correct",
                "Audio "
                + self._section_id("audio", i)
                + " People Matched: `"
                + ", ".join(sorted(matched_names))
                + "`",
            )
        unmatched_names = set(names) - matched_names
        if unmatched_names:
            reply += self.reporter.print_report(
                "warning",
                "Audio "
                + self._section_id("audio", i)
                + " People Unmatched: `"
                + ", ".join(sorted(unmatched_names))
                + "`",
            )

    return reply
class CheckAudioTrackSpellCheck(Check, SectionId):
    """Spell check the descriptive part of each audio track title."""

    def __init__(self, reporter, remove_until_first_codec, mediainfo):
        """Create the hunspell object from the first two HUNSPELL_LANG entries."""
        super().__init__(reporter, mediainfo, "Error spell checking audio track names")
        self.hobj = hunspell.HunSpell(HUNSPELL_LANG[0], HUNSPELL_LANG[1])
        self.remove_until_first_codec = remove_until_first_codec

    # overriding abstract method
    def get_reply(self):
        """Report misspelled words per audio track.

        For titles containing a codec only the parts before the codec are
        checked; otherwise the entire title is checked. Detected names and
        words listed in MISSPELLED_IGNORE_LIST are not reported.

        Returns
        -------
        reply string
        """
        reply = ""

        # map punctuation to space; the table is the same for every track
        translator = str.maketrans(string.punctuation, " " * len(string.punctuation))

        # spellcheck audio track names
        for i, track in enumerate(self.mediainfo["audio"]):
            if "title" not in track:
                continue

            title, title_parts, found_codec = self.remove_until_first_codec.remove(
                track["title"]
            )

            # spellcheck title parts before codec or entire audio title
            spellcheck_text = " ".join(title_parts) if found_codec else title
            if not spellcheck_text:
                continue
            spellcheck_text = spellcheck_text.translate(translator)

            # ignore names (split into single words)
            ignore_words = {
                word
                for name in extract_names(spellcheck_text)
                for word in name.split()
            }

            # tokenize
            tokens = [
                t for t in nltk.word_tokenize(spellcheck_text) if t not in ignore_words
            ]

            # dict.fromkeys de-duplicates while keeping first-seen order, so
            # the reported word order is stable between runs (a set is not)
            misspelled_words = [
                word
                for word in dict.fromkeys(t for t in tokens if not self.hobj.spell(t))
                if word.lower() not in MISSPELLED_IGNORE_LIST
            ]
            if misspelled_words:
                reply += self.reporter.print_report(
                    "error",
                    "Audio "
                    + self._section_id("audio", i)
                    + " Misspelled: `"
                    + ", ".join(misspelled_words)
                    + "`",
                )

        return reply
ch_0["languages"]} 32 | # list of detected languages with chapter languages as keys 33 | # chapter_langs = {'de': [...], 'en': [...]} 34 | chapter_langs = {k: list() for k in ch_0["languages"]} 35 | 36 | for ch in chapters: 37 | for j, lang in enumerate(ch["languages"]): 38 | if lang: 39 | try: 40 | ch_lang = iso639_languages.get(part1=lang) 41 | # store chapter language 42 | chapter_langs[lang].append(ch_lang) 43 | except KeyError: 44 | # store invalid chapter number 45 | invalid_ch_lang_nums.append(str(j + 1)) 46 | else: 47 | # store invalid chapter number 48 | invalid_ch_lang_nums.append(str(j + 1)) 49 | 50 | for title in ch["titles"]: 51 | # store as key "NA" if there is no chapter language set 52 | if title["language"] is None: 53 | title["language"] = "NA" 54 | if title["language"] not in chapter_phrases: 55 | chapter_phrases[title["language"]] = "" 56 | chapter_phrases[title["language"]] += ( 57 | title["title"] + "\n" 58 | ) 59 | 60 | if len(invalid_ch_lang_nums) > 0: 61 | if len(invalid_ch_lang_nums) == len(chapters): 62 | reply += self.reporter.print_report( 63 | "error", 64 | f"Chapters {i + 1}: All chapters do not have a language set", 65 | ) 66 | elif len(invalid_ch_lang_nums) > 0: 67 | reply += self.reporter.print_report( 68 | "error", 69 | f"Chapters {i + 1}: The following chapters do not have a language set: `" 70 | + ", ".join(invalid_ch_lang_nums) 71 | + "`", 72 | ) 73 | else: 74 | reply += self.reporter.print_report( 75 | "correct", 76 | f"Chapters {i + 1}: All chapters have a language set", 77 | ) 78 | 79 | for k, chapter_phrase in chapter_phrases.items(): 80 | if k == "NA": 81 | reply += self.reporter.print_report( 82 | "error", 83 | f"Chapters {i + 1}: No chapter language set", 84 | ) 85 | continue 86 | if chapter_phrase: 87 | chapter_langs[k] = list(set(chapter_langs[k])) 88 | try: 89 | detected_lang = langdetect_detect(chapter_phrase) 90 | ch_detected_lang = iso639_languages.get( 91 | part1=detected_lang 92 | ) 93 | if ch_detected_lang in 
chapter_langs[k]: 94 | reply += self.reporter.print_report( 95 | "correct", 96 | f"Chapters {i + 1}: Language matches detected language: `" 97 | + ch_detected_lang.name 98 | + "`", 99 | ) 100 | else: 101 | chapter_langs_names = ", ".join( 102 | list( 103 | set( 104 | [ 105 | detected_lang.name 106 | for detected_lang in chapter_langs[ 107 | k 108 | ] 109 | ] 110 | ) 111 | ) 112 | ) 113 | if chapter_langs_names: 114 | reply += self.reporter.print_report( 115 | "error", 116 | f"Chapters {i + 1}: Languages: `" 117 | + chapter_langs_names 118 | + "` do not match detected language: `" 119 | + ch_detected_lang.name 120 | + "`", 121 | ) 122 | else: 123 | reply += self.reporter.print_report( 124 | "error", 125 | f"Chapters {i + 1}: No chapter languages. Detected language: `" 126 | + ch_detected_lang.name 127 | + "`", 128 | ) 129 | except KeyError: 130 | reply += self.reporter.print_report( 131 | "warning", "Could not detect chapters language" 132 | ) 133 | else: 134 | reply += self.reporter.print_report( 135 | "error", "Must have at least 1 chapter menu" 136 | ) 137 | 138 | return reply 139 | -------------------------------------------------------------------------------- /vdator/checks/chapter_padding.py: -------------------------------------------------------------------------------- 1 | from .check import * 2 | 3 | import re 4 | 5 | 6 | class CheckChapterPadding(Check): 7 | def __init__(self, reporter, mediainfo): 8 | super().__init__(reporter, mediainfo, "Error checking chapter padding") 9 | 10 | # overriding abstract method 11 | def get_reply(self): 12 | reply, padded_correctly = "", True 13 | 14 | if "menu" in self.mediainfo and len(self.mediainfo["menu"]) > 0: 15 | if len(self.mediainfo["menu"]) >= 1: 16 | for i, menu in enumerate(self.mediainfo["menu"]): 17 | padded_correctly = True 18 | num_chapters = len(menu) 19 | for ch in menu: 20 | for title in ch["titles"]: 21 | if re.search( 22 | r"^chapter\s\d+", title["title"], re.IGNORECASE 23 | ): 24 | # numbered 
class Check(object):
    """Base class for a single validation check.

    Subclasses implement ``get_reply``; callers invoke ``run``, which turns
    an unexpected error inside the check into a "fail" report entry.
    """

    def __init__(self, reporter, mediainfo, run_fail_msg):
        """
        Parameters
        ----------
        reporter
            object whose ``print_report(type, message)`` formats a report line
        mediainfo
            parsed MediaInfo data made available to the check
        run_fail_msg
            message reported when the check raises
        """
        self.reporter = reporter
        self.mediainfo = mediainfo
        self.run_fail_msg = run_fail_msg

    def run(self):
        """
        Runs the check and returns reply.
        Wraps check in try...except to prevent crashes

        Only ``Exception`` subclasses are trapped, so KeyboardInterrupt and
        SystemExit still propagate and the process can be stopped.

        Returns
        -------
        reply string
        """
        reply = ""
        try:
            reply += self.get_reply()
        except Exception:
            traceback.print_exc()
            reply += self.reporter.print_report("fail", self.run_fail_msg)
        return reply

    @abstractmethod
    def get_reply(self):
        """
        Gets reply from this check

        Returns
        -------
        reply string
        """
        pass
complete_name.split("/")[-1] 43 | 44 | # possible release names 45 | complete_name_lc = complete_name.lower() 46 | possible_release_names = [ 47 | self._construct_release_name( 48 | cut, 49 | hybird=("hybrid" in complete_name_lc), 50 | repack=("repack" in complete_name_lc), 51 | ) 52 | for cut in CUTS 53 | ] 54 | 55 | if ( 56 | self.channel_name in INTERNAL_CHANNELS 57 | and complete_name in possible_release_names 58 | ): 59 | reply += self.reporter.print_report( 60 | "correct", "Filename: `" + complete_name + "`" 61 | ) 62 | elif self._partial_match(possible_release_names, complete_name): 63 | reply += self.reporter.print_report( 64 | "correct", "Filename: `" + complete_name + "`" 65 | ) 66 | else: 67 | expected_release_name = possible_release_names[0] 68 | 69 | # pick the expected release name with the proper cut 70 | for i, cut in enumerate(CUTS[1:]): 71 | if cut in complete_name: 72 | expected_release_name = possible_release_names[i + 1] 73 | 74 | if self.channel_name not in INTERNAL_CHANNELS: 75 | expected_release_name += "GRouP.mkv" 76 | 77 | reply += self.reporter.print_report( 78 | "error", 79 | "Filename missmatch:\n```fix\nFilename: " 80 | + complete_name 81 | + "\nExpected: " 82 | + expected_release_name 83 | + "```", 84 | new_line=False, 85 | ) 86 | reply += show_diff(complete_name, expected_release_name) 87 | else: 88 | reply += self.reporter.print_report("error", "Cannot validate filename") 89 | 90 | return reply 91 | 92 | def _construct_release_name(self, cut=None, hybird=False, repack=False): 93 | release_name = "" 94 | 95 | if not self.source_detector.is_dvd(): 96 | # scan type must come from bdinfo 97 | bdinfo_video_parts = self.bdinfo["video"][0].split(" / ") 98 | scan_type = bdinfo_video_parts[2].strip()[-1].lower() 99 | 100 | if has_many(self.mediainfo, "video.0", ["height", "title"]) and has( 101 | self.mediainfo, "audio.0.title" 102 | ): 103 | # Name.S01E01 or Name.S01E01E02 104 | tv_show_name_search = re.search( 105 | 
r"(.+)\s-\s(S\d{2}(E\d{2})+)", 106 | self.mediainfo["general"][0]["movie_name"], 107 | ) 108 | # Name.Year 109 | movie_name_search = re.search( 110 | r"(.+)\s\((\d{4})\)", self.mediainfo["general"][0]["movie_name"] 111 | ) 112 | if tv_show_name_search: 113 | title = self._format_filename_title(tv_show_name_search.group(1)) 114 | season_episode = tv_show_name_search.group(2).strip() 115 | release_name += title + "." + season_episode 116 | elif movie_name_search: 117 | title = self._format_filename_title(movie_name_search.group(1)) 118 | year = movie_name_search.group(2).strip() 119 | release_name += title + "." + year 120 | else: 121 | release_name += self._format_filename_title( 122 | self.mediainfo["general"][0]["movie_name"] 123 | ) 124 | 125 | # with or without hybrid 126 | if hybird: 127 | release_name += ".Hybrid" 128 | 129 | # with or without repack 130 | if repack: 131 | release_name += ".REPACK" 132 | 133 | # check cuts here 134 | if cut is not None: 135 | release_name += "." + cut 136 | 137 | # resolution (ex. 1080p) 138 | height = "".join(re.findall(r"[\d]+", self.mediainfo["video"][0]["height"])) 139 | 140 | if self.source_detector.is_dvd(): 141 | # source DVD 142 | if "standard" in self.mediainfo["video"][0]: 143 | release_name += "." + self.mediainfo["video"][0]["standard"] 144 | release_name += ".DVD.REMUX" 145 | elif self.source_detector.is_uhd(): 146 | # source UHD BluRay 147 | release_name += "." + height 148 | release_name += scan_type 149 | release_name += ".UHD.BluRay.REMUX" 150 | # Dolby Vision (DV) 151 | if self.source_detector.is_dv(): 152 | release_name += ".DV" 153 | # SDR/HDR 154 | if self.mediainfo["video"][0]["color_primaries"] == "BT.2020": 155 | release_name += ".HDR" 156 | else: 157 | release_name += ".SDR" 158 | else: 159 | # source HD BluRay 160 | release_name += "." + height 161 | release_name += scan_type 162 | release_name += ".BluRay.REMUX" 163 | 164 | # video format (ex. 
AVC) 165 | main_video_title = self.mediainfo["video"][0]["title"].split(" / ") 166 | if len(main_video_title) >= 1: 167 | release_name += "." + self.codecs.get_video_codec_title_name( 168 | main_video_title[0].strip() 169 | ) 170 | 171 | main_audio_title = self.mediainfo["audio"][0]["title"] 172 | ( 173 | main_audio_title, 174 | _, 175 | _, 176 | ) = self.remove_until_first_codec.remove(main_audio_title) 177 | main_audio_title_parts = main_audio_title.split(" / ") 178 | 179 | audio_codec_title, main_audio_channels = None, None 180 | 181 | # get main audio codec 182 | if len(main_audio_title) > 0: 183 | main_audio_codec = main_audio_title_parts[0] 184 | if self.codecs.is_audio_title(main_audio_codec): 185 | audio_codec_title = self.codecs.get_audio_codec_title_name( 186 | main_audio_codec 187 | ) 188 | 189 | # get main audio channels 190 | if len(main_audio_title) > 1: 191 | main_audio_channels = main_audio_title_parts[1] 192 | search_channel_atmos = re.search( 193 | r"(\d.\d)\+\d+\sobjects", main_audio_channels 194 | ) 195 | if search_channel_atmos: 196 | main_audio_channels = search_channel_atmos.group(1) 197 | 198 | if ( 199 | audio_codec_title 200 | and main_audio_channels 201 | and is_float(main_audio_channels) 202 | ): 203 | # have main audio codec and channels 204 | if audio_codec_title == "TrueHD.Atmos": 205 | # atmos channel 206 | release_name += ".TrueHD." + main_audio_channels + ".Atmos" 207 | else: 208 | release_name += "." + audio_codec_title + "." 
class CheckFLACAudioTracks(Check, SectionId):
    """Validate FLAC track titles against the MediaInfo track properties."""

    def __init__(self, reporter, remove_until_first_codec, mediainfo):
        """Keep the helper that strips everything before the first codec."""
        super().__init__(reporter, mediainfo, "Error checking FLAC audio tracks")
        self.remove_until_first_codec = remove_until_first_codec

    # overriding abstract method
    def get_reply(self):
        """Compare each FLAC title with the title rebuilt from MediaInfo.

        Tracks without a title, without a codec in the title, or whose
        format is not FLAC are skipped.

        Returns
        -------
        reply string
        """
        # check FLAC Audio tracks using mediainfo
        reply = ""
        number = r"\d*\.\d+|\d+"

        for idx, track in enumerate(self.mediainfo["audio"]):
            # skip if no title
            if "title" not in track:
                continue

            # skip if no codec info
            stripped_title, _, has_codec = self.remove_until_first_codec.remove(
                track["title"]
            )
            if not has_codec:
                continue

            # only FLAC tracks are validated here
            if "format" not in track or track["format"] != "FLAC":
                continue

            channels_text = track["channels"].strip().lower()
            channels = float("".join(re.findall(number, channels_text)))

            rate_text = track["sampling_rate"].strip().lower()
            sampling_rate = int(float("".join(re.findall(number, rate_text))))

            bit_rate_text = track["bit_rate"].strip().lower()
            bit_rate = int("".join(re.findall(r"\d+", bit_rate_text)))

            bit_depth = track["bit_depth"].strip().lower().replace(" bits", "-bit")

            expected_title = " / ".join(
                [
                    "FLAC Audio",
                    "{:.1f}".format(channels),
                    str(sampling_rate) + " kHz",
                    str(bit_rate) + " kbps",
                    bit_depth,
                ]
            )

            prefix = "Audio " + self._section_id("audio", idx)
            if expected_title != stripped_title:
                reply += self.reporter.print_report(
                    "error",
                    prefix
                    + ": FLAC Bad track name (from MediaInfo):\n```fix\nActual: "
                    + stripped_title
                    + "\nExpected: "
                    + expected_title
                    + "```",
                    new_line=False,
                )
            else:
                reply += self.reporter.print_report(
                    "correct",
                    prefix + ": FLAC Good track name (from MediaInfo)",
                )

        return reply
class CheckMetadataDefaultFlag(Check):
    """Report audio/text sections that have more than one `default=yes` track."""

    def __init__(self, reporter, mediainfo):
        super().__init__(reporter, mediainfo, "Error checking metadata default flag")

    # overriding abstract method
    def get_reply(self):
        """
        Count the `default=yes` tracks per track type.

        Returns
        -------
        reply string: one error per offending track type, otherwise a single
        "correct" report
        """
        errors = []

        for kind in ("audio", "text"):
            flagged = sum(
                1
                for track in self.mediainfo[kind]
                if "default" in track and track["default"].lower() == "yes"
            )
            # only one track of each type should be default=yes
            if flagged > 1:
                errors.append(
                    self.reporter.print_report(
                        "error",
                        "Only 1 {} track should be `default=yes`".format(kind),
                    )
                )

        if errors:
            return "".join(errors)

        return self.reporter.print_report(
            "correct",
            "Only 1 track of each type is `default=yes`",
        )
class CheckMetadataIds(Check, IsMovie):
    """
    Compare the movie / TV show name (and, for movies, the year) found in the
    MediaInfo "movie_name" field with the data IMDb and TMDb return for the
    ids stored in the MediaInfo general section.
    """

    def __init__(self, reporter, mediainfo, tmdb, ia):
        super().__init__(reporter, mediainfo, "Error parsing IMDb/TMDb ids")
        self.tmdb = tmdb
        self.ia = ia

    # overriding abstract method
    def get_reply(self):
        """
        Look up the IMDb/TMDb ids and report whether name and year match.

        A title or year counts as matched when either IMDb or TMDb agrees
        with the MediaInfo movie name.

        Returns
        -------
        reply string
        """
        reply = ""

        imdb_movie, tmdb_info, tmdb_year = None, None, None

        movie_data = {"name": None, "year": None}

        matched = {
            "imdb_title": False,
            "imdb_year": False,
            "tmdb_title": False,
            "tmdb_year": False,
            # matched movie title/year with either imdb or tmdb
            "title": False,
            "year": False,
            "title_replied": False,
            "year_replied": False,
        }

        # is it a movie or tv show?
        is_movie = self._is_movie()

        # TMDb stores a movie's title under "title" and a tv show's under "name"
        tmdb_title_key = "title" if is_movie else "name"

        # extract movie name and year or tv show name
        if has(self.mediainfo, "general.0.movie_name"):
            if is_movie:
                # movie
                movie_name = re.search(
                    r"^(.+)\((\d{4})\)", self.mediainfo["general"][0]["movie_name"]
                )
                if movie_name:
                    movie_data["name"] = movie_name.group(1).strip()
                    movie_data["year"] = movie_name.group(2).strip()
            else:
                # tv show
                tv_show_name = re.search(
                    r"^(.+)\s-\s.+\s-\s.+", self.mediainfo["general"][0]["movie_name"]
                )
                if tv_show_name:
                    movie_data["name"] = tv_show_name.group(1).strip()

        if has(self.mediainfo, "general.0.imdb"):
            imdb_id = "".join(
                re.findall(r"[\d]+", self.mediainfo["general"][0]["imdb"])
            )
            try:
                imdb_movie = self.ia.get_movie(imdb_id)
            except imdb._exceptions.IMDbParserError:
                reply += self.reporter.print_report(
                    "error",
                    "Invalid IMDb id: `" + self.mediainfo["general"][0]["imdb"] + "`",
                )
            except Exception:
                # e.g. imdb._exceptions.IMDbDataAccessError; the lookup is
                # best-effort, so report it and carry on with TMDb
                reply += self.reporter.print_report(
                    "info",
                    "Failed to get IMDb movie data for id: `"
                    + self.mediainfo["general"][0]["imdb"]
                    + "`",
                )
            else:
                # force single space in movie name
                imdb_movie["title"] = re.sub(r"\s+", " ", imdb_movie["title"])
                matched["imdb_title"] = movie_data["name"] == imdb_movie["title"]
                if is_movie:
                    matched["imdb_year"] = self._year_range(
                        imdb_movie["year"], movie_data["year"]
                    )

        if has(self.mediainfo, "general.0.tmdb"):
            tmdb_id = "".join(
                re.findall(r"[\d]+", self.mediainfo["general"][0]["tmdb"])
            )
            # movie or tv show
            tmdb_data = self.tmdb.Movies(tmdb_id) if is_movie else self.tmdb.TV(tmdb_id)

            try:
                tmdb_info = tmdb_data.info()
                # force single space in movie / tv show name
                if isinstance(tmdb_info.get(tmdb_title_key), str):
                    tmdb_info[tmdb_title_key] = re.sub(
                        r"\s+", " ", tmdb_info[tmdb_title_key]
                    )
            except Exception:
                # network/API failure: best-effort, report and continue
                tmdb_info = None
                reply += self.reporter.print_report(
                    "info",
                    "Failed to get TMDb data for id: `"
                    + self.mediainfo["general"][0]["tmdb"]
                    + "`",
                )
            else:
                # tmdb_info["original_title"] is original title
                # tmdb_info["title"] / tmdb_info["name"] is the translated
                # title in whatever language you're requesting
                matched["tmdb_title"] = (
                    tmdb_title_key in tmdb_info
                    and movie_data["name"] == tmdb_info[tmdb_title_key]
                )
                if is_movie:
                    if "release_date" in tmdb_info and tmdb_info["release_date"]:
                        try:
                            datetime_obj = datetime.datetime.strptime(
                                tmdb_info["release_date"], "%Y-%m-%d"
                            )
                            tmdb_year = str(datetime_obj.year)
                        except ValueError:
                            # unexpected date format: treat the year as unknown
                            tmdb_year = None
                    matched["tmdb_year"] = tmdb_year and self._year_range(
                        tmdb_year, movie_data["year"]
                    )

        # matched title/year with either imdb or tmdb
        matched["title"] = matched["imdb_title"] or matched["tmdb_title"]
        matched["year"] = matched["imdb_year"] or matched["tmdb_year"]

        if has(self.mediainfo, "general.0.imdb") or has(
            self.mediainfo, "general.0.tmdb"
        ):
            if is_movie:
                # movie
                if matched["title"] and matched["year"]:
                    reply += self.reporter.print_report(
                        "correct", "Matched movie name and year with IMDb/TMDb"
                    )
                else:
                    if matched["title"]:
                        reply += self.reporter.print_report(
                            "correct", "Matched movie name with IMDb/TMDb"
                        )
                    else:
                        if imdb_movie and "title" in imdb_movie and imdb_movie["title"]:
                            reply += self.reporter.print_report(
                                "error", "IMDb: Name: `" + imdb_movie["title"] + "`"
                            )
                            if movie_data["name"]:
                                reply += show_diff(
                                    movie_data["name"], imdb_movie["title"]
                                )
                            matched["title_replied"] = True
                        if tmdb_info and "title" in tmdb_info and tmdb_info["title"]:
                            reply += self.reporter.print_report(
                                "error", "TMDb: Name: `" + tmdb_info["title"] + "`"
                            )
                            if movie_data["name"]:
                                reply += show_diff(
                                    movie_data["name"], tmdb_info["title"]
                                )
                            matched["title_replied"] = True
                        if not matched["title_replied"]:
                            reply += self.reporter.print_report(
                                "error", "Failed to match movie name with IMDb/TMDb"
                            )

                    if matched["year"]:
                        reply += self.reporter.print_report(
                            "correct", "Matched movie year with IMDb/TMDb"
                        )
                    else:
                        if imdb_movie and "year" in imdb_movie:
                            reply += self.reporter.print_report(
                                "error", "IMDb: Year: `" + str(imdb_movie["year"]) + "`"
                            )
                            matched["year_replied"] = True
                        if tmdb_year:
                            reply += self.reporter.print_report(
                                "error", "TMDb: Year: `" + str(tmdb_year) + "`"
                            )
                            matched["year_replied"] = True
                        if not matched["year_replied"]:
                            reply += self.reporter.print_report(
                                "error", "Failed to match movie year with IMDb/TMDb"
                            )
            else:
                # tv show
                if matched["title"]:
                    reply += self.reporter.print_report(
                        "correct", "Matched tv show name with IMDb/TMDb"
                    )
                else:
                    if imdb_movie and "title" in imdb_movie:
                        reply += self.reporter.print_report(
                            "error", "IMDb: Name: `" + imdb_movie["title"] + "`"
                        )
                        matched["title_replied"] = True
                    if tmdb_info and "name" in tmdb_info and tmdb_info["name"]:
                        reply += self.reporter.print_report(
                            "error", "TMDb: Name: `" + tmdb_info["name"] + "`"
                        )
                        matched["title_replied"] = True
                    if not matched["title_replied"]:
                        reply += self.reporter.print_report(
                            "error", "Failed to match tv show name with IMDb/TMDb"
                        )

        return reply

    def _year_range(self, year, test_year, offset=MOVIE_YEAR_OFFSET):
        """
        Return True when test_year lies within `offset` years of year.

        Either value may be a string or an int; a missing (falsy) value
        never matches.
        """
        # example: with offset = 1, and year = 2004, test_year can be between 2003 and 2005 inclusive
        # 2002 in range(2004 - 1, (2004 + 1) + 1) False
        # 2003 in range(2004 - 1, (2004 + 1) + 1) True
        # 2004 in range(2004 - 1, (2004 + 1) + 1) True
        # 2005 in range(2004 - 1, (2004 + 1) + 1) True
        # 2006 in range(2004 - 1, (2004 + 1) + 1) False
        if not (year and test_year):
            return False
        year = int(year)
        test_year = int(test_year)
        return test_year in range(year - offset, (year + offset) + 1)
class IsMovie(object):
    """Mixin that decides whether the MediaInfo describes a movie or a tv show."""

    # returns True if its a movie, False if tv show
    def _is_movie(self):
        """
        The TMDb id prefix ("movie/" or "tv/") decides when present;
        otherwise a movie name shaped like "Name - S01E01" marks a tv show.
        Anything else is assumed to be a movie.
        """
        if has(self.mediainfo, "general.0.tmdb"):
            tmdb_value = self.mediainfo["general"][0]["tmdb"]
            if tmdb_value.startswith("movie/"):
                return True
            if tmdb_value.startswith("tv/"):
                return False

        if has(self.mediainfo, "general.0.movie_name"):
            # tv show name in format "Name - S01E01" or "Name - S01E01E02"
            looks_like_episode = re.search(
                r"^.+\s-\sS\d{2}(E\d{2})+.*$",
                self.mediainfo["general"][0]["movie_name"],
            )
            if looks_like_episode:
                return False

        # could not tell: assume movie
        return True
class CheckMKVMerge(Check):
    """
    Check that the file was muxed with the latest mkvtoolnix release.

    The version in MediaInfo's "writing_application" field is compared with
    the first line of the mkvtoolnix news file (URL taken from the
    MKVTOOLNIX_NEWS environment variable), or with a forced version line.
    """

    # seconds to wait for the news file; without a timeout a stalled
    # connection would block the check indefinitely
    REQUEST_TIMEOUT = 10

    def __init__(self, reporter, mediainfo):
        super().__init__(
            reporter,
            mediainfo,
            "Error checking mkvtoolnix version",
        )

    def run(self):
        """
        Runs the check and returns reply.
        Wraps check in try...except to prevent crashes

        Returns
        -------
        reply string
        """
        reply = ""
        try:
            reply += self.get_reply(MKVMERGE_VERSION)
        except Exception:
            traceback.print_exc()
            reply += self.reporter.print_report("fail", self.run_fail_msg)
        return reply

    # overriding abstract method
    # force_version = "Version 57.0.0 \"Till The End\" 2021-05-22"
    # force_version = "Version 76.0 \"Celebration\" 2023-04-30"
    def get_reply(self, force_version=None):
        """
        Compare the mkvtoolnix version used for the file with the latest one.

        Parameters
        ----------
        force_version : str, optional
            version line to treat as the latest release; when given, the
            news file is not downloaded

        Returns
        -------
        reply string
        """
        reply = ""

        version_name_regex_mkvtoolnix = r'"(.*)"'
        version_name_regex_mediainfo = r"\'(.*)\'"
        version_num_regex = r"(\d+\.\d+(\.\d+)?)"

        if not has(self.mediainfo, "general.0.writing_application"):
            reply += self.reporter.print_report("info", "Not using mkvtoolnix")
            return reply

        writing_application = self.mediainfo["general"][0]["writing_application"]

        num_match = re.search(version_num_regex, writing_application)
        mediainfo_version_num = num_match.group(1) if num_match else None

        name_match = re.search(version_name_regex_mediainfo, writing_application)
        mediainfo_version_name = name_match.group(1) if name_match else None

        if not mediainfo_version_num or not mediainfo_version_name:
            reply += self.reporter.print_report("info", "Not using mkvtoolnix")
            return reply

        ## Version 32.0.0 "Astral Progressions" 2019-03-12
        ## Version 76.0 "Celebration" 2023-04-30
        mkvtoolnix_version_line = self._latest_version_line(force_version)

        latest_num = latest_name = None
        if mkvtoolnix_version_line:
            latest_num = re.search(version_num_regex, mkvtoolnix_version_line)
            latest_name = re.search(
                version_name_regex_mkvtoolnix, mkvtoolnix_version_line
            )

        if not latest_num or not latest_name:
            # download failed, or the line is not a recognisable version line
            reply += self.reporter.print_report(
                "info", "Could not fetch latest mkvtoolnix version"
            )
            return reply

        mkvtoolnix_version_num = latest_num.group(1)
        mkvtoolnix_version_name = latest_name.group(1)

        if (
            mkvtoolnix_version_num == mediainfo_version_num
            and mkvtoolnix_version_name == mediainfo_version_name
        ):
            reply += self.reporter.print_report(
                "correct",
                "Uses latest mkvtoolnix: `"
                + mediainfo_version_num
                + ' "'
                + mediainfo_version_name
                + '"`',
            )
        else:
            reply += self.reporter.print_report(
                "warning",
                "Not using latest mkvtoolnix: `"
                + mediainfo_version_num
                + ' "'
                + mediainfo_version_name
                + '"` latest is: `'
                + mkvtoolnix_version_num
                + ' "'
                + mkvtoolnix_version_name
                + '"`',
            )

        return reply

    def _latest_version_line(self, force_version):
        """Return the latest-release version line, or None when unavailable."""
        if force_version:
            return force_version

        news_url = os.environ.get("MKVTOOLNIX_NEWS")
        if not news_url:
            return None

        try:
            response = requests.get(news_url, timeout=self.REQUEST_TIMEOUT)
        except requests.RequestException:
            return None

        if response.status_code != 200:
            return None

        lines = response.text.splitlines()
        return lines[0] if lines else None
# overriding abstract method
def get_reply(self):
    """
    Check the movie name against the expected naming format.

    Movies must look like "Name (Year)"; tv shows like "Name - S01E01"
    (or "Name - S01E01E02"). On a mismatch, leading/trailing spaces are
    reported as well.

    Returns
    -------
    reply string
    """
    # is it a movie or tv show?
    is_movie = self._is_movie()

    if not has(self.mediainfo, "general.0.movie_name"):
        return self.reporter.print_report("error", "Missing movie name")

    movie_name = self.mediainfo["general"][0]["movie_name"]

    if is_movie:
        # movie name in format "Name (Year)"
        pattern = r"^.+\(\d{4}\)$"
        subject, expected_format = "Movie name", "Name (Year)"
    else:
        # tv show name in format "Name - S01E01" or "Name - S01E01E02"
        pattern = r"^.+\s-\sS\d{2}(E\d{2})+.*$"
        subject, expected_format = "TV show name", "Name - S01E01"

    if re.search(pattern, movie_name):
        return self.reporter.print_report(
            "correct",
            subject + " format `" + expected_format + "`: `" + movie_name + "`",
        )

    reply = self.reporter.print_report(
        "error",
        subject
        + " does not match format `"
        + expected_format
        + "`: `"
        + movie_name
        + "`",
    )
    reply += self._movie_name_extra_space(movie_name)
    return reply
class CheckMuxingMode(Check):
    """Report every MediaInfo section entry that has a muxing mode set."""

    def __init__(self, reporter, mediainfo):
        super().__init__(
            reporter,
            mediainfo,
            "Error checking muxing mode",
        )

    # overriding abstract method
    def get_reply(self):
        """
        Scan the general, video, audio and text sections for "muxing_mode".

        Returns
        -------
        reply string: one error per entry with a muxing mode, otherwise a
        single "correct" report
        """
        reply, is_valid = "", True

        for section in ["general", "video", "audio", "text"]:
            for i, track in enumerate(self.mediainfo[section]):
                if "muxing_mode" not in track:
                    continue

                # an entry may have no "id"; fall back to its position
                # instead of raising KeyError and failing the whole check
                if "id" in track:
                    track_label = "#" + str(track["id"])
                else:
                    track_label = str(i)

                reply += self.reporter.print_report(
                    "error",
                    section.capitalize()
                    + " "
                    + track_label
                    + " has muxing mode: `"
                    + track["muxing_mode"]
                    + "`",
                )
                is_valid = False

        if is_valid:
            reply += self.reporter.print_report(
                "correct", "All tracks do not have a muxing mode"
            )

        return reply
class CheckPrintChapters(Check):
    """Print the chapters of every menu, or note that they are just numbered."""

    def __init__(self, reporter, mediainfo):
        super().__init__(reporter, mediainfo, "Error printing chapters")

    # overriding abstract method
    def get_reply(self):
        """
        List chapter times, languages and titles for each menu whose
        chapters are not all named "Chapter <number>"; for numbered menus
        only an info line and the chapter languages are printed.

        Returns
        -------
        reply string
        """
        menus = self.mediainfo["menu"]
        if len(menus) == 0:
            return self.reporter.print_report("info", "No chapters")

        pieces = []
        for position, chapters in enumerate(menus, start=1):
            pieces.append(f"> **Chapters {position}**\n")

            # built as a list so every chapter title is inspected
            numbered_flags = [
                bool(re.search(r"^chapter\s\d+", entry["title"], re.IGNORECASE))
                for chapter in chapters
                for entry in chapter["titles"]
            ]

            if all(numbered_flags):
                pieces.append(
                    self.reporter.print_report("info", "Chapters are numbered")
                )
                languages = chapters[0]["languages"]
                if len(languages) > 0 and languages[0] != "":
                    pieces.append(
                        "Chapter languages: `" + ", ".join(languages) + "`\n"
                    )
                continue

            # named chapters: print them in a code block, one per line
            pieces.append("```")
            for chapter in chapters:
                line = ""
                if chapter["time"]:
                    line += chapter["time"] + " :"
                for entry in chapter["titles"]:
                    if entry["language"]:
                        line += " lang: " + entry["language"]
                    if entry["title"]:
                        line += " title: " + entry["title"]
                pieces.append(line + "\n")
            pieces.append("```")

        return "".join(pieces)
class RemoveUntilFirstCodec(object):
    """Strip the leading " / "-separated parts of a track title that precede the codec."""

    def __init__(self, codecs):
        self.codecs = codecs

    def remove(self, title):
        """
        Drop title parts until the first one recognised as an audio codec.

        Returns
        -------
        (remaining title, list of removed parts, whether a codec was found)

        A title without " / " is returned unchanged with found = False.
        """
        remaining, skipped = title, []

        if " / " not in title:
            return remaining, skipped, False

        for segment in title.split(" / "):
            if self.codecs.is_audio_title(segment):
                # stop when we get first codec
                return remaining, skipped, True

            # segment is not a codec: cut the leading part off what is left
            head, _, tail = remaining.partition(" / ")
            skipped.append(head)
            remaining = tail.strip()

        return remaining, skipped, False
class CheckTextDefaultFlag(Check):
    """For foreign films, check that an English subtitle track is `default=yes`."""

    def __init__(self, reporter, mediainfo):
        super().__init__(reporter, mediainfo, "Error checking text track default flag")

    # overriding abstract method
    def get_reply(self):
        """
        A film counts as foreign when the first audio track's language is
        not English (or is not set). Only text tracks that have both a
        language and a default flag are considered.

        Returns
        -------
        reply string, empty when there are no text tracks, the first audio
        track is English, or there are no English subtitles
        """
        text_tracks = self.mediainfo["text"]
        if len(text_tracks) == 0:
            return ""

        first_audio_language = False
        if has(self.mediainfo, "audio.0.language"):
            first_audio_language = self.mediainfo["audio"][0]["language"].lower()

        if first_audio_language == "english":
            return ""

        found_english_subs = False
        english_is_default = False
        for track in text_tracks:
            # text tracks with language and default keys
            if "language" not in track or "default" not in track:
                continue
            if track["language"].lower() != "english":
                continue
            found_english_subs = True
            if track["default"].lower() == "yes":
                english_is_default = True
                break

        if not found_english_subs:
            return ""

        # foreign audio and has english subs. english subs should be default=yes
        if english_is_default:
            return self.reporter.print_report(
                "correct",
                "Foreign film, one of the English subtitles are `default=yes`",
            )
        return self.reporter.print_report(
            "error",
            "Foreign film, one of the English subtitles should be `default=yes`",
        )
31 | text_langs = [ 32 | self._format_lang(text["language"]) for text in self.mediainfo["text"] 33 | ] 34 | # remove duplicates from list and preserve order 35 | text_langs = list(dict.fromkeys(text_langs)) 36 | 37 | # list of text tracks by language 38 | # 39 | # OrderedDict([ 40 | # ('English', [{}, ...]), ('German', [{}, ...]) 41 | # ]) 42 | # 43 | text_tracks_by_lang = OrderedDict((k, list()) for k in text_langs) 44 | commentary_tracks_by_lang = OrderedDict((k, list()) for k in text_langs) 45 | has_commentary = False 46 | 47 | # get tracks by language, and separate commentary tracks 48 | for i, text in enumerate(self.mediainfo["text"]): 49 | text["title"] = text["title"] if "title" in text else "" 50 | if self._is_commentary_track(text["title"]): 51 | commentary_tracks_by_lang[self._format_lang(text["language"])].append( 52 | text 53 | ) 54 | has_commentary = True 55 | else: 56 | text_tracks_by_lang[self._format_lang(text["language"])].append(text) 57 | # forced english track should be first 58 | reply += self._forced_english_track_first(i, text) 59 | 60 | # languages should be in alphabetical order with English first 61 | reply += self._languages_in_order(text_tracks_by_lang, "Regular subs: ") 62 | if has_commentary: 63 | reply += self._languages_in_order( 64 | commentary_tracks_by_lang, "Commentary subs: " 65 | ) 66 | 67 | # subtitles in order within language: no title, SDH, rest in alphabetical order 68 | reply += "**Expected order within language:** No title, SDH, alphabetical\n" 69 | reply += self._subs_in_order_within_language( 70 | text_tracks_by_lang, "Regular subs: " 71 | ) 72 | if has_commentary: 73 | reply += self._subs_in_order_within_language( 74 | commentary_tracks_by_lang, "Commentary subs: " 75 | ) 76 | 77 | # commentary tracks should be after regular subs 78 | if has_commentary: 79 | reply += self._commentary_last( 80 | text_tracks_by_lang, commentary_tracks_by_lang 81 | ) 82 | 83 | return reply 84 | 85 | def _format_lang(self, lang): 86 | """ 
def _forced_english_track_first(self, i, text_track):
    """
    Forced english track should be first
    Only checks tracks without titles, since titles have a predefined order: No title, SDH, alphabetical

    Returns an error report when text_track (at position i) is a forced,
    untitled English track that is not the first text track; otherwise "".
    """
    forced = "forced" in text_track and text_track["forced"].lower() == "yes"
    english = text_track["language"].lower() == "english"
    # only checks tracks without titles
    untitled = text_track["title"] == ""

    if i == 0 or not (forced and english and untitled):
        return ""

    # forced english track should be first
    track_id = self._section_id("text", i)
    return self.reporter.print_report(
        "error",
        f"Text {track_id} is a forced English track, it should be first",
    )
Expected: `" 139 | + ", ".join(text_track_langs_expected_order) 140 | + "`", 141 | ) 142 | 143 | return reply 144 | 145 | def _commentary_last(self, text_tracks_by_lang, commentary_tracks_by_lang): 146 | """Commentary tracks should be last""" 147 | reply = "" 148 | 149 | if len(commentary_tracks_by_lang) > 0: 150 | last_text_id = self._get_last_text_id(text_tracks_by_lang) 151 | if last_text_id != -1: 152 | try: 153 | first_commentary = next(iter(commentary_tracks_by_lang.values())) 154 | if first_commentary and has(first_commentary, "0.id"): 155 | try: 156 | if last_text_id > int(first_commentary[0]["id"]): 157 | # commentary tracks should be after regular subs 158 | reply += self.reporter.print_report( 159 | "error", 160 | "Commentary subs should be after regular subs", 161 | ) 162 | else: 163 | # commentary tracks are after regular subs 164 | reply += self.reporter.print_report( 165 | "correct", 166 | "Commentary subs are after regular subs", 167 | ) 168 | except ValueError: 169 | pass 170 | except StopIteration: 171 | pass 172 | 173 | return reply 174 | 175 | def _subs_in_order_within_language(self, text_tracks_by_lang, prefix=""): 176 | """ 177 | Subtitles in order within language 178 | No title, SDH, rest in alphabetical order 179 | """ 180 | reply = "" 181 | for k, v in text_tracks_by_lang.items(): 182 | # k = 'English' 183 | # v = tracks list [{}, ...] 
def _get_last_text_id(self, text_tracks_by_lang):
    """
    Get track id of last subtitle

    Parameters
    ----------
    text_tracks_by_lang : dict
        language -> list of subtitle tracks

    Returns
    -------
    highest numeric track id, or -1 when no track has one. Tracks with a
    missing or non-numeric "id" are ignored.
    """
    last_text_id = -1
    for tracks in text_tracks_by_lang.values():
        for track in tracks:
            try:
                curr_text_id = int(track["id"])
            except (KeyError, TypeError, ValueError):
                # no id, or not a number: skip this track
                continue
            last_text_id = max(last_text_id, curr_text_id)
    return last_text_id
class CheckTracksHaveLanguage(Check, SectionId):
    """Report every video, audio and text track that has no language set"""

    def __init__(self, reporter, mediainfo):
        super().__init__(reporter, mediainfo, "Error checking if tracks have language")

    # overriding abstract method
    def get_reply(self):
        """
        Build the report

        Returns
        -------
        str with one error line per track missing a language,
        or a single "correct" line when every track has one
        """
        lines = []

        for section in ("video", "audio", "text"):
            for index, track in enumerate(self.mediainfo[section]):
                if "language" in track:
                    continue
                lines.append(
                    self.reporter.print_report(
                        "error",
                        section.capitalize()
                        + " "
                        + self._section_id(section, index)
                        + ": Does not have a language chosen",
                    )
                )

        if not lines:
            # nothing was flagged
            lines.append(
                self.reporter.print_report(
                    "correct", "All tracks have a language chosen"
                )
            )

        return "".join(lines)
# overriding abstract method
def get_reply(self):
    """
    Check the MediaInfo video track title

    For DVD sources the expected title is rebuilt from the MediaInfo
    video fields and compared with the MediaInfo title. Otherwise the
    MediaInfo title is compared with the first BDInfo video track.

    Returns
    -------
    str report text
    """
    reply = ""

    # DVD branch: needs every field used to rebuild the title
    if (
        has_many(
            self.mediainfo,
            "video.0",
            [
                "format",
                "format_version",
                "bit_rate",
                "height",
                "scan_type",
                "frame_rate",
                "display_aspect_ratio",
                "title",
            ],
        )
        and self.source_detector.is_dvd()
    ):
        # dvd video title from mediainfo
        video_title = self._dvd_video_title_from_mediainfo()
        mediainfo_title = self.mediainfo["video"][0]["title"]

        if mediainfo_title == video_title:
            reply += self.reporter.print_report(
                "correct",
                "Video track names match: ```" + mediainfo_title + "```",
                new_line=False,
            )
        else:
            reply += self.reporter.print_report(
                "error",
                "Video track names missmatch:\n```fix\nExpected: "
                + video_title
                + "\nMediaInfo: "
                + mediainfo_title
                + "```",
                new_line=False,
            )
            # hint that highlights the differing characters
            reply += show_diff(mediainfo_title, video_title)

    # BDInfo branch: both sides must have a video section
    elif has(self.bdinfo, "video") and has(self.mediainfo, "video"):
        if len(self.bdinfo["video"]) < 1:
            reply += self.reporter.print_report(
                "error", "Missing bdinfo video track"
            )
            return reply
        elif len(self.mediainfo["video"]) < 1:
            reply += self.reporter.print_report(
                "error", "Missing mediainfo video track"
            )
            return reply

        if has(self.mediainfo, "video.0.title") and has(self.bdinfo, "video.0"):
            mediainfo_video_title = self.mediainfo["video"][0]["title"]
            bdinfo_video_title = self.bdinfo["video"][0]

            # 1080i @ 25fps is actually progressive
            reply += self._actually_progressive()

            bitrate_search = re.search(r"(\d+\.\d+)\skbps", mediainfo_video_title)
            if bitrate_search:
                # if mediainfo has a decimal kbps bitrate, use it in the bdinfo for comparison
                percise_bitrate = bitrate_search.group(1)
                percise_kbps = percise_bitrate + " kbps"
                bdinfo_video_title = re.sub(
                    r"(\d+)\skbps", percise_kbps, bdinfo_video_title
                )
            if self.source_detector.is_dv() and mediainfo_video_title.startswith(
                bdinfo_video_title
            ):
                # if source is dolby vision, only check that the first part of mediainfo video title
                # matches bdinfo video title. Up to BT.2020, i.e. Dolby Vision FEL is not checked
                reply += self.reporter.print_report(
                    "correct",
                    "Video track names match: ```" + mediainfo_video_title + "```",
                    new_line=False,
                )
            elif bdinfo_video_title == mediainfo_video_title:
                reply += self.reporter.print_report(
                    "correct",
                    "Video track names match: ```" + bdinfo_video_title + "```",
                    new_line=False,
                )
            else:
                reply += self.reporter.print_report(
                    "error",
                    "Video track names missmatch:\n```fix\nBDInfo: "
                    + bdinfo_video_title
                    + "\nMediaInfo: "
                    + mediainfo_video_title
                    + "```",
                    new_line=False,
                )
                # hint that highlights the differing characters
                reply += show_diff(mediainfo_video_title, bdinfo_video_title)
        else:
            # mediainfo video track has no title, or bdinfo has no first video track
            reply += self.reporter.print_report(
                "error", "Missing mediainfo video track"
            )
            return reply
    else:
        # neither the DVD fields nor a BDInfo/MediaInfo video pair is available
        reply += self.reporter.print_report("error", "Could not verify video track")

    return reply
video_fps.is_integer(): 157 | video_fps = int(video_fps) 158 | video_title += str(video_fps) + " fps / " 159 | 160 | # aspect ratio 161 | video_title += self.mediainfo["video"][0]["display_aspect_ratio"] 162 | 163 | return video_title 164 | 165 | def _actually_progressive(self): 166 | # dictionary existence already checked 167 | 168 | reply = "" 169 | 170 | bdinfo_video_title = self.bdinfo["video"][0] 171 | bdinfo_video_parts = bdinfo_video_title.split(" / ") 172 | 173 | if len(bdinfo_video_parts) >= 3: 174 | scan_type = bdinfo_video_parts[2].strip()[-1].lower() 175 | video_fps = float( 176 | "".join( 177 | re.findall(r"\d*\.\d+|\d+", bdinfo_video_parts[3].strip().lower()) 178 | ) 179 | ) 180 | (_, actually_progressive) = self.codecs.get_scan_type_title_name( 181 | scan_type, video_fps 182 | ) 183 | if actually_progressive: 184 | reply += self.reporter.print_report( 185 | "info", "Note: 1080i @ 25fps is actually progressive" 186 | ) 187 | 188 | return reply 189 | -------------------------------------------------------------------------------- /vdator/data/codecs.json: -------------------------------------------------------------------------------- 1 | { 2 | "codecs": { 3 | "video": { 4 | "h264/AVC": ".h264", 5 | "h264/MVC": ".mvc", 6 | "h265/HEVC": ".h265", 7 | "MPEG1": ".m1v", 8 | "MPEG2": ".m2v", 9 | "VC-1": ".vc1" 10 | }, 11 | "video_3d": { 12 | "h264/MVC": ".mvc" 13 | }, 14 | "audio": { 15 | "AC3": ".ac3", 16 | "AC3 EX": ".ac3", 17 | "AC3 Surround": ".ac3", 18 | "DTS Hi-Res": ".dtshr", 19 | "DTS Master Audio": ".dtsma", 20 | "DTS": ".dts", 21 | "FLAC Audio": ".flac", 22 | "RAW/PCM": ".pcm", 23 | "TrueHD/AC3": ".thd", 24 | "TrueHD/AC3 (Atmos)": ".thd" 25 | }, 26 | "subtitles": { 27 | "Subtitle (PGS)": ".sup", 28 | "Subtitle (DVD)": ".sup" 29 | }, 30 | "chapters": { 31 | "Chapters": ".txt" 32 | } 33 | }, 34 | "track_titles": { 35 | "video": { 36 | "MPEG-1 Video": "MPEG-1", 37 | "MPEG-2 Video": "MPEG-2", 38 | "MPEG-4 AVC Video": "AVC", 39 | "MPEG-H HEVC Video": 
"HEVC", 40 | "VC-1 Video": "VC-1" 41 | }, 42 | "audio": { 43 | "DTS Audio": "DTS", 44 | "DTS-HD High-Res Audio": "DTS-HD.HR", 45 | "DTS-HD Master Audio": "DTS-HD.MA", 46 | "DTS:X Master Audio": "DTS-X", 47 | "Dolby Digital Audio": "DD", 48 | "Dolby Digital EX Audio": "DD-EX", 49 | "Dolby Digital Plus Audio": "DDP", 50 | "Dolby TrueHD Audio": "TrueHD", 51 | "Dolby TrueHD/Atmos Audio": "TrueHD.Atmos", 52 | "FLAC Audio": "FLAC" 53 | } 54 | }, 55 | "scan_types": { 56 | "interlaced" : "i", 57 | "mbaff" : "i", 58 | "progressive" : "p" 59 | } 60 | } -------------------------------------------------------------------------------- /vdator/data/urls.json: -------------------------------------------------------------------------------- 1 | { 2 | "urls": { 3 | "dpaste.com": { 4 | "slug_regex": "https?://dpaste.com/(.*)", 5 | "raw_url_regex": "https?://dpaste.com/{}.txt", 6 | "raw_url": "https://dpaste.com/{}.txt" 7 | }, 8 | "dpaste.org": { 9 | "slug_regex": "https?://dpaste.org/(.*)", 10 | "raw_url_regex": "https?://dpaste.org/{}/raw", 11 | "raw_url": "https://dpaste.org/{}/raw" 12 | }, 13 | "hastebin.com": { 14 | "slug_regex": "https?://hastebin.com/(.*)", 15 | "raw_url_regex": "https?://hastebin.com/raw/{}", 16 | "raw_url": "https://hastebin.com/raw/{}" 17 | }, 18 | "www.heypasteit.com": { 19 | "slug_regex": "https?://www.heypasteit.com/clip/(.*)", 20 | "raw_url_regex": "https?://www.heypasteit.com/download/{}", 21 | "raw_url": "https://www.heypasteit.com/download/{}" 22 | }, 23 | "paste.centos.org": { 24 | "slug_regex": "https?://paste.centos.org/view/(.*)", 25 | "raw_url_regex": "https?://paste.centos.org/view/raw/{}", 26 | "raw_url": "https://paste.centos.org/view/raw/{}" 27 | }, 28 | "paste.ee": { 29 | "slug_regex": "https?://paste.ee/p/(.*)", 30 | "raw_url_regex": "https?://paste.ee/d/{}", 31 | "raw_url": "https://paste.ee/d/{}" 32 | }, 33 | "paste.opensuse.org": { 34 | "slug_regex": "https?://paste.opensuse.org/(.*)", 35 | "raw_url_regex": 
"https?://paste.opensuse.org/view/raw/{}", 36 | "raw_url": "https://paste.opensuse.org/view/raw/{}" 37 | }, 38 | "pastebin.com": { 39 | "slug_regex": "https?://pastebin.com/(.*)", 40 | "raw_url_regex": "https?://pastebin.com/raw/{}", 41 | "raw_url": "https://pastebin.com/raw/{}" 42 | }, 43 | "rentry.co": { 44 | "slug_regex": "https?://rentry.co/(.*)", 45 | "raw_url_regex": "https?://rentry.co/{}/raw", 46 | "raw_url": "https://rentry.co/{}/raw" 47 | }, 48 | "termbin.com": { 49 | "slug_regex": "https?://termbin.com/(.*)", 50 | "raw_url_regex": "https?://termbin.com/{}", 51 | "raw_url": "https://termbin.com/{}" 52 | }, 53 | "textbin.net": { 54 | "slug_regex": "https?://textbin.net/(.*)", 55 | "raw_url_regex": "https?://textbin.net/raw/{}", 56 | "raw_url": "https://textbin.net/raw/{}" 57 | } 58 | } 59 | } -------------------------------------------------------------------------------- /vdator/helpers.py: -------------------------------------------------------------------------------- 1 | from pydash import has 2 | import difflib 3 | 4 | 5 | def balanced_blockquotes(str): 6 | """ 7 | Check if blockquotes are balanced 8 | 9 | Parameters 10 | ---------- 11 | str : str 12 | text 13 | 14 | Returns 15 | ------- 16 | True if blockquotes are balanced, False otherwise 17 | """ 18 | num_blockquotes = str.count("```") 19 | # balanced if even number of blockquotes 20 | return (num_blockquotes % 2) == 0 21 | 22 | 23 | def split_string(str, limit, sep="\n"): 24 | """ 25 | Split string 26 | 27 | Parameters 28 | ---------- 29 | str : str 30 | string to split 31 | 32 | limit : int 33 | string length limit 34 | 35 | sep : str 36 | separator 37 | default: "\n" 38 | 39 | Returns 40 | ------- 41 | True if blockquotes are balanced, False otherwise 42 | """ 43 | limit = int(limit) 44 | words = str.split(sep) 45 | 46 | if max(map(len, words)) > limit: 47 | # limit is too small, return original string 48 | return str 49 | 50 | res, part, others = [], words[0], words[1:] 51 | for word in 
others: 52 | if (len(sep) + len(word)) > (limit - len(part)): 53 | res.append(part) 54 | part = word 55 | else: 56 | part += sep + word 57 | if part: 58 | res.append(part) 59 | 60 | return res 61 | 62 | 63 | def has_many(obj, base, keys): 64 | """ 65 | Check if object has many keys 66 | 67 | Parameters 68 | ---------- 69 | obj : object 70 | object to test 71 | 72 | base : str 73 | base key path 74 | 75 | keys : list 76 | keys to test 77 | 78 | Returns 79 | ------- 80 | True if all keys exist, False otherwise 81 | """ 82 | for key in keys: 83 | lookup = "" 84 | if base: 85 | lookup += base + "." 86 | lookup += key 87 | if not has(obj, lookup): 88 | return False 89 | return True 90 | 91 | 92 | def num_to_emoji(n): 93 | """ 94 | Convert number to discord emoji 95 | 96 | Parameters 97 | ---------- 98 | n : str 99 | string number 100 | 101 | Returns 102 | ------- 103 | str discord emoji if valid, False otherwise 104 | """ 105 | num_emoji_map = { 106 | "1": ":one:", 107 | "2": ":two:", 108 | "3": ":three:", 109 | "4": ":four:", 110 | "5": ":five:", 111 | "6": ":six:", 112 | "7": ":seven:", 113 | "8": ":eight:", 114 | "9": ":nine:", 115 | "10": ":ten:", 116 | } 117 | 118 | n = str(n) 119 | if n in num_emoji_map: 120 | return num_emoji_map[n] 121 | return False 122 | 123 | 124 | def show_diff(actual, expected): 125 | """ 126 | Show difference between expected and actual result 127 | using discord bold format 128 | 129 | Parameters 130 | ---------- 131 | actual : str 132 | actual result 133 | 134 | expected : str 135 | expected result 136 | 137 | Returns 138 | ------- 139 | str with differences in bold 140 | """ 141 | seqm = difflib.SequenceMatcher(None, actual, expected) 142 | 143 | output = [] 144 | for opcode, a0, a1, b0, b1 in seqm.get_opcodes(): 145 | if opcode == "equal": 146 | output.append(seqm.a[a0:a1]) 147 | elif opcode == "insert": 148 | output.append("**" + seqm.b[b0:b1] + "**") 149 | elif opcode == "delete": 150 | output.append("~~" + seqm.a[a0:a1] + "~~") 151 
def is_float(value):
    """
    Check if value can be converted to a float

    Parameters
    ----------
    value : any
        value to test

    Returns
    -------
    True if float(value) succeeds, False otherwise
    """
    try:
        float(value)
    except (TypeError, ValueError, OverflowError):
        # TypeError: None, lists, ...; ValueError: non-numeric text;
        # OverflowError: int too large for a float
        return False
    return True
[x.strip() for x in os.environ.get("REVIEW_CHANNELS").split(",")] 48 | 49 | # channels to send full summary to if from review channel 50 | REVIEW_REPLY_CHANNELS = [ 51 | x.strip() for x in os.environ.get("REVIEW_REPLY_CHANNELS").split(",") 52 | ] 53 | 54 | # channels to listen in and post full summaries 55 | BOT_CHANNELS = [x.strip() for x in os.environ.get("BOT_CHANNELS").split(",")] 56 | 57 | VERSION = "1.4.0" 58 | 59 | 60 | def print_help(): 61 | return ( 62 | "vdator " + VERSION + " help: " 63 | "I take a Pastebin link with BDInfo and MediaInfo dump." 64 | " I ignore all input after the line `" + IGNORE_AFTER_LINE + "`." 65 | " I add reactions in the following review channels: `" 66 | + ", ".join(REVIEW_CHANNELS) 67 | + "`," 68 | + " I reply with full summary from review channels to: `" 69 | + ", ".join(REVIEW_REPLY_CHANNELS) 70 | + "`" 71 | + " and post full summaries in: `" 72 | + ", ".join(BOT_CHANNELS) 73 | + "`." 74 | " Add a minus (-) sign in front of unused audio tracks in BDInfo." 
@client.event
async def on_message(message):
    """
    Discord message event

    Handles the !help and !version commands, adds status reactions to
    the bot's own messages and, for every supported paste url in the
    message, runs the checks and posts the report.

    Parameters
    ----------
    message : discord.Message class
        discord message
    """
    # direct messages have no guild to look channels up in
    if message.guild is None:
        return

    # get name of channel message was sent in
    # if message is in a thread, the channel name is in message.channel.parent, otherwise its in message.channel.name
    channel_name = (
        str(message.channel.parent)
        if hasattr(message.channel, "parent")
        else str(message.channel.name)
    )
    channel = get(
        message.guild.channels,
        name=channel_name,
        type=discord.ChannelType.text,
    )

    # only listens in bot and review channels
    if not (channel_name in BOT_CHANNELS or channel_name in REVIEW_CHANNELS):
        return

    # help command
    if message.content == "!help":
        await channel.send(print_help())
        return

    # version command
    if message.content == "!version":
        await channel.send(print_version())
        return

    # self
    if message.author == client.user:
        # add status reactions to own messages
        await add_status_reactions(message, message.content)
        return

    supported_urls = url_parser.extract_supported_urls(message.content)

    for url in supported_urls:
        reply = "<" + url + ">" + "\n"

        # Each stage below only runs when the previous one succeeded.
        # `except Exception` (not a bare except) keeps task cancellation
        # and interpreter exit from being swallowed.
        try:
            # setup/reset reporter
            reporter.setup()
            # get paste
            # NOTE: requests is synchronous; the timeout bounds how long
            # a slow paste host can stall this handler
            response = requests.get(url, timeout=30)
            response.raise_for_status()
            paste = response.text
        except Exception:
            traceback.print_exc()
            reply += reporter.print_report("fail", "Failed to get paste")
        else:
            try:
                (bdinfo, mediainfo, eac3to) = paste_parser.parse(paste)
            except Exception:
                traceback.print_exc()
                reply += reporter.print_report("fail", "Paste parser failed")
            else:
                if mediainfo:
                    try:
                        # parse mediainfo
                        mediainfo = mediainfo_parser.parse(mediainfo)
                    except Exception:
                        traceback.print_exc()
                        reply += reporter.print_report(
                            "fail", "Mediainfo parser failed"
                        )
                    else:
                        try:
                            remove_until_first_codec = RemoveUntilFirstCodec(
                                codecs_parser
                            )
                            match_bdinfo_audio_to_mediainfo = (
                                MatchBDInfoAudioToMediaInfo(
                                    remove_until_first_codec, bdinfo, mediainfo
                                )
                            )
                            bdinfo[
                                "audio"
                            ] = (
                                match_bdinfo_audio_to_mediainfo.match_bdinfo_audio_to_mediainfo()
                            )
                            bdinfo["audio"] = bdinfo_parser.expand_compat_tracks(
                                bdinfo["audio"]
                            )
                        except Exception:
                            traceback.print_exc()
                            reply += reporter.print_report(
                                "fail", "Matching bdinfo audio tracks to mediainfo"
                            )
                        else:
                            try:
                                # setup checker
                                checker.setup(bdinfo, mediainfo, eac3to, channel_name)
                            except Exception:
                                traceback.print_exc()
                                reply += reporter.print_report(
                                    "fail", "vdator failed to setup checker"
                                )
                            else:
                                try:
                                    reply += checker.run_checks()
                                except Exception:
                                    traceback.print_exc()
                                    reply += reporter.print_report(
                                        "fail", "vdator failed to parse"
                                    )
                else:
                    reply += reporter.print_report(
                        "error", "No mediainfo. Are you missing the `General` heading?"
                    )

        # report
        reply += "> **Report**\n"
        reply += reporter.display_report()

        # split into multiple messages based on reply length
        BLOCK_QUOTES = "```"
        len_limit = (
            int(os.environ.get("DISCORD_MSG_CHAR_LIMIT")) - len(BLOCK_QUOTES) * 2
        )
        replies = split_string(reply, len_limit, "\n")
        if isinstance(replies, str):
            # a bare string would be iterated one character at a time below
            replies = [replies]

        # preserve blockquotes
        for i in range(len(replies) - 1):
            if not balanced_blockquotes(replies[i]):
                # close the block in this part and reopen it in the next
                replies[i] += BLOCK_QUOTES
                replies[i + 1] = BLOCK_QUOTES + replies[i + 1]

        # fix blockquotes
        replies = [part.replace("``````", "```") for part in replies]

        if channel_name in BOT_CHANNELS:
            # reply in bot channel
            for part in replies:
                await channel.send(part)
        elif channel_name in REVIEW_CHANNELS:
            # add reactions in review channel
            await add_status_reactions(message, reply)

            # and send reply to
            for ch in REVIEW_REPLY_CHANNELS:
                review_reply_channel = get(
                    message.guild.channels, name=ch, type=discord.ChannelType.text
                )
                if review_reply_channel is None:
                    # configured reply channel does not exist in this guild
                    continue
                for part in replies:
                    await review_reply_channel.send(part)
"punkt", 10 | "averaged_perceptron_tagger", 11 | "maxent_ne_chunker", 12 | "words", 13 | ] 14 | for t in ntlk_list: 15 | nltk.download(t) 16 | 17 | 18 | def ie_preprocess(document): 19 | """ 20 | nltk preprocess text 21 | 22 | Parameters 23 | ---------- 24 | document : str 25 | text to pre process 26 | 27 | Returns 28 | ------- 29 | list sentences 30 | """ 31 | stop = stopwords.words("english") 32 | document = " ".join([i for i in document.split() if i not in stop]) 33 | sentences = nltk.sent_tokenize(document) 34 | sentences = [nltk.word_tokenize(sent) for sent in sentences] 35 | sentences = [nltk.pos_tag(sent) for sent in sentences] 36 | return sentences 37 | 38 | 39 | def extract_names(document): 40 | """ 41 | nltk extract person names 42 | 43 | Parameters 44 | ---------- 45 | document : str 46 | text 47 | 48 | Returns 49 | ------- 50 | list person names 51 | """ 52 | names = [] 53 | sentences = ie_preprocess(document) 54 | for tagged_sentence in sentences: 55 | for chunk in nltk.ne_chunk(tagged_sentence): 56 | if type(chunk) == nltk.tree.Tree: 57 | if chunk.label() == "PERSON": 58 | names.append(" ".join([c[0] for c in chunk])) 59 | return names 60 | -------------------------------------------------------------------------------- /vdator/parsers/__init__.py: -------------------------------------------------------------------------------- 1 | from .bdinfo_parser import BDInfoParser 2 | from .codecs_parser import CodecsParser 3 | from .match_bdinfo_audio_to_mediainfo import MatchBDInfoAudioToMediaInfo 4 | from .media_info_parser import MediaInfoParser 5 | from .paste_parser import PasteParser 6 | from .url_parser import URLParser 7 | -------------------------------------------------------------------------------- /vdator/parsers/bdinfo_parser.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | 4 | class BDInfoParser(object): 5 | """ 6 | Parse BDInfo 7 | """ 8 | 9 | def __init__(self): 10 | 
def format_video_track_name(self, name):
    """
    Format video track name

    Parameters
    ----------
    name : str
        track name

    Returns
    -------
    str formatted video track name
    """
    cleaned = self.format_track_name(name)

    # remove 3d
    for eye_marker in (" / Left Eye", " / Right Eye"):
        cleaned = cleaned.replace(eye_marker, "")

    # force decimal instead of comma in fps
    pieces = cleaned.split("/")
    if len(pieces) < 4:
        # no fourth part to fix up
        return cleaned

    pieces[3] = pieces[3].replace(",", ".")
    return "/".join(pieces)
def has_compat_track(self, audio_track_name):
    """
    Check if audio track name mentions an embedded compatibility track

    Parameters
    ----------
    audio_track_name : str
        audio track name, may be None or empty

    Returns
    -------
    True if the name contains one of the embedded track types
    (case-insensitive), False otherwise
    """
    if not audio_track_name:
        # format_audio_track leaves the name as None when the line has no " / "
        return False

    lowered = audio_track_name.lower()
    return any(track_type in lowered for track_type in self.embedded_track_types)
def playlist_report_format_video_track_name(self, name):
    """
    Format playlist report video track name

    Inserts " / " around the bitrate ("<value> kbps") so the name has
    the same layout as a quick summary video track.

    Parameters
    ----------
    name : str
        track name

    Returns
    -------
    str formatted track name, False if the name has no "kbps" token
    or no bitrate value in front of it
    """
    parts = name.split()

    try:
        kbps_i = parts.index("kbps")
    except ValueError:
        return False

    if kbps_i < 1:
        # no bitrate value before "kbps"; index -1 would wrap to the last token
        return False

    before = " ".join(parts[: kbps_i - 1]).strip()
    after = " ".join(parts[kbps_i + 1 :]).strip()
    track_name = (
        before + " / " + parts[kbps_i - 1] + " " + parts[kbps_i] + " / " + after
    )
    return self.format_video_track_name(track_name)
name_parts0 = name_parts[0].strip().split() 228 | name = ( 229 | " ".join(name_parts0[:-4]) 230 | + " / " 231 | + name_parts0[-1] 232 | + " / " 233 | + " / ".join(name_parts[1:]).strip() 234 | ) 235 | track["name"] = self.format_audio_track_name(name) 236 | track["language"] = name_parts0[3] 237 | return track 238 | except ValueError: 239 | return False 240 | 241 | def parse_quick_summary_line(self, bdinfo, l): 242 | """ 243 | Parse quick summary line 244 | 245 | Parameters 246 | ---------- 247 | bdinfo : dict 248 | bdinfo dict 249 | l : str 250 | quick summary line 251 | 252 | Returns 253 | ------- 254 | bdinfo dict 255 | """ 256 | l2 = l.strip().lower() 257 | # parse hidden tracks 258 | l2 = l2.lstrip("* ") 259 | if ( 260 | l2.startswith("video:") 261 | or l2.startswith("audio:") 262 | or l2.startswith("subtitle:") 263 | ): 264 | track_name = l.split(":", 1)[1].strip() 265 | if l2.startswith("video:"): 266 | track_name = self.format_video_track_name(track_name) 267 | bdinfo["video"].append(track_name) 268 | elif l2.startswith("audio:"): 269 | audio_track = self.format_audio_track(track_name) 270 | if self.has_compat_track(audio_track["name"]): 271 | ( 272 | audio_track, 273 | compat_track, 274 | ) = self.format_audio_compatibility_track(audio_track) 275 | audio_track["compat_track"] = compat_track 276 | bdinfo["audio"].append(audio_track) 277 | elif l2.startswith("subtitle:"): 278 | bdinfo["subtitle"].append(self.format_subtitle_track(track_name)) 279 | else: 280 | # get all other bdinfo entries 281 | l = l.split(":", 1) 282 | if len(l) >= 2: 283 | bdinfo[l[0].strip().lower()] = l[1].strip() 284 | return bdinfo 285 | 286 | def expand_compat_tracks(self, bdinfo_audio): 287 | """ 288 | Expand audio compatibility tracks into two tracks and keep order 289 | 290 | Returns 291 | ------- 292 | audio_tracks list 293 | """ 294 | audio_tracks = list() 295 | for audio_track in bdinfo_audio: 296 | audio_tracks.append(audio_track) 297 | if "compat_track" in audio_track: 298 | 
class CodecsParser(object):
    """
    Define codecs
    """

    def __init__(self, codecs):
        """
        Define codecs

        Parameters
        ----------
        codecs : dict
            codec definitions, shaped like
            {
                "codecs": {
                    "video": {...},
                    "audio": {...},
                    "subtitles": {...},
                    "chapters": {...}
                },
                "track_titles": {
                    "video": {...},
                    "audio": {...}
                },
                "scan_types": {...}
            }
        """
        self.codecs = codecs

        # map of all codec names to extensions
        self.codec_ext = {
            **self.codecs["codecs"]["video"],
            **self.codecs["codecs"]["audio"],
            **self.codecs["codecs"]["subtitles"],
            **self.codecs["codecs"]["chapters"],
        }

    def is_video(self, codec):
        """
        Is this a video codec?

        Parameters
        ----------
        codec : str
            codec

        Returns
        -------
        True if codec is a video codec, False otherwise.
        """
        return codec in self.codecs["codecs"]["video"]

    def is_video_title(self, codec):
        """
        Is this a video title codec?

        Parameters
        ----------
        codec : str
            codec

        Returns
        -------
        True if codec is a video title codec, False otherwise.
        """
        return codec in self.codecs["track_titles"]["video"]

    def is_video_3d(self, codec):
        """
        Is this a 3d video codec?

        Parameters
        ----------
        codec : str
            codec

        Returns
        -------
        True if codec is a 3d video, False otherwise
        (also when the definitions carry no "video_3d" table).
        """
        return codec in self.codecs["codecs"].get("video_3d", {})

    def is_audio(self, codec):
        """
        Is this an audio codec?

        Parameters
        ----------
        codec : str
            codec

        Returns
        -------
        True if codec is an audio codec, False otherwise.
        """
        return codec in self.codecs["codecs"]["audio"]

    def is_audio_title(self, codec):
        """
        Is this an audio title codec?

        Parameters
        ----------
        codec : str
            codec

        Returns
        -------
        True if codec is an audio title codec, False otherwise.
        """
        return codec in self.codecs["track_titles"]["audio"]

    def is_sub(self, codec):
        """
        Is this a subtitle codec?

        Parameters
        ----------
        codec : str
            codec

        Returns
        -------
        True if codec is a subtitle codec, False otherwise.
        """
        return codec in self.codecs["codecs"]["subtitles"]

    def is_chapter(self, codec):
        """
        Is this a chapter codec?

        Parameters
        ----------
        codec : str
            codec

        Returns
        -------
        True if codec is a chapter codec, False otherwise.
        """
        return codec in self.codecs["codecs"]["chapters"]

    def is_codec(self, codec):
        """
        Is this a valid codec?

        Parameters
        ----------
        codec : str
            codec

        Returns
        -------
        True if valid codec, False otherwise.
        """
        return codec in self.codec_ext

    def get_codec_ext(self, codec):
        """
        Get codec extension. Checks if codec is valid.

        Parameters
        ----------
        codec : str
            codec

        Returns
        -------
        str codec extension, "" for an unknown codec
        """
        return self.codec_ext.get(codec, "")

    def get_video_codec_title_name(self, codec):
        """
        Get name of video codec for title. Checks if video codec is valid.

        Parameters
        ----------
        codec : str
            codec

        Returns
        -------
        str codec title name, "" for an unknown codec
        """
        return self.codecs["track_titles"]["video"].get(codec, "")

    def get_audio_codec_title_name(self, codec):
        """
        Get name of audio codec for title. Checks if audio codec is valid.

        Parameters
        ----------
        codec : str
            codec

        Returns
        -------
        str codec title name, "" for an unknown codec
        """
        return self.codecs["track_titles"]["audio"].get(codec, "")

    @staticmethod
    def _whole_fps(video_fps):
        """
        Truncate a frame rate to a whole number.

        Accepts numbers and strings such as "25", "25.000" or "25.000 FPS";
        returns None when no number can be read.
        """
        try:
            return int(float(str(video_fps).split()[0]))
        except (IndexError, ValueError, OverflowError):
            return None

    def get_scan_type_title_name(self, scan_type, video_fps):
        """
        Get name of video scan type for title. Checks if scan type is valid.

        Parameters
        ----------
        scan_type : str
            scan type; anything starting with "p" counts as progressive,
            any other non-empty value as interlaced

        video_fps : str
            frame rate

        Returns
        -------
        str scan type title name, boolean if actually progressive
        """
        actually_progressive = False
        scan_type = scan_type.strip().lower()

        if scan_type:
            scan_type = "progressive" if scan_type[0] == "p" else "interlaced"

        # interlaced @ 25fps is actually progressive
        # but it's still called interlaced
        if scan_type == "interlaced" and self._whole_fps(video_fps) == 25:
            actually_progressive = True

        return self.codecs["scan_types"].get(scan_type, ""), actually_progressive
class MatchBDInfoAudioToMediaInfo(object):
    """
    Reorder bdinfo audio tracks so they follow the mediainfo audio track order
    """

    def __init__(self, remove_until_first_codec, bdinfo, mediainfo):
        """
        Parameters
        ----------
        remove_until_first_codec : object
            helper whose remove(title) returns a 3-tuple; only the first
            element (the title starting at its first codec) is used here

        bdinfo : dict
            bdinfo with an 'audio' list

        mediainfo : dict
            mediainfo with an 'audio' list
        """
        self.remove_until_first_codec = remove_until_first_codec
        self.bdinfo = bdinfo
        self.mediainfo = mediainfo

    def match_bdinfo_audio_to_mediainfo(self):
        """
        Match bdinfo audio tracks to mediainfo audio tracks by codec and channels

        For every mediainfo track the first still-unmatched bdinfo track
        whose first two " / " separated title parts are equal is picked.

        Returns
        -------
        list of bdinfo audio tracks (deep copies): matched ones in mediainfo
        order, followed by the unmatched leftovers in their original order
        """
        remaining = copy.deepcopy(self.bdinfo["audio"])
        media_tracks = copy.deepcopy(self.mediainfo["audio"])
        ordered = []

        for media_track in media_tracks:
            media_title = None
            if "title" in media_track:
                media_title, _, _ = self.remove_until_first_codec.remove(
                    media_track["title"]
                )
            media_parts = media_title.split(" / ") if media_title else []

            # find the next matching bdinfo audio track
            for index, candidate in enumerate(remaining):
                candidate_title = None
                if "name" in candidate:
                    candidate_title, _, _ = self.remove_until_first_codec.remove(
                        candidate["name"]
                    )

                if len(media_parts) <= 1 or not candidate_title:
                    continue

                candidate_parts = candidate_title.split(" / ")
                if len(candidate_parts) > 1 and candidate_parts[:2] == media_parts[:2]:
                    # codec and channels match
                    ordered.append(candidate)
                    del remaining[index]
                    break

            if not remaining:
                break

        # add leftover bdinfo audio tracks
        ordered.extend(remaining)
        return ordered
class MediaInfoParser(object):
    """
    Parse MediaInfo
    """

    def parse(self, text):
        """
        Parse mediainfo

        Parameters
        ----------
        text : list
            list of mediainfo lines

        Returns
        -------
        dict mediainfo with 'general', 'video', 'audio', 'text', and 'menu' keys
            every key maps to a list with one entry per section occurrence:
            a dict of formatted keys for most sections, a list of chapters
            for 'menu'
        """
        mediainfo_sections = ["general", "video", "audio", "text", "menu"]
        # dictionary of lists for mediainfo data
        mediainfo = {section: [] for section in mediainfo_sections}
        # current mediainfo section
        curr_sect = None

        for l in text:
            # skip blank lines, including whitespace-only ones
            if not l or not l.strip():
                continue

            # new section of mediainfo ("Audio #2" still starts an audio section)
            section_word = l.split()[0].lower()
            if section_word in mediainfo_sections:
                curr_sect = section_word
                # store new list for chapters, and new dictionary for other sections
                mediainfo[section_word].append(
                    list() if section_word == "menu" else dict()
                )
                continue

            # split mediainfo data line
            curr = l.split(" : ", 1)

            if curr_sect == "menu":
                mediainfo["menu"][-1].append(self.parse_chapter(curr))
            elif curr_sect is not None and len(curr) >= 2:
                # assign the value to the most recent entry of the section
                mediainfo[curr_sect][-1][self.format_key(curr[0])] = curr[1]

        return mediainfo

    def format_key(self, key):
        """
        Format keys into abc_def_ghi

        Parameters
        ----------
        key : str
            mediainfo key

        Returns
        -------
        str formatted mediainfo key
        """
        return (
            key.strip()
            .replace(" ", "_")
            .replace("/", "_")
            .replace("(", "")
            .replace(")", "")
            .replace("*", "_")
            .replace(",", "")
            .lower()
        )

    def parse_chapter(self, curr):
        """
        Parse a single chapter

        Parameters
        ----------
        curr : list
            current line, already split into [time] or [time, titles]

        Returns
        -------
        dict chapter
            {"time": "...", "titles": [...], "languages": [...]}
            languages list has unique elements, in order of first appearance
        """
        chapter = {"time": None, "titles": list(), "languages": list()}
        if len(curr) >= 1:
            chapter["time"] = curr[0].strip()
        if len(curr) >= 2:
            raw_titles = curr[1]
            # several languages are joined as "en:Title - fr:Titre"
            tagged = [part for part in raw_titles.split(" - ") if ":" in part]
            if tagged:
                for part in tagged:
                    ch = self.format_chapter(part)
                    chapter["titles"].append(ch)
                    if ch["language"] not in chapter["languages"]:
                        chapter["languages"].append(ch["language"])
            else:
                # no language tag at all (the title itself may contain " - "),
                # just store the title
                chapter["titles"].append({"language": None, "title": raw_titles})
        return chapter

    def format_chapter(self, text):
        """
        Format chapter language and title

        Parameters
        ----------
        text : str
            chapter text as "<language>:<title>"; must contain a ":"

        Returns
        -------
        dict chapter with 'language', 'title' keys
        """
        language, _, title = text.partition(":")
        return {"language": language.strip(), "title": title}
# environment variables
# Every setting defaults to "" so that a missing variable disables the
# feature instead of crashing at import time.
IGNORE_AFTER_LINE = os.environ.get("IGNORE_AFTER_LINE", "").strip()
IGNORE_AFTER_LINE_METHOD = os.environ.get("IGNORE_AFTER_LINE_METHOD", "").strip()
# Prefixes are compared against lower-cased lines, so they are lower-cased
# here; empty entries are dropped because "" is a prefix of every line.
IGNORE_UNTIL_BLANK_LINE_PREFIXES = [
    x.strip().lower()
    for x in os.getenv("IGNORE_UNTIL_BLANK_LINE_PREFIXES", "").strip().split(",")
    if x.strip()
]


class BDInfoType(Enum):
    QUICK_SUMMARY = 1
    PLAYLIST_REPORT = 2


class PasteParser(object):
    """
    Split a paste into its bdinfo, mediainfo and eac3to log parts
    """

    def __init__(self, bdinfo_parser):
        self.bdinfo_parser = bdinfo_parser

    class Section(Enum):
        QUICK_SUMMARY = 1
        MEDIAINFO = 2
        PLAYLIST_REPORT = 3
        EAC3TO_LOG = 4

    class Section2(Enum):
        PLAYLIST_VIDEO = 1
        PLAYLIST_AUDIO = 2
        PLAYLIST_SUBTITLES = 3

    class Section3(Enum):
        PLAYLIST_INNER_VIDEO = 1
        PLAYLIST_INNER_AUDIO = 2

    def parse(self, text):
        """
        Parse text to extract bdinfo, mediainfo and eac3to log

        Parameters
        ----------
        text : str
            text to parse

        Returns
        -------
        bdinfo, mediainfo, and eac3to lists
            bdinfo is a dict with 'video', 'audio', 'subtitle' lists (plus
            'type' once a bdinfo section was seen), mediainfo a list of
            lines of the first mediainfo only, eac3to a list of logs
            (each a list of lines)
        """
        bdinfo = {"video": list(), "audio": list(), "subtitle": list()}
        mediainfo = list()
        eac3to = list()

        sect = None
        sect2 = None
        sect3 = None

        ignore_prefixes = tuple(IGNORE_UNTIL_BLANK_LINE_PREFIXES)
        ignore_next_lines, did_first_mediainfo = False, False
        for l in text.splitlines():
            # break after ignore line
            if self._isIgnoreAfterLine(l):
                break

            if not l.strip():
                # don't ignore input after blank line
                ignore_next_lines = False
                # skip blank lines
                continue

            if ignore_next_lines:
                continue

            l = l.strip()
            l2 = l.lower()

            # the prefixed line itself is still parsed; only the lines after
            # it, up to the next blank line, are ignored
            if ignore_prefixes and l2.startswith(ignore_prefixes):
                ignore_next_lines = True

            # determine current section
            # limit to first mediainfo
            if l2.startswith(("quick summary", "disc title", "disc label")):
                sect = self.Section.QUICK_SUMMARY
                bdinfo["type"] = BDInfoType.QUICK_SUMMARY
            elif l2.startswith("playlist report"):
                sect = self.Section.PLAYLIST_REPORT
                bdinfo["type"] = BDInfoType.PLAYLIST_REPORT
            elif l2.startswith("eac3to v"):
                sect = self.Section.EAC3TO_LOG
                eac3to.append(list())
            elif l2.startswith("general"):
                if did_first_mediainfo:
                    sect = None
                else:
                    sect = self.Section.MEDIAINFO
                    did_first_mediainfo = True

            if sect == self.Section.QUICK_SUMMARY:
                # parse quick summary into bdinfo dict
                self.bdinfo_parser.parse_quick_summary_line(bdinfo, l)
            elif sect == self.Section.PLAYLIST_REPORT:

                if l2.startswith("video:"):
                    sect2 = self.Section2.PLAYLIST_VIDEO
                elif l2.startswith("audio:"):
                    sect2 = self.Section2.PLAYLIST_AUDIO
                elif l2.startswith("subtitles:"):
                    sect2 = self.Section2.PLAYLIST_SUBTITLES

                if l2.startswith("-----"):
                    # the dashed rule under the column headers starts the tracks
                    if sect2 == self.Section2.PLAYLIST_VIDEO:
                        sect3 = self.Section3.PLAYLIST_INNER_VIDEO
                    elif sect2 == self.Section2.PLAYLIST_AUDIO:
                        sect3 = self.Section3.PLAYLIST_INNER_AUDIO
                else:
                    # skip tracks that start with minus sign
                    if l.startswith("-"):
                        continue
                    # parse hidden tracks
                    l = l.lstrip("* ")

                    if (
                        sect2 == self.Section2.PLAYLIST_VIDEO
                        and sect3 == self.Section3.PLAYLIST_INNER_VIDEO
                    ):
                        # format video track name with slashes
                        track_name = (
                            self.bdinfo_parser.playlist_report_format_video_track_name(
                                l
                            )
                        )
                        if track_name:
                            bdinfo["video"].append(track_name)

                    elif (
                        sect2 == self.Section2.PLAYLIST_AUDIO
                        and sect3 == self.Section3.PLAYLIST_INNER_AUDIO
                    ):
                        audio_track = (
                            self.bdinfo_parser.playlist_report_format_audio_track(l)
                        )
                        if not audio_track:
                            # the formatter reports unparsable lines with a
                            # falsy value; there is no track to store
                            continue
                        if self.bdinfo_parser.has_compat_track(l):
                            (
                                audio_track,
                                compat_track,
                            ) = self.bdinfo_parser.format_audio_compatibility_track(
                                audio_track
                            )
                            audio_track["compat_track"] = compat_track
                        bdinfo["audio"].append(audio_track)

            elif sect == self.Section.MEDIAINFO:
                mediainfo.append(l)

            elif sect == self.Section.EAC3TO_LOG:
                if l.startswith("Done."):
                    sect = None
                else:
                    # append to the log opened by the latest "eac3to v" line
                    eac3to[-1].append(l)

        return bdinfo, mediainfo, eac3to

    def _isIgnoreAfterLine(self, l):
        """
        Check if we should ignore all input after the current line

        Parameters
        ----------
        l : str
            current line

        Returns
        -------
        True if should ignore further input, False otherwise
        (always False when no marker is configured)
        """
        if not IGNORE_AFTER_LINE:
            # an empty marker would equal every blank line and be contained
            # in every line
            return False
        if IGNORE_AFTER_LINE_METHOD == "equals":
            return IGNORE_AFTER_LINE == l
        if IGNORE_AFTER_LINE_METHOD == "contains":
            return IGNORE_AFTER_LINE in l
        return False
class URLParser(object):
    """
    Find supported paste urls in a message and map them to raw-text urls
    """

    def __init__(self, urls):
        """
        Parameters
        ----------
        urls : dict
            supported paste sites keyed by hostname, for example
            'example.com': {
                # regex to get paste's unique identifier
                'slug_regex': 'https://example.com/(.*)',

                # regex matching an already-raw link, using {} in place of
                # the unique identifier (optional)
                'raw_url_regex': 'https://example.com/raw/{}',

                # link to raw text using {} in place of the unique identifier
                'raw_url': 'https://example.com/raw/{}'
            }
        """
        # regex used to extract urls from message
        self.urls_regex = r"(?P<url>https?://[^\s]+)"
        self.urls = urls

    def extract_supported_urls(self, text):
        """
        Extract the urls of supported hosts from text

        Parameters
        ----------
        text : str
            message text

        Returns
        -------
        list of raw urls, in order of appearance
        """
        raw_urls = list()
        for url in re.findall(self.urls_regex, text):
            o = urlparse(url)
            # check if url is supported
            if o.hostname in self.urls:
                raw_urls.append(self.get_raw_url(url, o.hostname, o.path))
        return raw_urls

    def get_raw_url(self, url, hostname, path):
        """
        Get url to raw content

        Parameters
        ----------
        url : str
            paste url

        hostname : str
            hostname of the url; must be a key of the supported urls

        path : str
            path of the url (currently unused)

        Returns
        -------
        str raw url; the url itself if it is already raw or has no slug
        """
        site = self.urls[hostname]

        # check if its not already a raw url
        raw_url_regex = site.get("raw_url_regex")
        if raw_url_regex and re.search(raw_url_regex.format("(.*)"), url):
            return url

        slug = re.search(site["slug_regex"], url)
        if slug:
            return site["raw_url"].format(slug.group(1))
        return url

    def get_urls(self):
        return self.urls
class Reporter(object):
    """
    Keep track of types of responses
    """

    # emoji alias shown in front of a message, per report type
    _EMOJI_ALIASES = {
        "correct": ":ballot_box_with_check:",
        "warning": ":warning:",
        "error": ":x:",
        "info": ":information_source:",
        "fail": ":interrobang:",
    }

    def __init__(self):
        self.setup()

    def setup(self):
        """
        Setup/Reset the reporter
        """
        self.report = {"correct": 0, "warning": 0, "error": 0, "info": 0, "fail": 0}

    def print_report(self, type, message, record=True, new_line=True):
        """
        Display report

        Parameters
        ----------
        type : ReportType
            type of report: 'correct', 'warning', 'error', 'info', or 'fail'
            (case-insensitive); any other type is shown as "[TYPE]"

        message : str
            reply message

        record : bool
            should this report be kept track of in total

        new_line : bool
            print a new line after message
            default: True

        Returns
        -------
        str message prefixed with the emoji (or label) of its type
        """
        key = type.lower()
        if record:
            # unknown types get their own counter instead of a KeyError
            self.report[key] = self.report.get(key, 0) + 1

        alias = self._EMOJI_ALIASES.get(key)
        if alias is not None:
            prefix = emoji.emojize(alias, language="alias") + " "
        else:
            prefix = "[" + type.upper() + "] "

        return prefix + message + ("\n" if new_line else "")

    def get_report(self):
        """
        Get the report results

        Returns
        -------
        report dict: {'correct' : int, 'warning' : int, 'error' : int, 'info' : int, 'fail' : int}
        """
        return self.report

    def display_report(self):
        """
        Get the report reply

        Returns
        -------
        str reply, e.g. "3 correct, 1 warning, 0 errors, 0 failures, and 2 info"
        """
        reply = str(self.report["correct"]) + " correct, "

        reply += str(self.report["warning"]) + " warning"
        reply += "" if self.report["warning"] == 1 else "s"

        reply += ", " + str(self.report["error"]) + " error"
        reply += "" if self.report["error"] == 1 else "s"

        reply += ", " + str(self.report["fail"]) + " failure"
        reply += "" if self.report["fail"] == 1 else "s"

        reply += ", and " + str(self.report["info"]) + " info"
        return reply
async def react_num_errors(message, num_errors):
    """
    Add status reactions to discord message with number of errors
    Adds a plus sign if more than 10 errors

    Parameters
    ----------
    message : discord.Message
        discord message to react to

    num_errors : int
        number of errors
    """
    if num_errors in range(1, 11):
        # errors between 1 and 10: react with the matching number, if any
        number_alias = num_to_emoji(num_errors)
        if not number_alias:
            return
        await message.add_reaction(emoji.emojize(number_alias, language="alias"))
        return

    if num_errors > 10:
        # more than 10 errors: react with "10" followed by a plus sign
        ten = emoji.emojize(num_to_emoji(10), language="alias")
        await message.add_reaction(ten)
        plus = emoji.emojize(":heavy_plus_sign:", language="alias")
        await message.add_reaction(plus)


async def add_status_reactions(message, content):
    """
    Add status reactions to discord message

    Parameters
    ----------
    message : discord.Message
        discord message to react to

    content : str
        content to parse to determine reactions; nothing is added unless it
        contains a report summary line
    """
    # add status reactions to message based on content
    summary = re.search(
        r"(\d+)\scorrect,\s(\d+)\swarnings?,\s(\d+)\serrors?,\s(\d+)\sfailures?,\sand\s(\d+)\sinfo",
        content,
    )
    if not summary:
        return

    # groups follow the order of the summary line
    _correct, warnings, errors, failures, _info = (
        int(count) for count in summary.groups()
    )

    if not (warnings or errors or failures):
        await message.add_reaction(
            emoji.emojize(":ballot_box_with_check:", language="alias")
        )
        return

    if warnings > 0:
        await message.add_reaction(emoji.emojize(":warning:", language="alias"))
    if errors > 0:
        await message.add_reaction(emoji.emojize(":x:", language="alias"))

    # warnings and errors share one counter reaction
    problems = warnings + errors
    if problems > 0:
        await react_num_errors(message, problems)

    if failures > 0:
        await message.add_reaction(emoji.emojize(":interrobang:", language="alias"))
        await react_num_errors(message, failures)
int(report_re.group(2)), 137 | "error": int(report_re.group(3)), 138 | "fail": int(report_re.group(4)), 139 | "info": int(report_re.group(5)), 140 | } 141 | 142 | if report["warning"] == 0 and report["error"] == 0 and report["fail"] == 0: 143 | await message.add_reaction( 144 | emoji.emojize(":ballot_box_with_check:", language="alias") 145 | ) 146 | else: 147 | if report["warning"] > 0: 148 | await message.add_reaction(emoji.emojize(":warning:", language="alias")) 149 | if report["error"] > 0: 150 | await message.add_reaction(emoji.emojize(":x:", language="alias")) 151 | 152 | num_errors = report["warning"] + report["error"] 153 | if num_errors > 0: 154 | await react_num_errors(message, num_errors) 155 | 156 | if report["fail"] > 0: 157 | await message.add_reaction( 158 | emoji.emojize(":interrobang:", language="alias") 159 | ) 160 | await react_num_errors(message, report["fail"]) 161 | -------------------------------------------------------------------------------- /vdator/requirements.txt: -------------------------------------------------------------------------------- 1 | aiohttp==3.8.3 2 | aiosignal==1.3.1 3 | async-timeout==4.0.2 4 | attrs==22.2.0 5 | black==22.12.0 6 | certifi==2022.12.7 7 | charset-normalizer==2.1.1 8 | cinemagoer==2022.12.27 9 | click==8.1.3 10 | discord-markdown==0.4.0 11 | discord.py==2.1.0 12 | emoji==2.2.0 13 | Flask==2.2.2 14 | frozenlist==1.3.3 15 | greenlet==2.0.2 16 | hunspell==0.5.5 17 | idna==3.4 18 | iso-639==0.4.5 19 | itsdangerous==2.1.2 20 | Jinja2==3.1.2 21 | joblib==1.2.0 22 | langdetect==1.0.9 23 | lxml==4.9.2 24 | MarkupSafe==2.1.2 25 | multidict==6.0.3 26 | mypy-extensions==0.4.3 27 | nltk==3.8.1 28 | numpy==1.23.5 29 | pathspec==0.10.3 30 | platformdirs==2.6.0 31 | pydash==5.1.2 32 | python-dotenv==0.21.0 33 | regex==2022.10.31 34 | requests==2.31.0 35 | six==1.16.0 36 | SQLAlchemy==1.4.46 37 | tmdbsimple==2.9.1 38 | tomli==2.0.1 39 | tqdm==4.64.1 40 | Unidecode==1.3.6 41 | urllib3==1.26.13 42 | Werkzeug==2.2.3 43 | 
# 'mediainfo' to use mediainfo fields
# 'nobdinfo' to assume DVD if no bdinfo given
# Defaults to "" when the variable is not set; is_dvd() is then always False.
DVD_CHECK_MODE = os.environ.get("DVD_CHECK_MODE", "").strip()


# detect if DVD, 1080p BluRay or UHD BluRay
class SourceDetector(object):
    """
    Define ways to detect source
    """

    def setup(self, bdinfo, mediainfo):
        """
        Parameters
        ----------
        bdinfo : dict
            bdinfo

        mediainfo : dict
            mediainfo
        """
        self.bdinfo = bdinfo
        self.mediainfo = mediainfo

    def _first_video_field(self, key):
        """
        Get a field of the first mediainfo video track

        Returns
        -------
        field value, or None if there is no video track or no such field
        """
        video_tracks = self.mediainfo.get("video") or []
        if len(video_tracks) >= 1 and key in video_tracks[0]:
            return video_tracks[0][key]
        return None

    def _video_height(self):
        """
        Get the height of the first video track

        Returns
        -------
        int built from all digits of the height field ("1 080 pixels" -> 1080),
        or None if the field is missing or holds no digits
        """
        height = self._first_video_field("height")
        if height is None:
            return None
        digits = "".join(re.findall(r"[\d]+", height))
        return int(digits) if digits else None

    def _is_dvd_with_standard(self, standard):
        """
        Is this source a DVD whose first video track has the given standard?
        """
        if not self.is_dvd():
            return False
        value = self._first_video_field("standard")
        return value is not None and value.upper() == standard

    def is_dvd(self):
        """
        Is this source a DVD?

        Returns
        -------
        boolean True if DVD, False otherwise
        """
        if DVD_CHECK_MODE == "nobdinfo":
            # no bdinfo given, assume dvds
            return not self._has_bdinfo()

        if DVD_CHECK_MODE == "mediainfo":
            height = self._video_height()
            # height is 480p or 576p for dvds
            # Note: checking standard is NTSC or PAL won't work, as some BDs are NTSC
            return height is not None and height <= 576

        return False

    def is_ntsc_dvd(self):
        """
        Is this source an NTSC DVD?

        Returns
        -------
        boolean True if NTSC DVD, False otherwise
        """
        return self._is_dvd_with_standard("NTSC")

    def is_pal_dvd(self):
        """
        Is this source a PAL DVD?

        Returns
        -------
        boolean True if PAL DVD, False otherwise
        """
        return self._is_dvd_with_standard("PAL")

    def is_uhd(self):
        """
        Is this source a UHD BluRay?

        Returns
        -------
        boolean True if UHD, False otherwise
        """
        return self._video_height() == 2160

    def is_dv(self):
        """
        Does this source have dolby vision?

        Returns
        -------
        boolean True if DV, False otherwise
        """
        hdr_format = self._first_video_field("hdr_format")
        return hdr_format is not None and "Dolby Vision" in hdr_format

    def _has_bdinfo(self):
        """
        Does the paste include bdinfo?

        Returns
        -------
        boolean True if has bdinfo, False otherwise
        """
        # bdinfo counts as present as soon as any track list is non-empty
        return any(self.bdinfo.get(kind) for kind in ("video", "audio", "subtitle"))