├── tests ├── data │ ├── enzyme │ │ ├── test4.mkv.yml │ │ ├── test6.mkv.yml │ │ ├── test7.mkv.yml │ │ ├── test8.mkv.yml │ │ ├── test1.mkv.yml │ │ ├── test3.mkv.yml │ │ ├── test2.mkv.yml │ │ ├── 7.1-dts-hd-ma-speaker-mapping-test-file.mkv.yml │ │ ├── issue-24-example-01.mkv.yml │ │ ├── test5.mkv.yml │ │ ├── 7.1-dts-hd-ma-speaker-mapping-test-file.mkv.json │ │ └── issue-24-example-01.mkv.json │ ├── mkvmerge │ │ ├── test6.mkv.yml │ │ ├── test7.mkv.yml │ │ ├── test8.mkv.yml │ │ ├── test4.mkv.yml │ │ ├── test1.mkv.yml │ │ ├── test3.mkv.yml │ │ ├── test2.mkv.yml │ │ ├── test5.mkv.yml │ │ ├── media_001.mkv.yml │ │ └── media_001.mkv.json │ ├── ffmpeg │ │ ├── test4.mkv.yml │ │ ├── test6.mkv.yml │ │ ├── test7.mkv.yml │ │ ├── test8.mkv.yml │ │ ├── test1.mkv.yml │ │ ├── test2.mkv.yml │ │ ├── test3.mkv.yml │ │ ├── 7.1-dts-hd-ma-speaker-mapping-test-file.mkv.yml │ │ ├── issue-39-example-02.mkv.yml │ │ ├── test5-ffmpeg-v2.8.15.mkv.yml │ │ ├── test5.mkv.yml │ │ ├── issue-39-example-01.mkv.yml │ │ ├── media_001.mkv.yml │ │ ├── 7.1-dts-hd-ma-speaker-mapping-test-file.mkv.json │ │ └── issue-39-example-02.mkv.json │ └── mediainfo │ │ ├── test4.mkv.yml │ │ ├── test6.mkv.yml │ │ ├── test7.mkv.yml │ │ ├── test8.mkv.yml │ │ ├── test1.mkv.yml │ │ ├── test2.mkv.yml │ │ ├── test3.mkv.yml │ │ ├── 7.1-dts-hd-ma-speaker-mapping-test-file.mkv.yml │ │ ├── test5.mkv.yml │ │ ├── media_001.mkv.yml │ │ ├── several-tracks.mkv.yml │ │ └── 7.1-dts-hd-ma-speaker-mapping-test-file.mkv.json ├── test_video_profile.py ├── test_serializer.py ├── test_units.py ├── test_provider.py ├── test_properties.py ├── test_resolution.py ├── test_audiochannels.py ├── test_ffmpeg.py ├── test_mkvmerge.py ├── test_audiochannels.yml ├── test_enzyme.py ├── test_mediainfo.py ├── test_utils.py ├── conftest.py ├── test_properties.yml ├── test_resolution.yml └── __init__.py ├── scripts └── test.sh ├── knowit ├── providers │ ├── __init__.py │ ├── enzyme.py │ └── mkvmerge.py ├── rules │ ├── __init__.py │ ├── subtitle.py │ ├── 
def test_video_profile_tier_extract_key_when_no_tier():
    """Key extraction on an empty value must yield exactly ``False``."""
    extracted_key = VideoProfileTier._extract_key('')
    assert extracted_key is False
class SubtitleFormat(Configurable[str]):
    """Subtitle Format property."""

    @classmethod
    def _extract_key(cls, value) -> str:
        """Turn a raw subtitle format value into its lookup key.

        The value is stringified and upper-cased, a leading ``S_`` prefix is
        removed, and only the text after the last ``/`` is returned (the
        whole string when it contains no ``/``).
        """
        prefix = 'S_'
        normalized = str(value).upper()
        if normalized.startswith(prefix):
            normalized = normalized[len(prefix):]

        # rpartition leaves the full string in the last slot when '/' is absent.
        _, _, key = normalized.rpartition('/')
        return key
-------------------------------------------------------------------------------- 1 | duration: 87.336 2 | path: tests/data/videos/test6.mkv 3 | container: mkv 4 | size: 23343928 5 | video: 6 | - id: 1 7 | language: und 8 | width: 854 9 | height: 480 10 | scan_type: PROGRESSIVE 11 | resolution: 480p 12 | audio: 13 | - id: 2 14 | language: und 15 | codec: MP3 16 | channels_count: 2 17 | channels: 2.0 18 | sampling_rate: 48000 19 | provider: 20 | name: mkvmerge 21 | -------------------------------------------------------------------------------- /tests/data/enzyme/test7.mkv.yml: -------------------------------------------------------------------------------- 1 | duration: '0:00:37.043000' 2 | path: tests/data/videos/test7.mkv 3 | container: mkv 4 | size: 21848518 byte 5 | video: 6 | - id: 1 7 | language: und 8 | width: 1024 pixel 9 | height: 576 pixel 10 | scan_type: PROGRESSIVE 11 | resolution: 576p 12 | codec: H264 13 | audio: 14 | - id: 2 15 | language: und 16 | codec: AAC 17 | channels_count: 2 18 | channels: '2.0' 19 | provider: 20 | name: enzyme -------------------------------------------------------------------------------- /tests/data/enzyme/test8.mkv.yml: -------------------------------------------------------------------------------- 1 | duration: '0:00:47.341000' 2 | path: tests/data/videos/test8.mkv 3 | container: mkv 4 | size: 21224737 byte 5 | video: 6 | - id: 1 7 | language: und 8 | width: 1024 pixel 9 | height: 576 pixel 10 | scan_type: PROGRESSIVE 11 | resolution: 576p 12 | codec: H264 13 | audio: 14 | - id: 2 15 | language: und 16 | codec: AAC 17 | channels_count: 2 18 | channels: '2.0' 19 | provider: 20 | name: enzyme -------------------------------------------------------------------------------- /tests/data/mkvmerge/test7.mkv.yml: -------------------------------------------------------------------------------- 1 | duration: 37.043 2 | path: tests/data/videos/test7.mkv 3 | container: mkv 4 | size: 21848518 5 | video: 6 | - id: 1 7 | language: und 8 
| width: 1024 9 | height: 576 10 | scan_type: PROGRESSIVE 11 | resolution: 576p 12 | codec: H264 13 | audio: 14 | - id: 2 15 | language: und 16 | codec: AAC 17 | channels_count: 2 18 | channels: 2.0 19 | sampling_rate: 48000 20 | provider: 21 | name: mkvmerge 22 | -------------------------------------------------------------------------------- /tests/data/mkvmerge/test8.mkv.yml: -------------------------------------------------------------------------------- 1 | duration: 47.341 2 | path: tests/data/videos/test8.mkv 3 | container: mkv 4 | size: 21224737 5 | video: 6 | - id: 1 7 | language: und 8 | width: 1024 9 | height: 576 10 | scan_type: PROGRESSIVE 11 | resolution: 576p 12 | codec: H264 13 | audio: 14 | - id: 2 15 | language: und 16 | codec: AAC 17 | channels_count: 2 18 | channels: 2.0 19 | sampling_rate: 48000 20 | provider: 21 | name: mkvmerge 22 | -------------------------------------------------------------------------------- /tests/data/mkvmerge/test4.mkv.yml: -------------------------------------------------------------------------------- 1 | path: tests/data/videos/test4.mkv 2 | container: mkv 3 | size: 21313902 4 | video: 5 | - id: 1 6 | language: und 7 | width: 1280 8 | height: 720 9 | scan_type: PROGRESSIVE 10 | resolution: 720p 11 | default: true 12 | audio: 13 | - id: 2 14 | language: und 15 | codec: VORBIS 16 | channels_count: 2 17 | channels: 2.0 18 | sampling_rate: 48000 19 | default: true 20 | provider: 21 | name: mkvmerge 22 | -------------------------------------------------------------------------------- /tests/data/enzyme/test1.mkv.yml: -------------------------------------------------------------------------------- 1 | duration: '0:01:27.336000' 2 | path: tests/data/videos/test1.mkv 3 | container: mkv 4 | size: 23339337 byte 5 | video: 6 | - id: 1 7 | language: und 8 | width: 854 pixel 9 | height: 480 pixel 10 | scan_type: PROGRESSIVE 11 | resolution: 480p 12 | default: true 13 | audio: 14 | - id: 2 15 | language: und 16 | codec: MP3 17 | 
def test_null_registry_is_falsey():
    """A ``NullRegistry`` instance evaluates as false in a boolean context."""
    assert not NullRegistry()


def test_null_registry_can_define():
    """Calling ``define`` on a ``NullRegistry`` completes without raising."""
    null_units = NullRegistry()
    null_units.define('FPS = 1 * hertz')


def test_null_registry_attribute_is_a_scalar_1():
    """The ``fps`` and ``some_attribute`` lookups both compare equal to ``1``."""
    null_units = NullRegistry()
    for attribute in ('fps', 'some_attribute'):
        assert getattr(null_units, attribute) == 1
@pytest.mark.parametrize(
    'frame_rate', [
        pytest.param(3.4 * units.fps, id='Frame rate with magnitude'),
        pytest.param(1, id='Frame rate without magnitude'),
    ],
)
def test_provider_validate_track_frame_rate(frame_rate):
    """Validate a video track once per parametrized ``frame_rate`` value.

    The test passes when ``Provider._validate_track`` returns without
    raising for the given frame rate.
    """
    # Feed the parametrized value into the track. A hard-coded constant here
    # would make both cases identical and leave ``frame_rate`` unused.
    track = {'frame_rate': frame_rate}
    Provider._validate_track('video', track)
/tests/data/mkvmerge/test2.mkv.yml: -------------------------------------------------------------------------------- 1 | duration: 47.509 2 | path: tests/data/videos/test2.mkv 3 | container: mkv 4 | size: 21142764 5 | video: 6 | - id: 1 7 | language: und 8 | width: 1354 9 | height: 576 10 | scan_type: PROGRESSIVE 11 | resolution: 1080p 12 | codec: H264 13 | default: true 14 | audio: 15 | - id: 2 16 | language: und 17 | codec: AAC 18 | channels_count: 2 19 | channels: 2.0 20 | sampling_rate: 48000 21 | default: true 22 | provider: 23 | name: mkvmerge 24 | -------------------------------------------------------------------------------- /tests/data/ffmpeg/test4.mkv.yml: -------------------------------------------------------------------------------- 1 | path: tests/data/videos/test4.mkv 2 | size: 21313902 byte 3 | container: mkv 4 | video: 5 | - id: 0 6 | width: 1280 pixel 7 | height: 720 pixel 8 | scan_type: PROGRESSIVE 9 | aspect_ratio: 1.778 10 | pixel_aspect_ratio: 1.0 11 | resolution: 720p 12 | frame_rate: 24.0 FPS 13 | default: true 14 | audio: 15 | - id: 1 16 | codec: VORBIS 17 | channels_count: 2 18 | channels: '2.0' 19 | sampling_rate: 48000 20 | default: true 21 | provider: 22 | name: ffmpeg -------------------------------------------------------------------------------- /tests/test_properties.py: -------------------------------------------------------------------------------- 1 | 2 | import pytest 3 | 4 | from knowit import properties 5 | 6 | from . 
@pytest.mark.parametrize('name,expected,input', parameters_from_yaml(__name__))
def test_resolution(config, context, name, expected, input):
    """Check that the property class called ``name`` extracts ``expected``.

    The class is looked up on ``knowit.properties`` by ``name`` and fed a
    track whose only entry maps ``name`` to ``input``.
    """
    # Arrange: resolve the property class by name and instantiate it.
    property_under_test = getattr(properties, name)(config, name)

    # Act: extract from a single-entry track.
    extracted = property_under_test.extract_value({name: input}, context)

    # Assert
    assert expected == extracted
@pytest.fixture
def resolution_rule():
    """Provide a ``ResolutionRule`` constructed with the name ``'resolution'``."""
    rule = ResolutionRule('resolution')
    return rule


@pytest.mark.parametrize('expected,input', parameters_from_yaml(__name__))
def test_resolution(resolution_rule, context, expected, input):
    """Execute the rule for each case from ``parameters_from_yaml`` and compare."""
    # The same object is deliberately passed as both of the first two arguments.
    outcome = resolution_rule.execute(input, input, context)

    assert_expected(expected, outcome)
3 | # Please see the documentation for all configuration options: 4 | # https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates 5 | 6 | version: 2 7 | updates: 8 | - package-ecosystem: "pip" 9 | directory: "/" 10 | schedule: 11 | interval: "daily" 12 | 13 | - package-ecosystem: "github-actions" 14 | directory: "/" 15 | schedule: 16 | interval: "daily" 17 | -------------------------------------------------------------------------------- /tests/data/mediainfo/test4.mkv.yml: -------------------------------------------------------------------------------- 1 | path: tests/data/videos/test4.mkv 2 | size: 21313902 byte 3 | container: mkv 4 | video: 5 | - id: 1 6 | width: 1280 pixel 7 | height: 720 pixel 8 | scan_type: PROGRESSIVE 9 | aspect_ratio: 1.778 10 | pixel_aspect_ratio: 1.0 11 | resolution: 720p 12 | frame_rate: 24.0 FPS 13 | bit_rate: 2500000 14 | default: true 15 | audio: 16 | - id: 2 17 | codec: VORBIS 18 | channels_count: 2 19 | channels: '2.0' 20 | bit_rate: 192000 21 | bit_rate_mode: VBR 22 | sampling_rate: 48000 23 | compression: LOSSY 24 | default: true 25 | provider: 26 | name: mediainfo 27 | -------------------------------------------------------------------------------- /tests/test_audiochannels.py: -------------------------------------------------------------------------------- 1 | 2 | import pytest 3 | from knowit.rules import AudioChannelsRule 4 | 5 | from . 
@pytest.fixture
def audiochannels_rule():
    """Provide an ``AudioChannelsRule`` constructed with the name ``'audio channels'``."""
    rule = AudioChannelsRule('audio channels')
    return rule


# NOTE(review): this function exercises AudioChannelsRule although it is named
# ``test_resolution`` -- the name looks copied from another test module; confirm
# before renaming, since the test ID would change.
@pytest.mark.parametrize('expected,input', parameters_from_yaml(__name__))
def test_resolution(audiochannels_rule, context, expected, input):
    """Execute the rule for each case from ``parameters_from_yaml`` and compare."""
    # The same object is deliberately passed as both of the first two arguments.
    outcome = audiochannels_rule.execute(input, input, context)

    assert_expected(expected, outcome)
sampling_rate: 48000 26 | provider: 27 | name: ffmpeg -------------------------------------------------------------------------------- /tests/data/ffmpeg/test1.mkv.yml: -------------------------------------------------------------------------------- 1 | title: Big Buck Bunny - test 1 2 | path: tests/data/videos/test1.mkv 3 | duration: '0:01:27.336000' 4 | size: 23339337 byte 5 | bit_rate: 2137889 6 | container: mkv 7 | video: 8 | - id: 0 9 | width: 854 pixel 10 | height: 480 pixel 11 | scan_type: PROGRESSIVE 12 | aspect_ratio: 1.779 13 | pixel_aspect_ratio: 1.0 14 | resolution: 480p 15 | frame_rate: 24.0 FPS 16 | codec: MSMPEG4V2 17 | default: true 18 | audio: 19 | - id: 1 20 | codec: MP3 21 | channels_count: 2 22 | channels: '2.0' 23 | bit_rate: 208000 24 | sampling_rate: 48000 25 | default: true 26 | provider: 27 | name: ffmpeg -------------------------------------------------------------------------------- /knowit/properties/__init__.py: -------------------------------------------------------------------------------- 1 | from knowit.properties.audio import ( 2 | AudioChannels, 3 | AudioCodec, 4 | AudioCompression, 5 | AudioProfile, 6 | BitRateMode, 7 | ) 8 | from knowit.properties.general import ( 9 | Basic, 10 | Duration, 11 | Language, 12 | Quantity, 13 | YesNo, 14 | ) 15 | from knowit.properties.subtitle import ( 16 | SubtitleFormat, 17 | ) 18 | from knowit.properties.video import ( 19 | Ratio, 20 | ScanType, 21 | VideoCodec, 22 | VideoDimensions, 23 | VideoEncoder, 24 | VideoHdrFormat, 25 | VideoProfile, 26 | VideoProfileLevel, 27 | VideoProfileTier, 28 | ) 29 | -------------------------------------------------------------------------------- /tests/data/ffmpeg/test2.mkv.yml: -------------------------------------------------------------------------------- 1 | title: Elephant Dream - test 2 2 | path: tests/data/videos/test2.mkv 3 | duration: '0:00:47.509000' 4 | size: 21142764 byte 5 | bit_rate: 3560212 6 | container: mkv 7 | video: 8 | - id: 0 9 | width: 1024 
pixel 10 | height: 576 pixel 11 | scan_type: PROGRESSIVE 12 | aspect_ratio: 2.351 13 | pixel_aspect_ratio: 1.322 14 | resolution: 1080p 15 | frame_rate: 24.0 FPS 16 | bit_depth: 8 bit 17 | codec: H264 18 | profile: MAIN 19 | default: true 20 | audio: 21 | - id: 1 22 | codec: AAC 23 | profile: LC 24 | channels_count: 2 25 | channels: '2.0' 26 | sampling_rate: 48000 27 | default: true 28 | provider: 29 | name: ffmpeg -------------------------------------------------------------------------------- /tests/data/ffmpeg/test3.mkv.yml: -------------------------------------------------------------------------------- 1 | title: Elephant Dream - test 3 2 | path: tests/data/videos/test3.mkv 3 | duration: '0:00:49.064000' 4 | size: 21061472 byte 5 | bit_rate: 3434122 6 | container: mkv 7 | video: 8 | - id: 0 9 | width: 1024 pixel 10 | height: 576 pixel 11 | scan_type: PROGRESSIVE 12 | aspect_ratio: 1.778 13 | pixel_aspect_ratio: 1.0 14 | resolution: 576p 15 | frame_rate: 24.0 FPS 16 | bit_depth: 8 bit 17 | codec: H264 18 | profile: MAIN 19 | default: true 20 | audio: 21 | - id: 1 22 | language: en 23 | codec: MP3 24 | channels_count: 2 25 | channels: '2.0' 26 | bit_rate: 172001 27 | sampling_rate: 48000 28 | default: true 29 | provider: 30 | name: ffmpeg -------------------------------------------------------------------------------- /tests/data/enzyme/issue-24-example-01.mkv.yml: -------------------------------------------------------------------------------- 1 | title: The 100 06x09 (What You Take With You) - release by messafan for CasStudio 2 | path: tests/data/enzyme/issue-24-example-01.mkv 3 | duration: 0:42:05 4 | container: mkv 5 | video: 6 | - id: 1 7 | language: und 8 | width: 1280 pixel 9 | height: 720 pixel 10 | scan_type: PROGRESSIVE 11 | resolution: 720p 12 | codec: H264 13 | default: true 14 | audio: 15 | - id: 2 16 | codec: AC3 17 | channels_count: 2 18 | channels: '2.0' 19 | language: pt 20 | forced: true 21 | default: true 22 | - id: 3 23 | name: Stereo 24 | 
codec: AC3 25 | channels_count: 2 26 | channels: '2.0' 27 | language: en 28 | provider: 29 | name: enzyme 30 | -------------------------------------------------------------------------------- /tests/data/mediainfo/test6.mkv.yml: -------------------------------------------------------------------------------- 1 | title: Big Buck Bunny - test 6 2 | path: tests/data/videos/test6.mkv 3 | duration: '0:01:27.336000' 4 | size: 23343928 byte 5 | bit_rate: 2138310 6 | container: mkv 7 | video: 8 | - id: 1 9 | duration: '0:01:27.333000' 10 | width: 854 pixel 11 | height: 480 pixel 12 | scan_type: PROGRESSIVE 13 | aspect_ratio: 1.779 14 | pixel_aspect_ratio: 1.0 15 | resolution: 480p 16 | frame_rate: 24.0 FPS 17 | codec: MSMPEG4V2 18 | media_type: video/MP4V-ES 19 | audio: 20 | - id: 2 21 | duration: '0:01:27.336000' 22 | codec: MP3 23 | profile: LAYER3 24 | channels_count: 2 25 | channels: '2.0' 26 | bit_rate_mode: VBR 27 | sampling_rate: 48000 28 | compression: LOSSY 29 | provider: 30 | name: mediainfo -------------------------------------------------------------------------------- /tests/data/mediainfo/test7.mkv.yml: -------------------------------------------------------------------------------- 1 | title: Big Buck Bunny - test 7 2 | path: tests/data/videos/test7.mkv 3 | duration: '0:00:37.043000' 4 | size: 21848518 byte 5 | bit_rate: 4718520 6 | container: mkv 7 | video: 8 | - id: 1 9 | duration: '0:00:37.042000' 10 | width: 1024 pixel 11 | height: 576 pixel 12 | scan_type: PROGRESSIVE 13 | aspect_ratio: 1.778 14 | pixel_aspect_ratio: 1.0 15 | resolution: 576p 16 | frame_rate: 24.0 FPS 17 | bit_depth: 8 bit 18 | codec: H264 19 | profile: MAIN 20 | profile_level: '3.1' 21 | media_type: video/H264 22 | audio: 23 | - id: 2 24 | duration: '0:00:37.043000' 25 | codec: AAC 26 | profile: LC 27 | channels_count: 2 28 | channels: '2.0' 29 | sampling_rate: 48000 30 | compression: LOSSY 31 | provider: 32 | name: mediainfo 
-------------------------------------------------------------------------------- /tests/data/mediainfo/test8.mkv.yml: -------------------------------------------------------------------------------- 1 | title: Big Buck Bunny - test 8 2 | path: tests/data/videos/test8.mkv 3 | duration: '0:00:47.341000' 4 | size: 21224737 byte 5 | bit_rate: 3586699 6 | container: mkv 7 | video: 8 | - id: 1 9 | duration: '0:00:47.333000' 10 | width: 1024 pixel 11 | height: 576 pixel 12 | scan_type: PROGRESSIVE 13 | aspect_ratio: 1.778 14 | pixel_aspect_ratio: 1.0 15 | resolution: 576p 16 | frame_rate: 24.0 FPS 17 | bit_depth: 8 bit 18 | codec: H264 19 | profile: MAIN 20 | profile_level: '3.1' 21 | media_type: video/H264 22 | audio: 23 | - id: 2 24 | duration: '0:00:47.341000' 25 | codec: AAC 26 | profile: LC 27 | channels_count: 2 28 | channels: '2.0' 29 | sampling_rate: 48000 30 | compression: LOSSY 31 | provider: 32 | name: mediainfo -------------------------------------------------------------------------------- /tests/data/mediainfo/test1.mkv.yml: -------------------------------------------------------------------------------- 1 | title: Big Buck Bunny - test 1 2 | path: tests/data/videos/test1.mkv 3 | duration: '0:01:27.336000' 4 | size: 23339337 byte 5 | bit_rate: 2137889 6 | container: mkv 7 | video: 8 | - id: 1 9 | duration: '0:01:27.333000' 10 | width: 854 pixel 11 | height: 480 pixel 12 | scan_type: PROGRESSIVE 13 | aspect_ratio: 1.779 14 | pixel_aspect_ratio: 1.0 15 | resolution: 480p 16 | frame_rate: 24.0 FPS 17 | codec: MSMPEG4V2 18 | media_type: video/MP4V-ES 19 | default: true 20 | audio: 21 | - id: 2 22 | duration: '0:01:27.336000' 23 | codec: MP3 24 | profile: LAYER3 25 | channels_count: 2 26 | channels: '2.0' 27 | bit_rate_mode: VBR 28 | sampling_rate: 48000 29 | compression: LOSSY 30 | default: true 31 | provider: 32 | name: mediainfo -------------------------------------------------------------------------------- /tests/data/mediainfo/test2.mkv.yml: 
-------------------------------------------------------------------------------- 1 | title: Elephant Dream - test 2 2 | path: tests/data/videos/test2.mkv 3 | duration: '0:00:47.509000' 4 | size: 21142764 byte 5 | bit_rate: 3560212 6 | container: mkv 7 | video: 8 | - id: 1 9 | duration: '0:00:47.500000' 10 | width: 1024 pixel 11 | height: 576 pixel 12 | scan_type: PROGRESSIVE 13 | aspect_ratio: 2.351 14 | pixel_aspect_ratio: 1.322 15 | resolution: 1080p 16 | frame_rate: 24.0 FPS 17 | bit_depth: 8 bit 18 | codec: H264 19 | profile: MAIN 20 | profile_level: '3.1' 21 | media_type: video/H264 22 | default: true 23 | audio: 24 | - id: 2 25 | duration: '0:00:47.509000' 26 | codec: AAC 27 | profile: LC 28 | channels_count: 2 29 | channels: '2.0' 30 | sampling_rate: 48000 31 | compression: LOSSY 32 | default: true 33 | provider: 34 | name: mediainfo -------------------------------------------------------------------------------- /tests/data/ffmpeg/7.1-dts-hd-ma-speaker-mapping-test-file.mkv.yml: -------------------------------------------------------------------------------- 1 | title: 7.1Ch DTS-HD MA - Speaker Mapping Test File 2 | path: tests/data/ffmpeg/7.1-dts-hd-ma-speaker-mapping-test-file.mkv 3 | duration: '0:01:37.931000' 4 | size: 40772443 byte 5 | bit_rate: 3330707 6 | container: mkv 7 | video: 8 | - id: 0 9 | width: 1920 pixel 10 | height: 1080 pixel 11 | scan_type: PROGRESSIVE 12 | aspect_ratio: 1.778 13 | pixel_aspect_ratio: 1.0 14 | resolution: 1080p 15 | frame_rate: 23.976 FPS 16 | bit_depth: 8 bit 17 | codec: H264 18 | profile: MAIN 19 | default: true 20 | audio: 21 | - id: 1 22 | name: 7.1Ch DTS-HD MA 23 | language: en 24 | codec: DTSHD 25 | profile: MA 26 | channels_count: 8 27 | channels: '7.1' 28 | bit_depth: 24 bit 29 | sampling_rate: 48000 30 | default: true 31 | provider: 32 | name: ffmpeg -------------------------------------------------------------------------------- /tests/data/enzyme/test5.mkv.yml: 
-------------------------------------------------------------------------------- 1 | duration: '0:00:46.665000' 2 | path: tests/data/videos/test5.mkv 3 | container: mkv 4 | size: 31762747 byte 5 | video: 6 | - id: 1 7 | language: und 8 | width: 1024 pixel 9 | height: 576 pixel 10 | scan_type: PROGRESSIVE 11 | resolution: 576p 12 | codec: H264 13 | default: true 14 | audio: 15 | - id: 2 16 | language: und 17 | codec: AAC 18 | channels_count: 2 19 | channels: '2.0' 20 | default: true 21 | - id: 10 22 | name: Commentary 23 | codec: AAC 24 | channels_count: 1 25 | channels: '1.0' 26 | subtitle: 27 | - id: 3 28 | default: true 29 | - id: 4 30 | language: hu 31 | - id: 5 32 | language: de 33 | - id: 6 34 | language: fr 35 | - id: 8 36 | language: es 37 | - id: 9 38 | language: it 39 | - id: 11 40 | language: ja 41 | - id: 7 42 | language: und 43 | provider: 44 | name: enzyme -------------------------------------------------------------------------------- /tests/data/mediainfo/test3.mkv.yml: -------------------------------------------------------------------------------- 1 | title: Elephant Dream - test 3 2 | path: tests/data/videos/test3.mkv 3 | duration: '0:00:49.064000' 4 | size: 21061472 byte 5 | bit_rate: 3434122 6 | container: mkv 7 | video: 8 | - id: 1 9 | duration: '0:00:49.083000' 10 | width: 1024 pixel 11 | height: 576 pixel 12 | scan_type: PROGRESSIVE 13 | aspect_ratio: 1.778 14 | pixel_aspect_ratio: 1.0 15 | resolution: 576p 16 | frame_rate: 24.0 FPS 17 | bit_depth: 8 bit 18 | codec: H264 19 | profile: MAIN 20 | profile_level: '3.1' 21 | media_type: video/H264 22 | default: true 23 | audio: 24 | - id: 2 25 | language: en 26 | duration: '0:00:49.064000' 27 | codec: MP3 28 | profile: LAYER3 29 | channels_count: 2 30 | channels: '2.0' 31 | bit_rate_mode: VBR 32 | sampling_rate: 48000 33 | compression: LOSSY 34 | default: true 35 | provider: 36 | name: mediainfo -------------------------------------------------------------------------------- 
/knowit/rules/subtitle.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | from knowit.core import Rule 4 | 5 | 6 | class ClosedCaptionRule(Rule): 7 | """Closed caption rule.""" 8 | 9 | cc_re = re.compile(r'(\bcc\d\b)', re.IGNORECASE) 10 | 11 | def execute(self, props, pv_props, context): 12 | """Return True on a CC marker (e.g. cc1), else the guessed closed caption flag, if any.""" 13 | if '_closed_caption' in pv_props and self.cc_re.search(pv_props['_closed_caption']): 14 | return True 15 | 16 | if 'guessed' in pv_props: 17 | guessed = pv_props['guessed'] 18 | return guessed.get('closed_caption') 19 | 20 | 21 | class HearingImpairedRule(Rule): 22 | """Hearing Impaired rule.""" 23 | 24 | def execute(self, props, pv_props, context): 25 | """Return the guessed hearing impaired flag, if any.""" 26 | if 'guessed' in pv_props: 27 | guessed = pv_props['guessed'] 28 | return guessed.get('hearing_impaired') 29 | -------------------------------------------------------------------------------- /tests/data/mkvmerge/test5.mkv.yml: -------------------------------------------------------------------------------- 1 | duration: 46.665 2 | path: tests/data/videos/test5.mkv 3 | container: mkv 4 | size: 31762747 5 | video: 6 | - id: 1 7 | language: und 8 | width: 1024 9 | height: 576 10 | scan_type: PROGRESSIVE 11 | resolution: 576p 12 | codec: H264 13 | default: true 14 | audio: 15 | - id: 2 16 | language: und 17 | codec: AAC 18 | channels_count: 2 19 | channels: 2.0 20 | sampling_rate: 48000 21 | default: true 22 | - id: 10 23 | language: en 24 | codec: AAC 25 | channels_count: 1 26 | channels: 1.0 27 | sampling_rate: 22050 28 | subtitle: 29 | - id: 3 30 | language: en 31 | default: true 32 | - id: 4 33 | language: hu 34 | - id: 5 35 | language: de 36 | - id: 6 37 | language: fr 38 | - id: 8 39 | language: es 40 | - id: 9 41 | language: it 42 | - id: 11 43 | language: ja 44 | - id: 7 45 | language: und 46 | provider: 47 | name: mkvmerge 48 | 
-------------------------------------------------------------------------------- /tests/data/ffmpeg/issue-39-example-02.mkv.yml: -------------------------------------------------------------------------------- 1 | path: 'Z:\Videos\Shows (Prospective)\Band of Brothers (2001)\01x01 - Currahee.mkv' 2 | duration: '1:13:14.162000' 3 | size: 983004345 byte 4 | bit_rate: 1789655 5 | container: mkv 6 | video: 7 | - id: 0 8 | duration: '1:13:14.056708' 9 | width: 1920 pixel 10 | height: 1080 pixel 11 | scan_type: PROGRESSIVE 12 | aspect_ratio: '1.778' 13 | pixel_aspect_ratio: '1.0' 14 | resolution: 1080p 15 | frame_rate: 23.976 FPS 16 | bit_rate: 1500697 17 | codec: H265 18 | profile: MAIN 19 | default: true 20 | audio: 21 | - id: 1 22 | name: Surround 23 | language: en 24 | duration: '1:13:12.106000' 25 | codec: AAC 26 | profile: HEAAC 27 | channels_count: 6 28 | channels: '5.1' 29 | bit_rate: 256002 30 | sampling_rate: 48000 31 | default: true 32 | subtitle: 33 | - id: 2 34 | language: en 35 | format: PGS 36 | provider: 37 | name: ffmpeg 38 | -------------------------------------------------------------------------------- /tests/data/mkvmerge/media_001.mkv.yml: -------------------------------------------------------------------------------- 1 | title: Media 001 2 | duration: '0:57:08.352000' 3 | path: tests/data/mkvmerge/media_001.mkv 4 | container: mkv 5 | video: 6 | - id: 1 7 | language: en 8 | width: 3840 9 | height: 2160 10 | resolution: 2160p 11 | scan_type: PROGRESSIVE 12 | codec: H265 13 | audio: 14 | - id: 2 15 | language: en 16 | codec: TRUEHD 17 | channels_count: 8 18 | channels: 7.1 19 | sampling_rate: 48000 20 | default: true 21 | - id: 3 22 | language: en 23 | codec: AC3 24 | channels_count: 6 25 | channels: 5.1 26 | sampling_rate: 48000 27 | - id: 4 28 | language: de 29 | codec: AC3 30 | channels_count: 6 31 | channels: 5.1 32 | sampling_rate: 48000 33 | - id: 5 34 | language: pt-BR 35 | codec: DTS 36 | channels_count: 2 37 | channels: 2.0 38 | 
sampling_rate: 48000 39 | subtitle: 40 | - id: 6 41 | language: en 42 | - id: 7 43 | language: de 44 | - id: 8 45 | language: pt-BR 46 | provider: 47 | name: mkvmerge 48 | -------------------------------------------------------------------------------- /tests/data/mediainfo/7.1-dts-hd-ma-speaker-mapping-test-file.mkv.yml: -------------------------------------------------------------------------------- 1 | title: 7.1Ch DTS-HD MA - Speaker Mapping Test File 2 | path: tests/data/7.1-dts-hd-ma-speaker-mapping-test-file.mkv 3 | duration: '0:01:37.931000' 4 | size: 40772443 byte 5 | bit_rate: 3330708 6 | container: mkv 7 | video: 8 | - id: 1 9 | duration: '0:01:37.931000' 10 | width: 1920 pixel 11 | height: 1080 pixel 12 | scan_type: PROGRESSIVE 13 | aspect_ratio: 1.778 14 | pixel_aspect_ratio: 1.0 15 | resolution: 1080p 16 | frame_rate: 23.976 FPS 17 | bit_depth: 8 bit 18 | codec: H264 19 | profile: MAIN 20 | profile_level: '4' 21 | media_type: video/H264 22 | default: true 23 | audio: 24 | - id: 2 25 | name: 7.1Ch DTS-HD MA 26 | language: en 27 | duration: '0:01:37.931000' 28 | codec: DTSHD 29 | profile: MA 30 | channels_count: 8 31 | channels: '7.1' 32 | bit_depth: 24 bit 33 | bit_rate_mode: VBR 34 | sampling_rate: 48000 35 | compression: LOSSLESS 36 | default: true 37 | provider: 38 | name: mediainfo -------------------------------------------------------------------------------- /tests/test_ffmpeg.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | 3 | import pytest 4 | 5 | from knowit import know 6 | 7 | from . 
import ( 8 | assert_expected, 9 | id_func, 10 | mediafiles 11 | ) 12 | 13 | 14 | @pytest.mark.parametrize('media', mediafiles.get_json_media('ffmpeg'), ids=id_func) 15 | def test_ffmpeg_provider(ffmpeg, media, options): 16 | # Given 17 | ffmpeg[media.video_path] = media.input_data 18 | 19 | # When 20 | actual = know(media.video_path, options) 21 | 22 | # Then 23 | assert_expected(media.expected_data, actual, options) 24 | assert pickle.loads(pickle.dumps(actual)) == actual 25 | 26 | 27 | @pytest.mark.parametrize('media', mediafiles.get_real_media('ffmpeg'), ids=id_func) 28 | def test_ffmpeg_provider_real_media(media, options): 29 | # Given 30 | options['provider'] = 'ffmpeg' 31 | 32 | # When 33 | actual = know(media.video_path, options) 34 | 35 | # Then 36 | assert_expected(media.expected_data, actual, options) 37 | assert pickle.loads(pickle.dumps(actual)) == actual 38 | -------------------------------------------------------------------------------- /tests/test_mkvmerge.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | 3 | import pytest 4 | 5 | from knowit import know 6 | 7 | from . 
import ( 8 | assert_expected, 9 | id_func, 10 | mediafiles 11 | ) 12 | 13 | 14 | @pytest.mark.parametrize('media', mediafiles.get_json_media('mkvmerge'), ids=id_func) 15 | def test_mkvmerge_provider(mkvmerge, media, options): 16 | # Given 17 | mkvmerge[media.video_path] = media.input_data 18 | 19 | # When 20 | actual = know(media.video_path, options) 21 | 22 | # Then 23 | assert_expected(media.expected_data, actual, options) 24 | assert pickle.loads(pickle.dumps(actual)) == actual 25 | 26 | 27 | @pytest.mark.parametrize('media', mediafiles.get_real_media('mkvmerge'), ids=id_func) 28 | def test_mkvmerge_provider_real_media(media, options): 29 | # Given 30 | options['provider'] = 'mkvmerge' 31 | 32 | # When 33 | actual = know(media.video_path, options) 34 | 35 | # Then 36 | assert_expected(media.expected_data, actual, options) 37 | assert pickle.loads(pickle.dumps(actual)) == actual 38 | -------------------------------------------------------------------------------- /knowit/rules/general.py: -------------------------------------------------------------------------------- 1 | from logging import NullHandler, getLogger 2 | 3 | from trakit.api import trakit 4 | 5 | from knowit.core import Rule 6 | 7 | logger = getLogger(__name__) 8 | logger.addHandler(NullHandler()) 9 | 10 | 11 | class GuessTitleRule(Rule): 12 | """Guess properties from track title.""" 13 | 14 | def execute(self, props, pv_props, context): 15 | """Guess properties from the track name using trakit.""" 16 | if 'name' in props: 17 | language = props.get('language') 18 | options = {'expected_language': language} if language else {} 19 | guessed = trakit(props['name'], options) 20 | if guessed: 21 | return guessed 22 | 23 | 24 | class LanguageRule(Rule): 25 | """Language rules.""" 26 | 27 | def execute(self, props, pv_props, context): 28 | """Return the language from the guessed properties, if any.""" 29 | if 'guessed' not in pv_props: 30 | return 31 | 32 | guess = pv_props['guessed'] 33 | if 'language' in guess: 34 | return guess['language'] 35 | 
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2016 Rato 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /tests/test_audiochannels.yml: -------------------------------------------------------------------------------- 1 | '1.0': 2 | - channels_count: 1 3 | - channels_count: 1 4 | channel_positions: 1/0/0 5 | 6 | '2.0': 7 | - channels_count: 2 8 | - channels_count: 2 9 | channel_positions: 2/0/0 10 | 11 | '2.1': 12 | - channels_count: 3 13 | channel_positions: 2/0/0.1 14 | 15 | '3.0': 16 | - channels_count: 3 17 | channel_positions: 3/0/0 18 | 19 | '3.1': 20 | - channels_count: 4 21 | channel_positions: 3/0/0.1 22 | 23 | '4.0': 24 | - channels_count: 4 25 | channel_positions: 2/2/0 26 | - channels_count: 4 27 | channel_positions: 2/0/2 28 | 29 | '4.1': 30 | - channels_count: 5 31 | channel_positions: 2/2/0.1 32 | - channels_count: 5 33 | channel_positions: 2/0/2.1 34 | 35 | '5.0': 36 | - channels_count: 5 37 | channel_positions: 3/2/0 38 | 39 | '5.1': 40 | - channels_count: 6 41 | - channels_count: 6 42 | channel_positions: 3/2/0.1 43 | 44 | '7.1': 45 | - channels_count: 8 46 | - channels_count: 8 47 | channel_positions: [3/2/2.1, 3/2/0.1] 48 | 49 | '6.1': 50 | - channels_count: 7 51 | channel_positions: 3/3/0.1 52 | 53 | '20.1': 54 | - channels_count: 21 55 | channel_positions: 5/4/11.1 56 | -------------------------------------------------------------------------------- /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: Publish 2 | 3 | on: 4 | release: 5 | types: 6 | - created 7 | 8 | jobs: 9 | publish: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - name: Check out the repo 13 | uses: actions/checkout@v4 14 | 15 | - name: Setup python 16 | uses: actions/setup-python@v5 17 | with: 18 | python-version: 3.13 19 | 20 | - name: Install poetry 21 | uses: snok/install-poetry@v1.4.1 22 | with: 23 | virtualenvs-in-project: true 24 | 25 | - name: Load cached venv 26 | id: cached-poetry-dependencies 27 | uses: 
actions/cache@v4 28 | with: 29 | path: .venv 30 | key: venv-${{ runner.os }}-${{ hashFiles('pyproject.toml') }} 31 | 32 | - name: Install dependencies 33 | if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true' 34 | run: poetry install --no-interaction --no-root 35 | 36 | - name: Install library 37 | run: poetry install --no-interaction 38 | 39 | - name: Publish 40 | env: 41 | POETRY_PYPI_TOKEN_PYPI: ${{ secrets.PYPI_API_TOKEN }} 42 | run: poetry publish --build -------------------------------------------------------------------------------- /.github/workflows/docker-publish.yml: -------------------------------------------------------------------------------- 1 | name: Publish Docker image 2 | 3 | on: 4 | release: 5 | types: 6 | - created 7 | 8 | jobs: 9 | publish_to_docker_hub: 10 | name: Publish Docker image to Docker Hub 11 | runs-on: ubuntu-latest 12 | steps: 13 | - name: Check out the repo 14 | uses: actions/checkout@v4 15 | 16 | - name: Log in to Docker Hub 17 | uses: docker/login-action@v3 18 | with: 19 | username: ${{ secrets.DOCKER_USERNAME }} 20 | password: ${{ secrets.DOCKER_PASSWORD }} 21 | 22 | - name: Extract metadata (tags, labels) for Docker 23 | id: meta 24 | uses: docker/metadata-action@v5 25 | with: 26 | images: ratoaq2/knowit 27 | 28 | - name: Set up QEMU 29 | uses: docker/setup-qemu-action@v3 30 | 31 | - name: Set up Docker Buildx 32 | uses: docker/setup-buildx-action@v3 33 | 34 | - name: Build and push Docker image 35 | uses: docker/build-push-action@v6 36 | with: 37 | context: . 
38 | platforms: linux/amd64,linux/arm64 39 | push: true 40 | tags: ${{ steps.meta.outputs.tags }} 41 | labels: ${{ steps.meta.outputs.labels }} 42 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [flake8] 2 | exclude = .git,.github,.pytest_cache,.venv,dist 3 | import-order-style = cryptography 4 | application-import-names = knowit 5 | max-line-length = 120 6 | ignore = 7 | # D100 Missing docstring in public module 8 | D100 9 | # D103 Missing docstring in public function 10 | D103 11 | # I201 Missing newline between import groups 12 | I201 13 | per-file-ignores = 14 | __init__.py: 15 | # D104 Missing docstring in public package 16 | D104 17 | # F401 Imported but unused 18 | F401 19 | knowit/__init__.py: 20 | # E402 Module level import not at top of file 21 | E402 22 | # F401 Imported but unused 23 | F401 24 | knowit/api.py: 25 | # N818 error suffix in exception names 26 | N818 27 | 28 | [mypy] 29 | 30 | [mypy-pint.*] 31 | ignore_missing_imports = True 32 | 33 | [mypy-babelfish.*] 34 | ignore_missing_imports = True 35 | 36 | [mypy-enzyme.*] 37 | ignore_missing_imports = True 38 | 39 | [mypy-pkg_resources.*] 40 | ignore_missing_imports = True 41 | 42 | [mypy-pymediainfo.*] 43 | ignore_missing_imports = True 44 | 45 | [mypy-trakit.*] 46 | ignore_missing_imports = True 47 | 48 | [mypy-yaml.*] 49 | ignore_missing_imports = True -------------------------------------------------------------------------------- /tests/test_enzyme.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | 3 | import pytest 4 | from knowit import KnowitException, know 5 | 6 | from . 
import ( 7 | assert_expected, 8 | id_func, 9 | mediafiles 10 | ) 11 | 12 | 13 | @pytest.mark.parametrize('media', mediafiles.get_json_media('enzyme'), ids=id_func) 14 | def test_enzyme_provider(enzyme, media, options): 15 | # Given 16 | enzyme[media.video_path] = media.input_data 17 | 18 | # When 19 | actual = know(media.video_path, options) 20 | 21 | # Then 22 | assert_expected(media.expected_data, actual, options) 23 | assert pickle.loads(pickle.dumps(actual)) == actual 24 | 25 | 26 | @pytest.mark.parametrize('media', mediafiles.get_real_media('enzyme'), ids=id_func) 27 | def test_enzyme_provider_real_media(media, options): 28 | # Given 29 | options['provider'] = 'enzyme' 30 | options['fail_on_error'] = False 31 | 32 | # When 33 | if not media.expected_data: 34 | with pytest.raises(KnowitException): 35 | know(media.video_path, options) 36 | else: 37 | actual = know(media.video_path, options) 38 | 39 | # Then 40 | assert_expected(media.expected_data, actual, options) 41 | assert pickle.loads(pickle.dumps(actual)) == actual 42 | -------------------------------------------------------------------------------- /tests/data/ffmpeg/test5-ffmpeg-v2.8.15.mkv.yml: -------------------------------------------------------------------------------- 1 | title: Big Buck Bunny - test 8 2 | path: test5.mkv 3 | duration: '0:00:46.665000' 4 | size: 31762747 byte 5 | bit_rate: 5445236 6 | container: mkv 7 | video: 8 | - id: 0 9 | width: 1024 pixel 10 | height: 576 pixel 11 | scan_type: PROGRESSIVE 12 | aspect_ratio: 1.778 13 | pixel_aspect_ratio: 1.0 14 | resolution: 576p 15 | frame_rate: 24.0 FPS 16 | bit_depth: 8 bit 17 | codec: H264 18 | profile: MAIN 19 | default: true 20 | audio: 21 | - id: 1 22 | codec: AAC 23 | profile: LC 24 | channels_count: 2 25 | channels: '2.0' 26 | sampling_rate: 48000 27 | default: true 28 | - id: 8 29 | name: Commentary 30 | language: en 31 | codec: AAC 32 | profile: LC 33 | channels_count: 1 34 | channels: '1.0' 35 | sampling_rate: 22050 36 | subtitle: 
37 | - id: 2 38 | language: en 39 | format: SUBRIP 40 | default: true 41 | - id: 3 42 | language: hu 43 | format: SUBRIP 44 | - id: 4 45 | language: de 46 | format: SUBRIP 47 | - id: 5 48 | language: fr 49 | format: SUBRIP 50 | - id: 6 51 | language: es 52 | format: SUBRIP 53 | - id: 7 54 | language: it 55 | format: SUBRIP 56 | - id: 9 57 | language: ja 58 | format: SUBRIP 59 | - id: 10 60 | format: SUBRIP 61 | provider: 62 | name: ffmpeg -------------------------------------------------------------------------------- /tests/data/ffmpeg/test5.mkv.yml: -------------------------------------------------------------------------------- 1 | title: Big Buck Bunny - test 8 2 | path: tests/data/videos/test5.mkv 3 | duration: '0:00:46.665000' 4 | size: 31762747 byte 5 | bit_rate: 5445236 6 | container: mkv 7 | video: 8 | - id: 0 9 | width: 1024 pixel 10 | height: 576 pixel 11 | scan_type: PROGRESSIVE 12 | aspect_ratio: 1.778 13 | pixel_aspect_ratio: 1.0 14 | resolution: 576p 15 | frame_rate: 24.0 FPS 16 | bit_depth: 8 bit 17 | codec: H264 18 | profile: MAIN 19 | default: true 20 | audio: 21 | - id: 1 22 | codec: AAC 23 | profile: LC 24 | channels_count: 2 25 | channels: '2.0' 26 | sampling_rate: 48000 27 | default: true 28 | - id: 8 29 | name: Commentary 30 | language: en 31 | codec: AAC 32 | profile: LC 33 | channels_count: 1 34 | channels: '1.0' 35 | sampling_rate: 22050 36 | subtitle: 37 | - id: 2 38 | language: en 39 | format: SUBRIP 40 | default: true 41 | - id: 3 42 | language: hu 43 | format: SUBRIP 44 | - id: 4 45 | language: de 46 | format: SUBRIP 47 | - id: 5 48 | language: fr 49 | format: SUBRIP 50 | - id: 6 51 | language: es 52 | format: SUBRIP 53 | - id: 7 54 | language: it 55 | format: SUBRIP 56 | - id: 9 57 | language: ja 58 | format: SUBRIP 59 | - id: 10 60 | format: SUBRIP 61 | provider: 62 | name: ffmpeg -------------------------------------------------------------------------------- /knowit/__init__.py: 
-------------------------------------------------------------------------------- 1 | """Know your media files better.""" 2 | from importlib import metadata 3 | 4 | __title__ = metadata.metadata(__package__)['name'] 5 | __version__ = metadata.version(__package__) 6 | __short_version__ = '.'.join(__version__.split('.')[:2]) 7 | __author__ = metadata.metadata(__package__)['author'] 8 | __license__ = metadata.metadata(__package__)['license'] 9 | __url__ = 'https://github.com/ratoaq2/knowit' 10 | 11 | del metadata 12 | 13 | #: Video extensions 14 | VIDEO_EXTENSIONS = ('.3g2', '.3gp', '.3gp2', '.3gpp', '.60d', '.ajp', '.asf', '.asx', '.avchd', '.avi', '.bik', 15 | '.bix', '.box', '.cam', '.dat', '.divx', '.dmf', '.dv', '.dvr-ms', '.evo', '.flc', '.fli', 16 | '.flic', '.flv', '.flx', '.gvi', '.gvp', '.h264', '.m1v', '.m2p', '.m2ts', '.m2v', '.m4e', 17 | '.m4v', '.mjp', '.mjpeg', '.mjpg', '.mk3d', '.mkv', '.moov', '.mov', '.movhd', '.movie', '.movx', 18 | '.mp4', '.mpe', '.mpeg', '.mpg', '.mpv', '.mpv2', '.mxf', '.nsv', '.nut', '.ogg', '.ogm', '.ogv', 19 | '.omf', '.ps', '.qt', '.ram', '.rm', '.rmvb', '.swf', '.ts', '.vfw', '.vid', '.video', '.viv', 20 | '.vivo', '.vob', '.vro', '.webm', '.wm', '.wmv', '.wmx', '.wrap', '.wvx', '.wx', '.x264', '.xvid') 21 | 22 | from knowit.api import KnowitException, know 23 | -------------------------------------------------------------------------------- /tests/data/ffmpeg/issue-39-example-01.mkv.yml: -------------------------------------------------------------------------------- 1 | path: 'Z:\Videos\Shows\Heroes (2006)\01x23 - How to Stop an Exploding Man.mkv' 2 | duration: '0:44:56.704000' 3 | size: 369338385 byte 4 | bit_rate: 1095673 5 | container: mkv 6 | video: 7 | - id: 0 8 | duration: '0:44:56.485708' 9 | width: 1920 pixel 10 | height: 1080 pixel 11 | scan_type: PROGRESSIVE 12 | aspect_ratio: '1.778' 13 | pixel_aspect_ratio: '1.0' 14 | resolution: 1080p 15 | frame_rate: 23.976 FPS 16 | bit_rate: 900213 17 | codec: H265 18 | 
profile: MAIN 19 | default: true 20 | audio: 21 | - id: 1 22 | name: Surround 23 | language: und 24 | duration: '0:44:56.661000' 25 | codec: AAC 26 | profile: HEAAC 27 | channels_count: 6 28 | channels: '5.1' 29 | bit_rate: 192003 30 | sampling_rate: 48000 31 | default: true 32 | subtitle: 33 | - id: 2 34 | language: ar 35 | format: ASS 36 | - id: 3 37 | language: da 38 | format: ASS 39 | - id: 4 40 | language: nl 41 | format: ASS 42 | - id: 5 43 | language: en 44 | format: ASS 45 | - id: 6 46 | language: fr 47 | format: ASS 48 | - id: 7 49 | language: de 50 | format: ASS 51 | - id: 8 52 | language: fa 53 | format: ASS 54 | - id: 9 55 | language: es 56 | format: ASS 57 | - id: 10 58 | language: vi 59 | format: ASS 60 | provider: 61 | name: ffmpeg 62 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.13-slim as builder 2 | 3 | ENV PYTHONFAULTHANDLER=1 \ 4 | PYTHONUNBUFFERED=1 \ 5 | PYTHONHASHSEED=random \ 6 | PYTHONDONTWRITEBYTECODE=1 \ 7 | PIP_NO_CACHE_DIR=off \ 8 | PIP_DISABLE_PIP_VERSION_CHECK=on \ 9 | PIP_DEFAULT_TIMEOUT=100 \ 10 | POETRY_VERSION=1.8.3 \ 11 | POETRY_VIRTUALENVS_CREATE=0 12 | 13 | RUN apt-get update \ 14 | && apt-get install -y --no-install-recommends python3-distutils python3-venv \ 15 | && apt-get clean \ 16 | && rm -rf /var/lib/apt/lists/* 17 | 18 | RUN pip install "poetry==$POETRY_VERSION" 19 | 20 | WORKDIR /app 21 | COPY poetry.lock pyproject.toml README.md /app/ 22 | RUN poetry install --no-interaction --no-ansi --only main 23 | RUN pip install platformdirs 24 | COPY knowit/ /app/knowit/ 25 | RUN poetry build --no-interaction --no-ansi 26 | 27 | 28 | FROM python:3.13-slim 29 | 30 | ENV PYTHONFAULTHANDLER=1 \ 31 | PYTHONUNBUFFERED=1 \ 32 | PYTHONHASHSEED=random \ 33 | PYTHONDONTWRITEBYTECODE=1 \ 34 | PIP_NO_CACHE_DIR=off \ 35 | PIP_DISABLE_PIP_VERSION_CHECK=on \ 36 | PIP_DEFAULT_TIMEOUT=100 37 | 38 | 
RUN apt-get update \ 39 | && apt-get install -y --no-install-recommends mediainfo ffmpeg mkvtoolnix \ 40 | && apt-get clean \ 41 | && rm -rf /var/lib/apt/lists/* 42 | 43 | COPY --from=builder /app/dist /usr/src/dist 44 | 45 | RUN pip install /usr/src/dist/knowit-*.tar.gz 46 | 47 | WORKDIR / 48 | 49 | ENTRYPOINT ["knowit"] 50 | CMD ["--help"] 51 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Test 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | 9 | jobs: 10 | test: 11 | runs-on: ubuntu-latest 12 | strategy: 13 | matrix: 14 | python-version: [3.9, '3.10', 3.11, 3.12, 3.13] 15 | steps: 16 | - name: Check out the repo 17 | uses: actions/checkout@v4 18 | 19 | - name: Install system dependencies 20 | run: | 21 | sudo apt-get update 22 | sudo apt-get install -y mediainfo ffmpeg mkvtoolnix 23 | 24 | - name: Setup python 25 | uses: actions/setup-python@v5 26 | with: 27 | python-version: ${{ matrix.python-version }} 28 | 29 | - name: Install poetry 30 | uses: snok/install-poetry@v1.4.1 31 | with: 32 | virtualenvs-in-project: true 33 | 34 | - name: Load cached venv 35 | id: cached-poetry-dependencies 36 | uses: actions/cache@v4 37 | with: 38 | path: .venv 39 | key: venv-${{ runner.os }}-${{ hashFiles('pyproject.toml') }} 40 | 41 | - name: Install dependencies 42 | if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true' 43 | run: poetry install --no-interaction --no-root 44 | 45 | - name: Install library 46 | run: poetry install --no-interaction 47 | 48 | - name: Run tests 49 | run: | 50 | source .venv/bin/activate 51 | bash scripts/test.sh 52 | -------------------------------------------------------------------------------- /tests/data/enzyme/7.1-dts-hd-ma-speaker-mapping-test-file.mkv.json: -------------------------------------------------------------------------------- 1 | { 2 | "info": { 3 | 
"duration": "0:01:37", 4 | "muxing_app": "libebml v1.3.0 + libmatroska v1.4.1", 5 | "date_utc": "2013-12-13 17:49:28", 6 | "writing_app": "mkvmerge v6.6.0 ('The Edge Of The In Between') built on Dec 1 2013 17:55:00", 7 | "title": "7.1Ch DTS-HD MA - Speaker Mapping Test File" 8 | }, 9 | "video_tracks": [ 10 | { 11 | "forced": false, 12 | "display_height": 1080, 13 | "language": "und", 14 | "default": true, 15 | "enabled": true, 16 | "number": 1, 17 | "crop": {}, 18 | "height": 1080, 19 | "width": 1920, 20 | "codec_id": "V_MPEG4/ISO/AVC", 21 | "lacing": false, 22 | "type": 1, 23 | "display_width": 1920, 24 | "interlaced": false 25 | } 26 | ], 27 | "tags": [], 28 | "chapters": [], 29 | "audio_tracks": [ 30 | { 31 | "forced": false, 32 | "name": "7.1Ch DTS-HD MA", 33 | "default": true, 34 | "enabled": true, 35 | "number": 2, 36 | "sampling_frequency": 48000.0, 37 | "channels": 6, 38 | "codec_id": "A_DTS", 39 | "lacing": true, 40 | "type": 2 41 | } 42 | ], 43 | "recurse_seek_head": false, 44 | "subtitle_tracks": [] 45 | } -------------------------------------------------------------------------------- /tests/data/mediainfo/test5.mkv.yml: -------------------------------------------------------------------------------- 1 | title: Big Buck Bunny - test 8 2 | path: tests/data/videos/test5.mkv 3 | duration: '0:00:46.665000' 4 | size: 31762747 byte 5 | bit_rate: 5445237 6 | container: mkv 7 | video: 8 | - id: 1 9 | duration: '0:00:46.667000' 10 | width: 1024 pixel 11 | height: 576 pixel 12 | scan_type: PROGRESSIVE 13 | aspect_ratio: 1.778 14 | pixel_aspect_ratio: 1.0 15 | resolution: 576p 16 | frame_rate: 24.0 FPS 17 | bit_depth: 8 bit 18 | codec: H264 19 | profile: MAIN 20 | profile_level: '3.1' 21 | media_type: video/H264 22 | default: true 23 | audio: 24 | - id: 2 25 | duration: '0:00:46.665000' 26 | codec: AAC 27 | profile: LC 28 | channels_count: 2 29 | channels: '2.0' 30 | sampling_rate: 48000 31 | compression: LOSSY 32 | default: true 33 | - id: 10 34 | name: 
Commentary 35 | language: en 36 | duration: '0:00:46.665000' 37 | codec: AAC 38 | profile: LC 39 | channels_count: 1 40 | channels: '1.0' 41 | sampling_rate: 22050 42 | compression: LOSSY 43 | subtitle: 44 | - id: 3 45 | language: en 46 | format: SUBRIP 47 | default: true 48 | - id: 4 49 | language: hu 50 | format: SUBRIP 51 | - id: 5 52 | language: de 53 | format: SUBRIP 54 | - id: 6 55 | language: fr 56 | format: SUBRIP 57 | - id: 8 58 | language: es 59 | format: SUBRIP 60 | - id: 9 61 | language: it 62 | format: SUBRIP 63 | - id: 11 64 | language: ja 65 | format: SUBRIP 66 | - id: 7 67 | format: SUBRIP 68 | provider: 69 | name: mediainfo -------------------------------------------------------------------------------- /tests/test_mediainfo.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | 3 | import pytest 4 | 5 | from tests import mediafiles 6 | from knowit import know 7 | 8 | from . import assert_expected, id_func 9 | 10 | 11 | @pytest.mark.parametrize('media', mediafiles.get_json_media('mediainfo'), ids=id_func) 12 | def test_mediainfo_provider(mediainfo, media, options): 13 | # Given 14 | mediainfo[media.video_path] = media.input_data 15 | 16 | # When 17 | actual = know(media.video_path, options) 18 | 19 | # Then 20 | assert_expected(media.expected_data, actual, options) 21 | assert pickle.loads(pickle.dumps(actual)) == actual 22 | 23 | 24 | @pytest.mark.parametrize('media', mediafiles.get_real_media('mediainfo'), ids=id_func) 25 | def test_mediainfo_provider_real_media(media, options): 26 | # Given 27 | options['provider'] = 'mediainfo' 28 | 29 | # When 30 | actual = know(media.video_path, options) 31 | 32 | # Then 33 | assert_expected(media.expected_data, actual, options) 34 | assert pickle.loads(pickle.dumps(actual)) == actual 35 | 36 | 37 | @pytest.mark.parametrize('media', mediafiles.get_real_media('mediainfo'), ids=id_func) 38 | def test_mediainfo_provider_real_media_cli(mediainfo_cli, media, 
options): 39 | # Given 40 | options['provider'] = 'mediainfo' 41 | 42 | # When 43 | actual = know(media.video_path, options) 44 | 45 | # Then 46 | assert_expected(media.expected_data, actual, options) 47 | assert pickle.loads(pickle.dumps(actual)) == actual 48 | -------------------------------------------------------------------------------- /knowit/units.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import typing 3 | 4 | logger = logging.getLogger(__name__) 5 | 6 | 7 | class NullRegistry: 8 | """A NullRegistry that masquerades as a pint.UnitRegistry.""" 9 | 10 | def __init__(self): 11 | """Initialize a null registry.""" 12 | 13 | def __getattr__(self, item: typing.Any) -> int: 14 | """Return the scalar 1 to simulate a unit.""" 15 | return 1 16 | 17 | def __call__(self, value: str) -> float: 18 | """Try converting to int, then to float, and fall back to the scalar 1.""" 19 | try: 20 | return int(value) 21 | except ValueError: 22 | try: 23 | return float(value) 24 | except ValueError: 25 | pass 26 | return 1 27 | 28 | def __bool__(self): 29 | """Return False since a NullRegistry is not a pint.UnitRegistry.""" 30 | return False 31 | 32 | def define(self, *args, **kwargs): 33 | """Pretend to add unit to the registry.""" 34 | 35 | 36 | def _build_unit_registry(): 37 | try: 38 | import pint 39 | 40 | registry = pint.UnitRegistry() 41 | registry.define('FPS = 1 * hertz') 42 | 43 | pint.set_application_registry(registry) 44 | return registry 45 | except ModuleNotFoundError: 46 | pass 47 | except Exception: 48 | logger.exception("Cannot import the pint package") 49 | 50 | return NullRegistry() 51 | 52 | 53 | units = _build_unit_registry() 54 | -------------------------------------------------------------------------------- /knowit/properties/audio.py: -------------------------------------------------------------------------------- 1 | import typing 2 | 3 | from knowit.core import Configurable, Property 4 | 5 | 6 | class
BitRateMode(Configurable[str]): 7 | """Bit Rate mode property.""" 8 | 9 | 10 | class AudioCompression(Configurable[str]): 11 | """Audio Compression property.""" 12 | 13 | 14 | class AudioProfile(Configurable[str]): 15 | """Audio profile property.""" 16 | 17 | 18 | class AudioChannels(Property[int]): 19 | """Audio Channels property.""" 20 | 21 | ignored = { 22 | 'object based', # Dolby Atmos 23 | } 24 | 25 | def handle(self, value: typing.Union[int, str], context: typing.MutableMapping) -> typing.Optional[int]: 26 | """Return the channel count as int, or None for ignored or non-numeric values.""" 27 | if isinstance(value, int): 28 | return value 29 | 30 | if value.lower() not in self.ignored: 31 | try: 32 | return int(value) 33 | except ValueError: 34 | self.report(value, context) 35 | return None 36 | 37 | 38 | class AudioCodec(Configurable[str]): 39 | """Audio codec property.""" 40 | 41 | @classmethod 42 | def _extract_key(cls, value) -> str: 43 | key = str(value).upper() 44 | if key.startswith('A_'): 45 | key = key[2:] 46 | 47 | # only the first part of the word.
E.g.: 'AAC LC' => 'AAC' 48 | return key.split(' ')[0] 49 | 50 | @classmethod 51 | def _extract_fallback_key(cls, value, key) -> typing.Optional[str]: 52 | if '/' in key: 53 | return key.split('/')[0] 54 | else: 55 | return None 56 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | .venv/ 13 | .pytest_cache/ 14 | .mypy_cache/ 15 | build/ 16 | develop-eggs/ 17 | dist/ 18 | downloads/ 19 | eggs/ 20 | .eggs/ 21 | lib/ 22 | lib64/ 23 | parts/ 24 | sdist/ 25 | var/ 26 | *.egg-info/ 27 | .installed.cfg 28 | *.egg 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *,cover 49 | .hypothesis/ 50 | 51 | # Translations 52 | *.mo 53 | *.pot 54 | 55 | # Django stuff: 56 | *.log 57 | local_settings.py 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # IPython Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # dotenv 82 | .env 83 | 84 | # virtualenv 85 | .venv*/ 86 | venv*/ 87 | ENV/ 88 | 89 | # Spyder project settings 90 | .spyderproject 91 | 92 | # Rope project settings 93 | .ropeproject 94 | 95 | # IDE 96 | .idea 97 | 98 | # binaries 99 | *.mkv 100 | -------------------------------------------------------------------------------- /tests/data/mediainfo/media_001.mkv.yml: -------------------------------------------------------------------------------- 1 | title: Media 001 2 | path: tests/data/mediainfo/media_001.mkv 3 | duration: 3261.384 4 | size: 28071815971 5 | bit_rate: 68858659 6 | container: mkv 7 | video: 8 | - id: 1 9 | language: en 10 | duration: 3259.381 11 | size: 25355039372 12 | width: 3840 13 | height: 2160 14 | scan_type: PROGRESSIVE 15 | aspect_ratio: 1.778 16 | pixel_aspect_ratio: 1.0 17 | resolution: 2160p 18 | frame_rate: 23.976 19 | bit_rate: 62232772 20 | bit_depth: 10 21 | codec: H265 22 | profile: MAIN10 23 | profile_level: 5.1 24 | hdr_format: 25 | - DV 26 | - HDR10 27 | media_type: video/H265 28 | audio: 29 | - id: 2 30 | language: en 31 | duration: 3259.381 32 | size: 1912453744 33 | codec: 34 | - TRUEHD 35 | - ATMOS 36 | channels_count: 8 37 | channels: 7.1 38 | bit_rate: 4694029 39 | bit_rate_mode: VBR 40 | 
sampling_rate: 48000 41 | compression: LOSSLESS 42 | default: true 43 | - id: 3 44 | language: en 45 | duration: 3259.392 46 | size: 260751360 47 | codec: AC3 48 | channels_count: 6 49 | channels: 5.1 50 | bit_rate: 640000 51 | bit_rate_mode: CBR 52 | sampling_rate: 48000 53 | compression: LOSSY 54 | - id: 4 55 | language: de 56 | duration: 3258.272 57 | size: 182463232 58 | codec: AC3 59 | channels_count: 6 60 | channels: 5.1 61 | bit_rate: 448000 62 | bit_rate_mode: CBR 63 | sampling_rate: 48000 64 | compression: LOSSY 65 | - id: 5 66 | language: pt 67 | duration: 3259.382 68 | size: 312900608 69 | codec: DTS 70 | channels_count: 2 71 | channels: 2.0 72 | bit_depth: 24 73 | bit_rate: 768000 74 | bit_rate_mode: CBR 75 | sampling_rate: 48000 76 | compression: LOSSY 77 | subtitle: 78 | - id: 6 79 | language: en 80 | format: PGS 81 | - id: 7 82 | language: de 83 | format: PGS 84 | - id: 8 85 | language: pt 86 | format: PGS 87 | provider: 88 | name: mediainfo 89 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "knowit" 3 | version = "0.5.11" 4 | description = "Know better your media files" 5 | authors = [ 6 | "Rato", 7 | ] 8 | license = "MIT" 9 | readme = "README.md" 10 | repository = "https://github.com/ratoaq2/knowit" 11 | keywords = [ 12 | "video", 13 | "mkv", 14 | "mp4", 15 | "mediainfo", 16 | "metadata", 17 | "movie", 18 | "episode", 19 | "tv", 20 | "shows", 21 | "series", 22 | ] 23 | classifiers = [ 24 | "Development Status :: 5 - Production/Stable", 25 | "Intended Audience :: Developers", 26 | "License :: OSI Approved :: MIT License", 27 | "Operating System :: OS Independent", 28 | "Programming Language :: Python", 29 | "Programming Language :: Python :: 3", 30 | "Programming Language :: Python :: 3 :: Only", 31 | "Programming Language :: Python :: 3.9", 32 | "Programming Language :: Python :: 3.10", 33 
| "Programming Language :: Python :: 3.11", 34 | "Programming Language :: Python :: 3.12", 35 | "Programming Language :: Python :: 3.13", 36 | "Topic :: Software Development :: Libraries :: Python Modules", 37 | "Topic :: Multimedia :: Video", 38 | ] 39 | include = [ 40 | { path = "scripts/**", format = "sdist" }, 41 | { path = "tests/**", format = "sdist" }, 42 | { path = "setup.cfg", format = "sdist" }, 43 | ] 44 | 45 | [tool.poetry.scripts] 46 | knowit = "knowit.__main__:main" 47 | 48 | [tool.poetry.dependencies] 49 | python = ">=3.9,<4.0" 50 | babelfish = "^0.6.1" 51 | enzyme = "^0.5.2" 52 | pint = { version = ">=0.20.1,<0.25.0", optional = true } 53 | pymediainfo = "^7.0.1" 54 | pyyaml = "^6.0" 55 | trakit = "^0.2.2" 56 | 57 | [tool.poetry.group.dev.dependencies] 58 | pytest = "^8.4.1" 59 | pytest-cov = "^6.2.1" 60 | flake8 = "^7.3.0" 61 | requests = "^2.32.4" 62 | flake8-docstrings = "^1.7.0" 63 | flake8-import-order = "^0.19.2" 64 | pep8-naming = "^0.15.1" 65 | pydocstyle = "^6.3.0" 66 | mypy = "^1.17.0" 67 | types-requests = "^2.32.4.20250611" 68 | types-mock = "^5.2.0.20250516" 69 | typing-extensions = "^4.14.1" 70 | 71 | [tool.poetry.extras] 72 | pint = ["pint"] 73 | 74 | [build-system] 75 | requires = ["poetry-core"] 76 | build-backend = "poetry.core.masonry.api" 77 | -------------------------------------------------------------------------------- /knowit/config.py: -------------------------------------------------------------------------------- 1 | import os 2 | import typing 3 | from logging import NullHandler, getLogger 4 | 5 | import yaml 6 | 7 | try: 8 | from importlib.resources import files 9 | except ImportError: 10 | from importlib_resources import files # type: ignore[assignment,no-redef,import-not-found] 11 | 12 | from knowit.serializer import get_yaml_loader 13 | 14 | logger = getLogger(__name__) 15 | logger.addHandler(NullHandler()) 16 | 17 | 18 | class _Value(typing.NamedTuple): 19 | code: str 20 | default: str 21 | human: str 22 | 
class ResolutionRule(Rule):
    """Rule computing the resolution label of a video track."""

    standard_resolutions = (
        480,
        720,
        1080,
        2160,
        4320,
    )
    uncommon_resolutions = (
        240,
        288,
        360,
        576,
    )
    resolutions = sorted(standard_resolutions + uncommon_resolutions)
    square = 4. / 3
    wide = 16. / 9

    def execute(self, props, pv_props, context):
        """Return the resolution label for the video, e.g. ``1080p``.

        The label is computed as if the video were shown on a widescreen (16:9) TV:
        1920x800 is 1080p because the TV uses 1920x1080 with vertical black bars;
        1426x1080 is 1080p because the TV uses 1920x1080 with horizontal black bars.

        Both the display aspect ratio and the pixel aspect ratio are taken into
        account, not only width and height. When the computed height does not
        match one of the known resolutions exactly, the next higher one is used:
        240, 288, 360, 480, 576, 720, 1080, 2160, 4320
        Without scan type information the video is considered progressive.
        Nothing is returned when width or height is missing; a computed height
        above every known resolution is reported.
        """
        width = props.get('width')
        height = props.get('height')
        if not (width and height):
            return

        # Quantities expose their number through ``magnitude``; plain numbers don't.
        try:
            width = width.magnitude
            height = height.magnitude
        except AttributeError:
            pass

        computed_dar = Decimal(width) / height
        dar = props.get('aspect_ratio', computed_dar)
        par = props.get('pixel_aspect_ratio', 1)
        scan_type = props.get('scan_type', 'p')[0].lower()

        # the DAR used for the calculation is clamped between 4:3 and 16:9
        clamped_dar = max(min(dar, self.wide), self.square)

        # width after applying the pixel aspect ratio, rounded to a multiple of 16
        stretched_width = int(round(width * par / 16)) * 16

        # matching height, rounded to a multiple of 8
        calculated_height = int(round(stretched_width / clamped_dar / 8)) * 8

        # ``resolutions`` is sorted ascending: take the smallest one that fits.
        selected_resolution = next((r for r in self.resolutions if r >= calculated_height), None)
        if selected_resolution:
            return f'{selected_resolution}{scan_type}'

        msg = f'{width}x{height} - scan_type: {scan_type}, aspect_ratio: {dar}, pixel_aspect_ratio: {par}'
        self.report(msg, context)
@pytest.mark.parametrize(
    'os_name, sys_platform, expected', [
        ('nt', None, 'windows'),
        ('dos', None, 'windows'),
        ('os2', None, 'windows'),
        ('ce', None, 'windows'),
        (None, 'darwin', 'macos'),
        (None, None, 'unix'),
    ]
)
def test_detect_os(os_name, sys_platform, expected):
    """Check the OS family detected for each os.name / sys.platform pair."""
    with patch('knowit.utils.os') as mock_os, patch('knowit.utils.sys') as mock_sys:
        mock_os.name = os_name
        mock_sys.platform = sys_platform
        assert detect_os() == expected


@pytest.mark.parametrize(
    'os_family, path, names, expected', [
        (
            'windows',
            r'C:\Application;C:\Program Files\Application',
            ('some.dll', 'some.exe', 'another.exe'),
            [
                r'C:\Application\some.dll',
                r'C:\Application\some.exe',
                r'C:\Application\another.exe',
                r'C:\Program Files\Application\some.dll',
                r'C:\Program Files\Application\some.exe',
                r'C:\Program Files\Application\another.exe',
                r'some.dll',
                r'some.exe',
                r'another.exe',
            ],
        ),
        (
            'macos',
            '/usr/sbin:/usr/bin:/sbin:/bin',
            ('some.dll', 'binary', 'another_binary'),
            [
                'some.dll',
                'binary',
                'another_binary',
            ],
        ),
        (
            'linux',
            '/usr/sbin:/usr/bin:/sbin:/bin',
            ('some.dll', 'binary', 'another_binary'),
            [
                'some.dll',
                'binary',
                'another_binary',
            ],
        ),
    ],
)
def test_build_path_candidates_for_specified_os(names, os_family, path, expected):
    """Check the candidate paths built from PATH for a given OS family."""
    def normalize_paths(paths: typing.Iterable[str]):
        """Replace all slashes to a forward slash for comparison purposes."""
        return [p.replace('\\', '/') for p in paths]

    with patch('knowit.utils.os') as mock_os:
        mock_os.environ = {'PATH': path}
        mock_os.path = os.path  # keep the real os.path functions
        candidates = build_path_candidates(names, os_family)

        assert normalize_paths(candidates) == normalize_paths(expected)
@pytest.fixture
def context():
    """Return a context that selects the ``default`` profile."""
    return {
        'profile': 'default',
    }


@pytest.fixture
def config():
    """Return the config built without a user-supplied file."""
    return Config.build()


@pytest.fixture
def options():
    """Return the base options; provider fixtures add their ``provider`` key."""
    return {'profile': 'code'}


def _setup_executor(provider, executor_base, executor_factory, monkeypatch, options):
    """Select *provider* and make its executor serve canned data first.

    :param provider: provider name stored in ``options['provider']``.
    :param executor_base: class whose ``get_executor_instance`` is replaced.
    :param executor_factory: zero-argument callable returning the executor;
        it is called only after the options and provider list were updated.
    :return: a dict mapping a file name to the data ``extract_info`` should
        return for it; unknown file names go to the real ``extract_info``.
    """
    options['provider'] = provider
    # presumably drops previously initialized providers -- confirm against knowit.api
    api.available_providers.clear()
    executor = executor_factory()
    monkeypatch.setattr(executor_base, 'get_executor_instance', Mock(return_value=executor))

    data = {}
    extract_info = executor.extract_info
    monkeypatch.setattr(executor, 'extract_info',
                        lambda filename: data[filename] if filename in data else extract_info(filename))
    return data


def setup_mediainfo(executor, monkeypatch, options):
    """Install *executor* as the mediainfo executor and return its canned-data dict."""
    assert executor
    return _setup_executor('mediainfo', MediaInfoExecutor, lambda: executor, monkeypatch, options)


@pytest.fixture
def mediainfo_cli(monkeypatch, options):
    """Use the mediainfo provider through its CLI executor."""
    return setup_mediainfo(MediaInfoCliExecutor.create(), monkeypatch, options)


@pytest.fixture
def mediainfo(monkeypatch, options):
    """Use the mediainfo provider through its ctypes executor."""
    return setup_mediainfo(MediaInfoCTypesExecutor.create(), monkeypatch, options)


@pytest.fixture
def ffmpeg(monkeypatch, options):
    """Use the ffmpeg provider through its CLI executor."""
    return _setup_executor('ffmpeg', FFmpegExecutor, FFmpegCliExecutor.create, monkeypatch, options)


@pytest.fixture
def mkvmerge(monkeypatch, options):
    """Use the mkvmerge provider through its CLI executor."""
    return _setup_executor('mkvmerge', MkvMergeExecutor, MkvMergeCliExecutor.create, monkeypatch, options)


@pytest.fixture
def enzyme(monkeypatch, options):
    """Use the enzyme provider, serving canned data before real extraction."""
    options['provider'] = 'enzyme'

    data = {}
    extract_info = EnzymeProvider.extract_info
    monkeypatch.setattr(EnzymeProvider, 'extract_info',
                        lambda cls, filename: data[filename] if filename in data else extract_info(filename))

    return data
Suomi 73 | language: fi 74 | format: SUBRIP 75 | - id: 11 76 | name: Français (Canadien) 77 | language: fr-CA 78 | format: SUBRIP 79 | - id: 12 80 | name: Français 81 | language: fr 82 | format: SUBRIP 83 | - id: 13 84 | name: Magyar 85 | language: hu 86 | format: SUBRIP 87 | - id: 14 88 | name: Italiano 89 | language: it 90 | format: SUBRIP 91 | - id: 15 92 | name: Japanese 93 | language: ja 94 | format: SUBRIP 95 | - id: 16 96 | name: Korean 97 | language: ko 98 | format: SUBRIP 99 | - id: 17 100 | name: Nederlands 101 | language: nl 102 | format: SUBRIP 103 | - id: 18 104 | name: Norsk 105 | language: 'no' 106 | format: SUBRIP 107 | - id: 19 108 | name: Polski 109 | language: pl 110 | format: SUBRIP 111 | - id: 20 112 | name: Português 113 | language: pt 114 | format: SUBRIP 115 | - id: 21 116 | name: Português (Brasil) 117 | language: pt-BR 118 | format: SUBRIP 119 | - id: 22 120 | name: Română 121 | language: ro 122 | format: SUBRIP 123 | - id: 23 124 | name: Slovenčina 125 | language: sk 126 | format: SUBRIP 127 | - id: 24 128 | name: Svenska 129 | language: sv 130 | format: SUBRIP 131 | - id: 25 132 | name: Türkçe 133 | language: tr 134 | format: SUBRIP 135 | - id: 26 136 | name: Chinese (Hong Kong) 137 | language: zh-HK 138 | format: SUBRIP 139 | - id: 27 140 | name: Chinese (Traditional) 141 | language: zh-Hant 142 | format: SUBRIP 143 | - id: 28 144 | name: Chinese (Simplified) 145 | language: zh-Hans 146 | format: SUBRIP 147 | - id: 29 148 | name: Indonesian 149 | language: id 150 | format: SUBRIP 151 | - id: 30 152 | name: Malay 153 | language: ms 154 | format: SUBRIP 155 | - id: 31 156 | name: Thai 157 | language: th 158 | format: SUBRIP 159 | - id: 32 160 | name: Chinese 161 | language: zh 162 | format: SUBRIP 163 | provider: 164 | name: mediainfo -------------------------------------------------------------------------------- /knowit/properties/video.py: -------------------------------------------------------------------------------- 1 | import re 2 
class VideoCodec(Configurable[str]):
    """Video Codec handler."""

    @classmethod
    def _extract_key(cls, value) -> str:
        # Keep the last '/'-separated part, e.g. 'V_MPEG4/ISO/AVC' => 'AVC'
        key = value.upper().split('/')[-1]
        if key.startswith('V_'):
            key = key[2:]

        # Keep only the last word of what remains.
        return key.split(' ')[-1]


class VideoDimensions(Property[int]):
    """Dimensions property."""

    def __init__(self, *args: str, dimension='width', **kwargs):
        """Initialize the object.

        :param dimension: which side to extract, ``'width'`` or ``'height'``.
        """
        super().__init__(*args, **kwargs)
        self.dimension = dimension

    # '<width>x<height>', e.g. '1920x1080'. The group names are the values
    # accepted for ``dimension``.
    dimensions_re = re.compile(r'(?P<width>\d+)x(?P<height>\d+)')

    def handle(self, value, context) -> typing.Optional[int]:
        """Return the configured dimension from a ``<width>x<height>`` value.

        Values that do not match, or a ``dimension`` that is neither
        ``'width'`` nor ``'height'``, are reported and yield None.
        """
        match = self.dimensions_re.match(value)
        if match:
            dimension_value = match.groupdict().get(self.dimension)
            if dimension_value is not None:
                return int(dimension_value)

        self.report(value, context)
        return None


class VideoEncoder(Configurable):
    """Video Encoder property."""


class VideoHdrFormat(Configurable):
    """Video HDR Format property."""


class VideoProfile(Configurable[str]):
    """Video Profile property."""

    @classmethod
    def _extract_key(cls, value) -> str:
        # The profile is the part before the first '@', e.g. 'Main@L3.1' => 'MAIN'
        return value.upper().split('@')[0]


class VideoProfileLevel(Configurable[str]):
    """Video Profile Level property."""

    @classmethod
    def _extract_key(cls, value) -> typing.Union[str, bool]:
        # The level is the second '@'-separated part, e.g. 'Main@L3.1' => 'L3.1'
        values = str(value).upper().split('@')
        if len(values) > 1:
            return values[1]

        # There's no level, so don't warn or report it
        return False


class VideoProfileTier(Configurable[str]):
    """Video Profile Tier property."""

    @classmethod
    def _extract_key(cls, value) -> typing.Union[str, bool]:
        # The tier is the third '@'-separated part, e.g. 'Main 10@L4@High' => 'HIGH'
        values = str(value).upper().split('@')
        if len(values) > 2:
            return values[2]

        # There's no tier, so don't warn or report it
        return False


class Ratio(Property[Decimal]):
    """Ratio property."""

    def __init__(self, *args: str, unit=None, **kwargs):
        """Initialize the object.

        :param unit: optional factor the computed ratio is multiplied by.
        """
        super().__init__(*args, **kwargs)
        self.unit = unit

    # '<width>:<height>' or '<width>/<height>', e.g. '16:9'
    ratio_re = re.compile(r'(?P<width>\d+)[:/](?P<height>\d+)')

    def handle(self, value, context) -> typing.Optional[Decimal]:
        """Return the ratio described by a ``<width>:<height>`` value.

        Values that do not match, or whose height is zero, are reported and
        yield None.
        """
        match = self.ratio_re.match(value)
        if match:
            width, height = match.groups()
            if (width, height) == ('0', '1'):  # identity
                return Decimal('1.0')

            # ``height`` is a non-empty digit string and therefore always
            # truthy: the zero check has to be done on its numeric value.
            denominator = Decimal(height)
            if denominator:
                result = round_decimal(Decimal(width) / denominator, min_digits=1, max_digits=3)
                if self.unit:
                    result *= self.unit

                return result

        self.report(value, context)
        return None


class ScanType(Configurable[str]):
    """Scan Type property."""
class AudioChannelsRule(Rule):
    """Rule computing the channel layout (e.g. ``5.1``) of an audio track."""

    # Layout used for a channel count when the positions give no exact match.
    mapping = {
        1: '1.0',
        2: '2.0',
        6: '5.1',
        8: '7.1',
    }

    def execute(self, props, pv_props, context):
        """Return the channel layout matching the track's channel count.

        Each entry of ``channel_positions`` is a '/'-separated sum; the first
        one whose total matches ``channels_count`` wins. Otherwise the mapped
        layout is used, then the highest total found. When nothing applies,
        the positions are reported.
        """
        channel_count = props.get('channels_count')
        if channel_count is None:
            return

        positions = pv_props.get('channel_positions') or []
        if not isinstance(positions, list):
            positions = [positions]

        best_total = 0
        for layout in filter(None, positions):
            total = Decimal('0.0')
            for part in layout.split('/'):
                try:
                    total += Decimal(part.replace('.?', ''))
                except (ValueError, InvalidOperation):
                    logger.debug('Invalid %s: %s', self.description, part)

            # '5.1' counts as 5 + 1 channels: integer part plus first decimal digit.
            whole = int(total)
            if whole + int(round((total - whole) * 10)) == channel_count:
                return str(total)

            best_total = max(best_total, total)

        mapped_layout = self.mapping.get(channel_count) if isinstance(channel_count, int) else None
        if mapped_layout:
            return mapped_layout

        if best_total:
            return best_total

        self.report(positions, context)


class DtsHdRule(Rule):
    """Rule turning a DTS codec with an MA or HRA profile into DTS-HD."""

    def __init__(self, config: typing.Mapping[str, typing.Mapping], name: str,
                 **kwargs):
        """Initialize the rule with the audio codecs and profiles of the config."""
        super().__init__(name, **kwargs)
        self.audio_codecs = config.AudioCodec
        self.audio_profiles = config.AudioProfile

    @classmethod
    def _redefine(cls, props, name, index):
        # Only list values are collapsed to the element at ``index``.
        current = props.get(name)
        if not isinstance(current, list):
            return

        chosen = current[index]
        if chosen is None:
            del props[name]
        else:
            props[name] = chosen

    def execute(self, props, pv_props, context):
        """Replace the codec by DTS-HD when it is DTS with an MA or HRA profile."""
        profile = context.get('profile') or 'default'

        is_dts = props.get('codec') == getattr(self.audio_codecs['DTS'], profile)
        if not is_dts:
            return

        hd_profiles = (
            getattr(self.audio_profiles['MA'], profile),
            getattr(self.audio_profiles['HRA'], profile),
        )
        if props.get('profile') in hd_profiles:
            props['codec'] = getattr(self.audio_codecs['DTS-HD'], profile)
"codec_tag_string": "[0][0][0][0]", 55 | "codec_tag": "0x0000", 56 | "sample_fmt": "s32p", 57 | "sample_rate": "48000", 58 | "channels": 8, 59 | "channel_layout": "7.1", 60 | "bits_per_sample": 0, 61 | "r_frame_rate": "0/0", 62 | "avg_frame_rate": "0/0", 63 | "time_base": "1/1000", 64 | "start_pts": 0, 65 | "start_time": "0:00:00.000000", 66 | "bits_per_raw_sample": "24", 67 | "disposition": { 68 | "default": 1, 69 | "dub": 0, 70 | "original": 0, 71 | "comment": 0, 72 | "lyrics": 0, 73 | "karaoke": 0, 74 | "forced": 0, 75 | "hearing_impaired": 0, 76 | "visual_impaired": 0, 77 | "clean_effects": 0, 78 | "attached_pic": 0, 79 | "timed_thumbnails": 0 80 | }, 81 | "tags": { 82 | "language": "eng", 83 | "title": "7.1Ch DTS-HD MA" 84 | } 85 | } 86 | ], 87 | "format": { 88 | "filename": "tests/data/ffmpeg/7.1-dts-hd-ma-speaker-mapping-test-file.mkv", 89 | "nb_streams": 2, 90 | "nb_programs": 0, 91 | "format_name": "matroska,webm", 92 | "format_long_name": "Matroska / WebM", 93 | "start_time": "0:00:00.000000", 94 | "duration": "0:01:37.931000", 95 | "size": "40772443", 96 | "bit_rate": "3330707", 97 | "probe_score": 100, 98 | "tags": { 99 | "title": "7.1Ch DTS-HD MA - Speaker Mapping Test File", 100 | "encoder": "libebml v1.3.0 + libmatroska v1.4.1", 101 | "creation_time": "2013-12-13T17:49:28.000000Z" 102 | } 103 | } 104 | } 105 | -------------------------------------------------------------------------------- /tests/test_properties.yml: -------------------------------------------------------------------------------- 1 | VideoCodec: 2 | H.264: 3 | - V_MPEG4/ISO/AVC 4 | - AVC 5 | Microsoft MPEG-4 v2: MP42 6 | WMV 7: WMV1 7 | WebCam JPEG: CJPG 8 | H.265: V_MPEGH/ISO/HEVC 9 | Xvid: XVID 10 | MPEG-1: MPEG-1V 11 | VP8: V_VP8 12 | VC-1: 13 | - VC-1 14 | - WMV3 15 | WMV 8: WMV2 16 | 17 | VideoEncoder: 18 | DivX: DivX 19 | x264: x264 20 | x265: x265 21 | Xvid: XviD 22 | Vimeo: Vimeo Encoder 23 | 24 | VideoProfile: 25 | Advanced: Advanced@L3 26 | Advanced Simple: Advanced 
Simple@L5 27 | Simple: 28 | - Simple@L1 29 | - Simple@L3 30 | Baseline: 31 | - Baseline@L1.0 32 | - Baseline@L1.1 33 | - Baseline@L1.3 34 | - Baseline@L2 35 | - Baseline@L2.1 36 | - Baseline@L3 37 | - Baseline@L3.0 38 | - Baseline@L3.2 39 | - Baseline@L4.1 40 | Main: 41 | - Main 42 | - Main@Main 43 | - Main@L1.3 44 | - Main@L2.1 45 | - Main@L3 46 | - Main@L3.0 47 | - Main@L3.1 48 | - Main@L3.1@Main 49 | - Main@L3.2 50 | - Main@L4 51 | - Main@L4.0 52 | - Main@L4@Main 53 | - Main@L5@Main 54 | - Main@High 55 | - Main@High 1440 56 | Main 10: 57 | - Main 10@L2.1@Main 58 | - Main 10@L3@Main 59 | - Main 10@L3.1@Main 60 | - Main 10@L4@Main 61 | - Main 10@L4@High 62 | - Main 10@L4.1@Main 63 | - Main 10@L5@Main 64 | - Main 10@L5.1@Main 65 | High: 66 | - High@L3 67 | - High@L3.0 68 | - High@L3.1 69 | - High@L3.2 70 | - High@L4 71 | - High@L4.0 / High@L4.0 72 | - High@L4.0 73 | - High@L4.2 74 | - High@L5 75 | - High@L5.1 76 | 77 | VideoProfileLevel: 78 | '1': 79 | - Baseline@L1.0 80 | - Simple@L1 81 | '1.1': Baseline@L1.1 82 | '1.3': 83 | - Baseline@L1.3 84 | - Main@L1.3 85 | '2': Baseline@L2 86 | '2.1': 87 | - Baseline@L2.1 88 | - Main@L2.1 89 | - Main 10@L2.1@Main 90 | '2.2': 91 | - High@L2.2 92 | '3': 93 | - Advanced@L3 94 | - Simple@L3 95 | - Baseline@L3 96 | - Baseline@L3.0 97 | - Main@L3 98 | - Main@L3.0 99 | - Main 10@L3@Main 100 | - High@L3 101 | - High@L3.0 102 | '3.1': 103 | - Main@L3.1 104 | - Main@L3.1@Main 105 | - Main 10@L3.1@Main 106 | - High@L3.1 107 | '3.2': 108 | - Baseline@L3.2 109 | - Main@L3.2 110 | - High@L3.2 111 | '4': 112 | - Main@L4 113 | - Main@L4.0 114 | - Main@L4@Main 115 | - Main 10@L4@Main 116 | - Main 10@L4@High 117 | - High@L4 118 | - High@L4.0 119 | - High@L4.0 / High@L4.0 120 | '4.1': 121 | - Baseline@L4.1 122 | - Main 10@L4.1@Main 123 | '4.2': 124 | - High@L4.2 125 | '5': 126 | - Advanced Simple@L5 127 | - Main@L5@Main 128 | - Main 10@L5.0 129 | - Main 10@L5@Main 130 | - High@L5 131 | '5.1': 132 | - High@L5.1 133 | - Main 10@L5.1@Main 134 | 
Main: 135 | - Main@Main 136 | High: 137 | - Main@High 138 | High 1440: 139 | - Main@High 1440 140 | 141 | VideoProfileTier: 142 | Main: 143 | - Main@L3.1@Main 144 | - Main@L4@Main 145 | - Main@L5@Main 146 | - Main 10@L2.1@Main 147 | - Main 10@L3@Main 148 | - Main 10@L3.1@Main 149 | - Main 10@L4@Main 150 | - Main 10@L4.1@Main 151 | - Main 10@L5@Main 152 | - Main 10@L5.1@Main 153 | High: 154 | - Main 10@L4@High 155 | 156 | VideoHdrFormat: 157 | Dolby Vision: 158 | - Dolby Vision 159 | HDR10: 160 | - SMPTE ST 2086 161 | 162 | ScanType: 163 | Progressive: 164 | - Progressive 165 | - Progressive / Progressive 166 | Interlaced: MBAFF 167 | 168 | AudioCodec: 169 | AAC: 170 | - AAC LC 171 | - AAC LC-SBR 172 | - AAC LC-SBR-PS 173 | AC-3: AC3 174 | E-AC-3: AC3+ 175 | Atmos: Atmos 176 | TrueHD: 177 | - TrueHD 178 | - A_TRUEHD 179 | DTS: DTS 180 | DTS-HD: DTS-HD 181 | FLAC: FLAC 182 | PCM: PCM 183 | Vorbis: Vorbis 184 | MP2: MPA1L2 185 | MP3: 186 | - MPA1L3 187 | - MPA2L3 188 | WMA 2: 161 189 | WMA Pro: 162 190 | RT29 MetaSound: 75 191 | 192 | AudioProfile: 193 | High Efficiency: HE-AAC 194 | High Efficiency v2: HE-AACv2 195 | Master Audio: MA 196 | Low Complexity: LC 197 | High Resolution Audio: HRA 198 | 96/24: 96/24 199 | Extended Surround: 200 | - ES Discrete 201 | - ES Matrix 202 | Layer 2: Layer 2 203 | Layer 3: Layer 3 204 | Pro: Pro 205 | 206 | AudioCompression: 207 | Lossy: Lossy 208 | Lossless: Lossless 209 | 210 | BitRateMode: 211 | Constant: CBR 212 | Variable: VBR 213 | 214 | SubtitleFormat: 215 | ASS: S_TEXT/ASS 216 | SSA: S_TEXT/SSA 217 | VobSub: S_VOBSUB 218 | PGS: S_HDMV/PGS 219 | SubRip: S_TEXT/UTF8 220 | DVBSub: 6 221 | -------------------------------------------------------------------------------- /knowit/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import typing 4 | from decimal import Decimal 5 | 6 | from knowit import VIDEO_EXTENSIONS 7 | 8 | OS_FAMILY = 
def to_dict(
    obj: typing.Any,
    classkey: typing.Optional[typing.Type] = None
) -> typing.Any:
    """Recursively transform an object into plain strings, dicts and lists.

    Conversion rules, checked in this order:

    * ``str`` is returned unchanged;
    * ``dict`` values are converted recursively, keys are kept;
    * an object exposing ``_ast()`` is replaced by the conversion of what
      ``_ast()`` returns;
    * any other iterable becomes a list of converted items;
    * an object with ``__dict__`` becomes a dict of its public,
      non-callable attributes whose converted value is not ``None``;
    * anything else is returned unchanged.

    :param obj: the value to convert.
    :param classkey: when not ``None``, every dict built from an object's
        ``__dict__`` also gets ``classkey`` mapped to the class name.
    :return: the converted structure.
    """
    if isinstance(obj, str):
        return obj
    if isinstance(obj, dict):
        return {key: to_dict(value, classkey) for key, value in obj.items()}
    if hasattr(obj, '_ast'):
        # classkey has to travel through this branch as well, otherwise the
        # class tagging is lost for everything below an ``_ast()`` object.
        return to_dict(obj._ast(), classkey)
    if hasattr(obj, '__iter__'):
        return [to_dict(item, classkey) for item in obj]
    if hasattr(obj, '__dict__'):
        data = {}
        for key, value in obj.__dict__.items():
            if callable(value) or key.startswith('_'):
                continue
            converted = to_dict(value, classkey)
            if converted is not None:
                data[key] = converted
        if classkey is not None and hasattr(obj, '__class__'):
            data[classkey] = obj.__class__.__name__
        return data
    return obj
def build_path_candidates(
    names: typing.Iterable[str],
    os_family: typing.Optional['OS_FAMILY'] = None,
) -> typing.Generator[str, None, None]:
    """Build candidate names on environment PATH.

    For every family the bare ``names`` are yielded. For the ``windows``
    family they are preceded by each ``PATH`` directory joined with each
    name.

    :param names: executable names to look for.
    :param os_family: family to build candidates for; detected with
        ``detect_os()`` when not given.
    :return: a generator of candidate executable paths/names.
    """
    os_family = os_family or detect_os()
    # ``names`` is walked once per PATH entry and once more at the end, so a
    # one-shot iterator must be materialised first.
    names = tuple(names)
    if os_family == 'windows':
        # PATH can be missing from the environment; treat that as "no
        # directories" instead of failing with KeyError. Empty entries are
        # skipped: joining them would only repeat the bare names below.
        directories = [
            entry for entry in os.environ.get('PATH', '').split(';') if entry
        ]
        for directory in directories:
            for name in names:
                yield os.path.join(directory, name)
    yield from names
def initialize(context: typing.Optional[typing.Mapping] = None, *, force: bool = False) -> None:
    """Create the providers, replacing stale ones.

    The suggested executable path of each provider is read from ``context``
    under the provider name, falling back to the same key of the ``general``
    section of the configuration built from ``context['config']``.

    A provider is (re)created when ``force`` is set, when none is registered
    yet, or when the registered one is not loaded and does not match the
    suggested path.
    """
    ctx = context or {}
    config = Config.build(ctx.get('config'))
    general_config = getattr(config, 'general', {})

    for name, provider_cls in _provider_map.items():
        suggested_path = ctx.get(name) or general_config.get(name)
        current = available_providers.get(name)

        if not force and current is not None:
            # keep a provider that works, or one that was already built for
            # this very location
            if current.loaded() or current.match_executor_location(suggested_path):
                continue

        available_providers[name] = provider_cls(config, suggested_path)
context.setdefault('profile', 'default') 56 | initialize(context) 57 | 58 | for name, provider in available_providers.items(): 59 | if name != (context.get('provider') or name): 60 | continue 61 | 62 | if provider.accepts(video_path): 63 | result = provider.describe(video_path, context) 64 | if result: 65 | return result 66 | 67 | return {} 68 | except Exception: 69 | raise KnowitException(debug_info(context=context, exc_info=True)) 70 | 71 | 72 | def dependencies(context: typing.Optional[typing.Mapping] = None) -> typing.Mapping: 73 | """Return all dependencies detected by knowit.""" 74 | deps = {} 75 | try: 76 | initialize(context) 77 | for name in _provider_map: 78 | if name in available_providers: 79 | deps[name] = available_providers[name].version 80 | else: 81 | deps[name] = {} 82 | except Exception: 83 | pass 84 | 85 | return deps 86 | 87 | 88 | def loaded_providers(options: typing.Union[dict[str, typing.Any], None] = None) -> dict[str, bool]: 89 | """Return a dict with each provider and if they are installed.""" 90 | # initialize providers with options 91 | initialize(options) 92 | 93 | # return a dict of providers and the loaded state 94 | return {k: p.loaded() for k, p in available_providers.items()} 95 | 96 | 97 | def _centered(value: str) -> str: 98 | value = value[-52:] 99 | return f'| {value:^53} |' 100 | 101 | 102 | def debug_info( 103 | context: typing.Optional[typing.MutableMapping] = None, 104 | exc_info: bool = False, 105 | ) -> str: 106 | lines = [ 107 | '+-------------------------------------------------------+', 108 | _centered(f'KnowIt {__version__}'), 109 | '+-------------------------------------------------------+' 110 | ] 111 | 112 | first = True 113 | for info in dependencies(context).values(): 114 | if not first: 115 | lines.append(_centered('')) 116 | first = False 117 | 118 | for k, v in info.items(): 119 | lines.append(_centered(k)) 120 | lines.append(_centered(v)) 121 | 122 | if context: 123 | debug_data = context.pop('debug_data', 
None) 124 | 125 | lines.append('+-------------------------------------------------------+') 126 | for k, v in context.items(): 127 | if v: 128 | lines.append(_centered(f'{k}: {v}')) 129 | 130 | if debug_data: 131 | lines.append('+-------------------------------------------------------+') 132 | lines.append(debug_data()) 133 | 134 | if exc_info: 135 | lines.append('+-------------------------------------------------------+') 136 | lines.append(traceback.format_exc()) 137 | 138 | lines.append('+-------------------------------------------------------+') 139 | lines.append(_centered('Please report any bug or feature request at')) 140 | lines.append(_centered('https://github.com/ratoaq2/knowit/issues.')) 141 | lines.append('+-------------------------------------------------------+') 142 | 143 | return '\n'.join(lines) 144 | -------------------------------------------------------------------------------- /knowit/properties/general.py: -------------------------------------------------------------------------------- 1 | import re 2 | import typing 3 | from datetime import timedelta 4 | from decimal import Decimal, InvalidOperation 5 | 6 | import babelfish 7 | 8 | from knowit.core import Configurable, Property, T 9 | from knowit.utils import round_decimal 10 | 11 | 12 | class Basic(Property[T]): 13 | """Basic property to handle int, Decimal and other basic types.""" 14 | 15 | def __init__(self, *args: str, data_type: typing.Type, 16 | processor: typing.Optional[typing.Callable[[T], T]] = None, 17 | allow_fallback: bool = False, **kwargs): 18 | """Init method.""" 19 | super().__init__(*args, **kwargs) 20 | self.data_type = data_type 21 | self.processor = processor or (lambda x: x) 22 | self.allow_fallback = allow_fallback 23 | 24 | def handle(self, value, context: typing.MutableMapping): 25 | """Handle value.""" 26 | if isinstance(value, self.data_type): 27 | return self.processor(value) 28 | 29 | try: 30 | return self.processor(self.data_type(value)) 31 | except ValueError: 
class Duration(Property[timedelta]):
    """Duration property.

    Converts a raw duration (``timedelta``, number, numeric string or
    ``H:MM:SS[.fraction]`` string) to a :class:`datetime.timedelta`.
    """

    # ``H:MM:SS`` optionally followed by a fraction: the first three digits
    # are milliseconds, the next three (if present) microseconds, anything
    # after that is ignored. The group names must be valid ``timedelta``
    # keyword arguments because ``handle`` passes the matched groups straight
    # to ``timedelta(**params)``.
    duration_re = re.compile(r'(?P<hours>\d{1,2}):'
                             r'(?P<minutes>\d{1,2}):'
                             r'(?P<seconds>\d{1,2})(?:\.'
                             r'(?P<milliseconds>\d{3})'
                             r'(?P<microseconds>\d{3})?\d*)?')

    def __init__(self, *args: str, resolution: typing.Union[int, Decimal] = 1, **kwargs):
        """Initialize a Duration.

        :param resolution: factor that turns a numeric input into
            milliseconds.
        """
        super().__init__(*args, **kwargs)
        self.resolution = resolution

    def handle(self, value, context: typing.MutableMapping):
        """Return duration as timedelta.

        :param value: the raw duration.
        :param context: passed to ``report`` when the value is not
            understood.
        :return: the duration, or ``None`` when ``value`` is neither numeric
            nor matched by ``duration_re``.
        """
        if isinstance(value, timedelta):
            return value
        if isinstance(value, int):
            return timedelta(milliseconds=int(value * self.resolution))

        try:
            return timedelta(
                milliseconds=int(Decimal(value) * self.resolution))
        except (ValueError, TypeError, OverflowError, InvalidOperation):
            # not a usable number (text, None, NaN, infinity, ...): fall back
            # to the clock-style pattern below
            pass

        # the pattern can only be applied to text
        match = self.duration_re.match(value) if isinstance(value, str) else None
        if not match:
            self.report(value, context)
            return None

        params = {
            name: int(digits)
            for name, digits in match.groupdict().items()
            if digits
        }
        return timedelta(**params)
class YesNo(Configurable[str]):
    """Property that turns a yes/no like value into configurable outputs."""

    # lower-cased textual forms that are understood as "yes"
    yes_values = ('yes', 'true', '1')

    def __init__(self, *args: str, yes=True, no=False, hide_value=None,
                 config: typing.Optional[
                     typing.Mapping[str, typing.Mapping]] = None,
                 config_key: typing.Optional[str] = None,
                 **kwargs):
        """Store the values to produce.

        :param yes: result for an input found in ``yes_values``.
        :param no: result for any other input.
        :param hide_value: a result equal to this is replaced by ``None``.
        :param config: configuration mapping; an empty one is used when
            omitted.
        :param config_key: forwarded to the parent initializer.
        """
        super().__init__(config or {}, *args, config_key=config_key, **kwargs)
        self.yes = yes
        self.no = no
        self.hide_value = hide_value

    def handle(self, value, context):
        """Map ``value`` to the yes or the no result."""
        if str(value).lower() in self.yes_values:
            result = self.yes
        else:
            result = self.no

        if result == self.hide_value:
            return None

        # only go through the parent's handling when a mapping is set
        if self.mapping:
            return super().handle(result, context)
        return result
"5c5db0c93c7ebac3c88f7d372288b20e", 17 | "title": "Media 001", 18 | "writing_application": "mkvmerge v51.0.0 ('I Wish') 64-bit" 19 | }, 20 | "recognized": true, 21 | "supported": true, 22 | "type": "Matroska" 23 | }, 24 | "errors": [], 25 | "file_name": "tests/data/mkvmerge/media_001.mkv", 26 | "global_tags": [], 27 | "identification_format_version": 14, 28 | "track_tags": [], 29 | "tracks": [ 30 | { 31 | "codec": "HEVC/H.265/MPEG-H", 32 | "id": 0, 33 | "properties": { 34 | "codec_id": "V_MPEGH/ISO/HEVC", 35 | "codec_private_data": "012220000000b0000000000099f000fcfdfafa00000f04600001002240010c01ffff222000000300b0000003000003009914860300000303e900005dc050610001003d420101222000000300b00000030000030099a001e020021c4db1486924294af016a1220136c2000007d20000bb80c5781cdc0004eef80009ddf7cf1e3d62000100074401c172f63b64670002001e4e01891821349baa199608fc8a4839083d13404200989680000003001480000a4e019004000003000080", 36 | "codec_private_length": 187, 37 | "default_duration": 41708333, 38 | "default_track": false, 39 | "display_dimensions": "3840x2160", 40 | "display_unit": 0, 41 | "enabled_track": true, 42 | "forced_track": false, 43 | "language": "eng", 44 | "language_ietf": "en", 45 | "minimum_timestamp": 0, 46 | "number": 1, 47 | "packetizer": "mpegh_p2_video", 48 | "pixel_dimensions": "3840x2160", 49 | "uid": 1 50 | }, 51 | "type": "video" 52 | }, 53 | { 54 | "codec": "TrueHD Atmos", 55 | "id": 1, 56 | "properties": { 57 | "audio_channels": 8, 58 | "audio_sampling_frequency": 48000, 59 | "codec_id": "A_TRUEHD", 60 | "codec_private_length": 0, 61 | "default_track": true, 62 | "enabled_track": true, 63 | "forced_track": false, 64 | "language": "eng", 65 | "language_ietf": "en", 66 | "minimum_timestamp": 0, 67 | "number": 2, 68 | "uid": 2 69 | }, 70 | "type": "audio" 71 | }, 72 | { 73 | "codec": "AC-3 Dolby Surround EX", 74 | "id": 2, 75 | "properties": { 76 | "audio_channels": 6, 77 | "audio_sampling_frequency": 48000, 78 | "codec_id": "A_AC3", 79 | "codec_private_length": 0, 
80 | "default_duration": 32000000, 81 | "default_track": false, 82 | "enabled_track": true, 83 | "forced_track": false, 84 | "language": "eng", 85 | "language_ietf": "en", 86 | "minimum_timestamp": 0, 87 | "number": 3, 88 | "uid": 3 89 | }, 90 | "type": "audio" 91 | }, 92 | { 93 | "codec": "AC-3", 94 | "id": 3, 95 | "properties": { 96 | "audio_channels": 6, 97 | "audio_sampling_frequency": 48000, 98 | "codec_id": "A_AC3", 99 | "codec_private_length": 0, 100 | "default_duration": 32000000, 101 | "default_track": false, 102 | "enabled_track": true, 103 | "forced_track": false, 104 | "language": "ger", 105 | "language_ietf": "de", 106 | "minimum_timestamp": 0, 107 | "number": 4, 108 | "uid": 4 109 | }, 110 | "type": "audio" 111 | }, 112 | { 113 | "codec": "DTS", 114 | "id": 4, 115 | "properties": { 116 | "audio_bits_per_sample": 24, 117 | "audio_channels": 2, 118 | "audio_sampling_frequency": 48000, 119 | "codec_id": "A_DTS", 120 | "codec_private_length": 0, 121 | "default_duration": 10666667, 122 | "default_track": false, 123 | "enabled_track": true, 124 | "forced_track": false, 125 | "language": "por", 126 | "language_ietf": "pt-BR", 127 | "minimum_timestamp": 2002000000, 128 | "number": 5, 129 | "uid": 5 130 | }, 131 | "type": "audio" 132 | }, 133 | { 134 | "codec": "HDMV PGS", 135 | "id": 5, 136 | "properties": { 137 | "codec_id": "S_HDMV/PGS", 138 | "codec_private_length": 0, 139 | "content_encoding_algorithms": "0", 140 | "default_track": false, 141 | "enabled_track": true, 142 | "forced_track": false, 143 | "language": "eng", 144 | "language_ietf": "en", 145 | "number": 6, 146 | "uid": 6 147 | }, 148 | "type": "subtitles" 149 | }, 150 | { 151 | "codec": "HDMV PGS", 152 | "id": 6, 153 | "properties": { 154 | "codec_id": "S_HDMV/PGS", 155 | "codec_private_length": 0, 156 | "content_encoding_algorithms": "0", 157 | "default_track": false, 158 | "enabled_track": true, 159 | "forced_track": false, 160 | "language": "ger", 161 | "language_ietf": "de", 162 | "number": 
7, 163 | "uid": 11 164 | }, 165 | "type": "subtitles" 166 | }, 167 | { 168 | "codec": "HDMV PGS", 169 | "id": 7, 170 | "properties": { 171 | "codec_id": "S_HDMV/PGS", 172 | "codec_private_length": 0, 173 | "content_encoding_algorithms": "0", 174 | "default_track": false, 175 | "enabled_track": true, 176 | "forced_track": false, 177 | "language": "por", 178 | "language_ietf": "pt-BR", 179 | "number": 8, 180 | "uid": 14 181 | }, 182 | "type": "subtitles" 183 | } 184 | ], 185 | "warnings": [] 186 | } 187 | -------------------------------------------------------------------------------- /tests/test_resolution.yml: -------------------------------------------------------------------------------- 1 | # https://en.wikipedia.org/wiki/Pixel_aspect_ratio 2 | # https://en.wikipedia.org/wiki/Low-definition_television 3 | # https://knowledge.kaltura.com/best-practices-multi-device-transcoding 4 | 240p: 5 | - width: 320 pixel 6 | height: 250 pixel 7 | scan_type: Progressive 8 | aspect_ratio: 1.28 9 | pixel_aspect_ratio: 1.0 10 | - width: 320 pixel 11 | height: 250 pixel 12 | scan_type: Progressive 13 | - width: 320 pixel 14 | height: 262 pixel 15 | scan_type: Progressive 16 | aspect_ratio: 1.221 17 | pixel_aspect_ratio: 1.0 18 | - width: 320 pixel 19 | height: 262 pixel 20 | scan_type: Progressive 21 | 22 | 288p: 23 | - width: 480 pixel 24 | height: 270 pixel 25 | scan_type: Progressive 26 | aspect_ratio: 1.778 27 | pixel_aspect_ratio: 1.0 28 | - width: 480 pixel 29 | height: 270 pixel 30 | scan_type: Progressive 31 | 32 | 360p: 33 | - width: 640 pixel 34 | height: 360 pixel 35 | scan_type: Progressive 36 | aspect_ratio: 1.778 37 | pixel_aspect_ratio: 1.0 38 | - width: 640 pixel 39 | height: 360 pixel 40 | scan_type: Progressive 41 | - width: 640 pixel 42 | height: 352 pixel 43 | scan_type: Progressive 44 | aspect_ratio: 1.818 45 | pixel_aspect_ratio: 1.0 46 | - width: 640 pixel 47 | height: 352 pixel 48 | scan_type: Progressive 49 | - width: 592 pixel 50 | height: 320 pixel 51 | 
scan_type: Progressive 52 | aspect_ratio: 1.85 53 | pixel_aspect_ratio: 1.0 54 | - width: 592 pixel 55 | height: 320 pixel 56 | scan_type: Progressive 57 | - width: 640 pixel 58 | height: 320 pixel 59 | scan_type: Progressive 60 | - width: 640 pixel 61 | height: 352 pixel 62 | scan_type: Progressive 63 | 64 | 480p: 65 | - width: 640 pixel 66 | height: 320 pixel 67 | scan_type: Progressive 68 | aspect_ratio: 2.0 69 | pixel_aspect_ratio: 1.092 70 | - width: 752 pixel 71 | height: 398 pixel 72 | scan_type: Progressive 73 | aspect_ratio: 1.889 74 | pixel_aspect_ratio: 1.0 75 | - width: 752 pixel 76 | height: 398 pixel 77 | scan_type: Progressive 78 | - width: 720 pixel 79 | height: 400 pixel 80 | scan_type: Progressive 81 | aspect_ratio: 1.8 82 | pixel_aspect_ratio: 1.0 83 | - width: 720 pixel 84 | height: 400 pixel 85 | scan_type: Progressive 86 | 87 | 576p: 88 | - width: 720 pixel 89 | height: 576 pixel 90 | scan_type: Progressive 91 | aspect_ratio: 1.333 92 | pixel_aspect_ratio: 1.067 93 | - width: 720 pixel 94 | height: 576 pixel 95 | scan_type: Progressive 96 | - width: 720 pixel 97 | height: 576 pixel 98 | scan_type: Progressive 99 | aspect_ratio: 1.778 100 | pixel_aspect_ratio: 1.422 # not sure 101 | - width: 720 pixel 102 | height: 576 pixel 103 | scan_type: Progressive 104 | - width: 640 pixel 105 | height: 352 pixel 106 | scan_type: Progressive 107 | aspect_ratio: 1.818 108 | pixel_aspect_ratio: 1.422 109 | - width: 720 pixel 110 | height: 596 pixel 111 | scan_type: Progressive 112 | aspect_ratio: 1.289 113 | pixel_aspect_ratio: 1.067 114 | - width: 720 pixel 115 | height: 586 pixel 116 | scan_type: Progressive 117 | aspect_ratio: 1.311 118 | pixel_aspect_ratio: 1.067 119 | - width: 720 pixel 120 | height: 588 pixel 121 | scan_type: Progressive 122 | aspect_ratio: 1.304 123 | pixel_aspect_ratio: 1.065 124 | - width: 720 pixel 125 | height: 590 pixel 126 | scan_type: Progressive 127 | aspect_ratio: 1.302 128 | pixel_aspect_ratio: 1.067 129 | 130 | 720p: 131 | 
- width: 1280 pixel 132 | height: 720 pixel 133 | scan_type: Progressive 134 | aspect_ratio: 1.778 135 | pixel_aspect_ratio: 1.0 136 | - width: 1280 pixel 137 | height: 720 pixel 138 | scan_type: Progressive 139 | - width: 1280 pixel 140 | height: 544 pixel 141 | scan_type: Progressive 142 | aspect_ratio: 2.353 143 | pixel_aspect_ratio: 1.0 144 | - width: 1280 pixel 145 | height: 544 pixel 146 | scan_type: Progressive 147 | - width: 1280 pixel 148 | height: 542 pixel 149 | scan_type: Progressive 150 | aspect_ratio: 2.362 151 | pixel_aspect_ratio: 1.0 152 | - width: 1280 pixel 153 | height: 542 pixel 154 | scan_type: Progressive 155 | - width: 1280 pixel 156 | height: 526 pixel 157 | scan_type: Progressive 158 | aspect_ratio: 2.433 159 | pixel_aspect_ratio: 1.0 160 | - width: 1280 pixel 161 | height: 526 pixel 162 | scan_type: Progressive 163 | - width: 1264 pixel 164 | height: 718 pixel 165 | scan_type: Progressive 166 | aspect_ratio: 1.76 167 | pixel_aspect_ratio: 1.0 168 | - width: 1264 pixel 169 | height: 718 pixel 170 | scan_type: Progressive 171 | 172 | 1080p: 173 | - width: 1920 pixel 174 | height: 1080 pixel 175 | scan_type: Progressive 176 | aspect_ratio: 1.778 177 | pixel_aspect_ratio: 1.0 178 | - width: 1920 pixel 179 | height: 1080 pixel 180 | scan_type: Progressive 181 | - width: 1426 pixel 182 | height: 1080 pixel 183 | scan_type: Progressive 184 | aspect_ratio: 1.319 185 | - width: 1426 pixel 186 | height: 1080 pixel 187 | scan_type: Progressive 188 | - width: 1920 pixel 189 | height: 800 pixel 190 | scan_type: Progressive 191 | aspect_ratio: 2.4 192 | pixel_aspect_ratio: 1.0 193 | - width: 1920 pixel 194 | height: 800 pixel 195 | scan_type: Progressive 196 | - width: 1920 pixel 197 | height: 796 pixel 198 | scan_type: Progressive 199 | aspect_ratio: 2.412 200 | pixel_aspect_ratio: 1.0 201 | - width: 1920 pixel 202 | height: 796 pixel 203 | scan_type: Progressive 204 | - width: 1920 pixel 205 | height: 540 pixel 206 | scan_type: Progressive 207 | 
aspect_ratio: 3.556 208 | pixel_aspect_ratio: 1.0 209 | - width: 1920 pixel 210 | height: 540 pixel 211 | scan_type: Progressive 212 | -------------------------------------------------------------------------------- /tests/data/ffmpeg/issue-39-example-02.mkv.json: -------------------------------------------------------------------------------- 1 | { 2 | "streams": [ 3 | { 4 | "index": 0, 5 | "codec_name": "hevc", 6 | "codec_long_name": "H.265 / HEVC (High Efficiency Video Coding)", 7 | "profile": "Main", 8 | "codec_type": "video", 9 | "codec_time_base": "1001/24000", 10 | "codec_tag_string": "[0][0][0][0]", 11 | "codec_tag": "0x0000", 12 | "width": 1920, 13 | "height": 1080, 14 | "coded_width": 1920, 15 | "coded_height": 1080, 16 | "closed_captions": 0, 17 | "has_b_frames": 2, 18 | "sample_aspect_ratio": "1:1", 19 | "display_aspect_ratio": "16:9", 20 | "pix_fmt": "yuv420p", 21 | "level": 120, 22 | "color_range": "tv", 23 | "color_space": "bt709", 24 | "color_transfer": "bt709", 25 | "color_primaries": "bt709", 26 | "refs": 1, 27 | "r_frame_rate": "24000/1001", 28 | "avg_frame_rate": "24000/1001", 29 | "time_base": "1/1000", 30 | "start_pts": 105, 31 | "start_time": "0:00:00.105000", 32 | "disposition": { 33 | "default": 1, 34 | "dub": 0, 35 | "original": 0, 36 | "comment": 0, 37 | "lyrics": 0, 38 | "karaoke": 0, 39 | "forced": 0, 40 | "hearing_impaired": 0, 41 | "visual_impaired": 0, 42 | "clean_effects": 0, 43 | "attached_pic": 0, 44 | "timed_thumbnails": 0 45 | }, 46 | "tags": { 47 | "BPS": "1500697", 48 | "DURATION": "01:13:14.056708333", 49 | "NUMBER_OF_FRAMES": "105352", 50 | "NUMBER_OF_BYTES": "824268753", 51 | "_STATISTICS_WRITING_APP": "mkvpropedit v56.1.0 ('My Friend') 64-bit", 52 | "_STATISTICS_WRITING_DATE_UTC": "2021-05-27 15:27:05", 53 | "_STATISTICS_TAGS": "BPS DURATION NUMBER_OF_FRAMES NUMBER_OF_BYTES" 54 | } 55 | }, 56 | { 57 | "index": 1, 58 | "codec_name": "aac", 59 | "codec_long_name": "AAC (Advanced Audio Coding)", 60 | "profile": "HE-AAC", 61 | 
"codec_type": "audio", 62 | "codec_time_base": "1/48000", 63 | "codec_tag_string": "[0][0][0][0]", 64 | "codec_tag": "0x0000", 65 | "sample_fmt": "fltp", 66 | "sample_rate": "48000", 67 | "channels": 6, 68 | "channel_layout": "5.1", 69 | "bits_per_sample": 0, 70 | "r_frame_rate": "0/0", 71 | "avg_frame_rate": "0/0", 72 | "time_base": "1/1000", 73 | "start_pts": 0, 74 | "start_time": "0:00:00.000000", 75 | "disposition": { 76 | "default": 1, 77 | "dub": 0, 78 | "original": 0, 79 | "comment": 0, 80 | "lyrics": 0, 81 | "karaoke": 0, 82 | "forced": 0, 83 | "hearing_impaired": 0, 84 | "visual_impaired": 0, 85 | "clean_effects": 0, 86 | "attached_pic": 0, 87 | "timed_thumbnails": 0 88 | }, 89 | "tags": { 90 | "title": "Surround", 91 | "LANGUAGE": "eng", 92 | "BPS": "256002", 93 | "DURATION": "01:13:12.106000000", 94 | "NUMBER_OF_FRAMES": "102941", 95 | "NUMBER_OF_BYTES": "140548779", 96 | "_STATISTICS_WRITING_APP": "mkvpropedit v56.1.0 ('My Friend') 64-bit", 97 | "_STATISTICS_WRITING_DATE_UTC": "2021-05-27 15:27:05", 98 | "_STATISTICS_TAGS": "BPS DURATION NUMBER_OF_FRAMES NUMBER_OF_BYTES" 99 | } 100 | }, 101 | { 102 | "index": 2, 103 | "codec_name": "hdmv_pgs_subtitle", 104 | "codec_long_name": "HDMV Presentation Graphic Stream subtitles", 105 | "codec_type": "subtitle", 106 | "codec_time_base": "0/1", 107 | "codec_tag_string": "[0][0][0][0]", 108 | "codec_tag": "0x0000", 109 | "r_frame_rate": "0/0", 110 | "avg_frame_rate": "0/0", 111 | "time_base": "1/1000", 112 | "start_pts": 0, 113 | "start_time": "0:00:00.000000", 114 | "duration_ts": 4394162, 115 | "duration": "1:13:14.162000", 116 | "disposition": { 117 | "default": 0, 118 | "dub": 0, 119 | "original": 0, 120 | "comment": 0, 121 | "lyrics": 0, 122 | "karaoke": 0, 123 | "forced": 0, 124 | "hearing_impaired": 0, 125 | "visual_impaired": 0, 126 | "clean_effects": 0, 127 | "attached_pic": 0, 128 | "timed_thumbnails": 0 129 | }, 130 | "tags": { 131 | "LANGUAGE": "eng", 132 | "BPS": "30412", 133 | "DURATION": 
"01:13:02.712000000", 134 | "NUMBER_OF_FRAMES": "1518", 135 | "NUMBER_OF_BYTES": "16660969", 136 | "_STATISTICS_WRITING_APP": "mkvpropedit v56.1.0 ('My Friend') 64-bit", 137 | "_STATISTICS_WRITING_DATE_UTC": "2021-05-27 15:27:05", 138 | "_STATISTICS_TAGS": "BPS DURATION NUMBER_OF_FRAMES NUMBER_OF_BYTES" 139 | } 140 | } 141 | ], 142 | "format": { 143 | "filename": "Z:\\Videos\\Shows (Prospective)\\Band of Brothers (2001)\\01x01 - Currahee.mkv", 144 | "nb_streams": 3, 145 | "nb_programs": 0, 146 | "format_name": "matroska,webm", 147 | "format_long_name": "Matroska / WebM", 148 | "start_time": "0:00:00.000000", 149 | "duration": "1:13:14.162000", 150 | "size": "983004345", 151 | "bit_rate": "1789655", 152 | "probe_score": 100, 153 | "tags": { 154 | "ENCODER": "Lavf55.12.0" 155 | } 156 | } 157 | } -------------------------------------------------------------------------------- /knowit/provider.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | import typing 4 | from logging import NullHandler, getLogger 5 | 6 | import knowit.config 7 | from knowit.core import Property, Rule 8 | from knowit.properties import Quantity 9 | from knowit.units import units 10 | 11 | logger = getLogger(__name__) 12 | logger.addHandler(NullHandler()) 13 | 14 | 15 | size_property = Quantity('size', unit=units.byte, description='media size') 16 | 17 | PropertyMap = typing.Mapping[str, Property] 18 | PropertyConfig = typing.Mapping[str, PropertyMap] 19 | 20 | RuleMap = typing.Mapping[str, Rule] 21 | RuleConfig = typing.Mapping[str, RuleMap] 22 | 23 | 24 | class Provider: 25 | """Base class for all providers.""" 26 | 27 | executor: typing.Union["Executor", None] 28 | min_fps = 10 29 | max_fps = 200 30 | 31 | def __init__( 32 | self, 33 | config: knowit.config.Config, 34 | mapping: PropertyConfig, 35 | rules: typing.Optional[RuleConfig] = None, 36 | ): 37 | """Init method.""" 38 | self.config = config 39 | self.mapping = mapping 40 | 
def _describe_track(self, track, track_type, context):
    """Describe a single track as a dict of public properties.

    Works in two passes: first every property mapped for ``track_type``
    extracts its value from ``track``; then every rule registered for
    ``track_type`` runs against the values collected so far.

    :param track: raw track data handed to each property's ``extract_value``
    :param track_type: key used to index ``self.mapping`` and ``self.rules``
    :param context: passed through unchanged to properties and rules
    :raises KeyError: if ``track_type`` has no entry in ``self.mapping``
    :rtype: dict
    """
    # Public values end up in ``props`` (the return value); values flagged as
    # private are kept aside in ``pv_props`` and are only visible to rules.
    props = {}
    pv_props = {}
    for name, prop in self.mapping[track_type].items():
        if not prop:
            # Placeholder to be populated by a rule later on. Inserting the
            # key now keeps the output order equal to the mapping order.
            props[name] = None
            continue

        value = prop.extract_value(track, context)
        if value is not None:
            which = props if not prop.private else pv_props
            which[name] = value

    for name, rule in self.rules.get(track_type, {}).items():
        # A value that is already present wins unless the rule overrides.
        if props.get(name) is not None and not rule.override:
            logger.debug('Skipping rule %s since property is already present: %r', name, props[name])
            continue

        # Rules see both the public and the private values gathered so far,
        # including results stored by rules that ran earlier in this loop.
        value = rule.execute(props, pv_props, context)
        if value is not None:
            which = props if not rule.private else pv_props
            which[name] = value
        elif name in props and (not rule.override or props[name] is None):
            # The rule produced nothing: drop the entry (typically an unfilled
            # placeholder). An overriding rule that returns None leaves an
            # existing non-None value untouched.
            del props[name]

    # Private values are intentionally not part of the result.
    return props
class NotFoundExecutor(Executor):
    """Executor with a library or executable that was not found.

    Instances are falsy, so callers can write ``if executor:`` to find out
    whether a usable backend exists, and ``extract_info`` yields no data.
    """

    def __init__(self, location, version=None) -> None:
        """Initialize the object.

        :param location: the location that was searched without success
        :param version: optional version information, ``None`` by default
        """
        # Delegate to the base initializer so that both ``location`` and
        # ``version`` are always defined. Previously ``version`` was never
        # assigned, so reading it raised AttributeError.
        super().__init__(location, version)
        # Starts as False; not read anywhere in this class.
        self.warned = False

    def __bool__(self) -> bool:
        """Executor not found is always False."""
        return False

    def extract_info(self, filename):
        """Extract media info: always an empty mapping for a missing executor."""
        return {}
opts.add_argument( 35 | dest='videopath', 36 | help='Path to the video to introspect', 37 | nargs='*', 38 | type=str, 39 | ) 40 | 41 | provider_opts = opts.add_argument_group('Providers') 42 | provider_opts.add_argument( 43 | '-p', 44 | '--provider', 45 | dest='provider', 46 | help='The provider to be used: mediainfo, ffmpeg, mkvmerge or enzyme.', 47 | type=str, 48 | ) 49 | 50 | output_opts = opts.add_argument_group('Output') 51 | output_opts.add_argument( 52 | '--debug', 53 | action='store_true', 54 | dest='debug', 55 | help='Print information for debugging knowit and for reporting bugs.' 56 | ) 57 | output_opts.add_argument( 58 | '--report', 59 | action='store_true', 60 | dest='report', 61 | help='Parse media and report all non-detected values' 62 | ) 63 | output_opts.add_argument( 64 | '-y', 65 | '--yaml', 66 | action='store_true', 67 | dest='yaml', 68 | help='Display output in yaml format' 69 | ) 70 | output_opts.add_argument( 71 | '-N', 72 | '--no-units', 73 | action='store_true', 74 | dest='no_units', 75 | help='Display output without units' 76 | ) 77 | output_opts.add_argument( 78 | '-P', 79 | '--profile', 80 | dest='profile', 81 | help='Display values according to specified profile: code, default, human, technical', 82 | type=str, 83 | ) 84 | 85 | conf_opts = opts.add_argument_group('Configuration') 86 | conf_opts.add_argument( 87 | '--mediainfo', 88 | dest='mediainfo', 89 | help='The location to search for MediaInfo binaries', 90 | type=str, 91 | ) 92 | conf_opts.add_argument( 93 | '--ffmpeg', 94 | dest='ffmpeg', 95 | help='The location to search for ffprobe (FFmpeg) binaries', 96 | type=str, 97 | ) 98 | conf_opts.add_argument( 99 | '--mkvmerge', 100 | dest='mkvmerge', 101 | help='The location to search for mkvmerge (MKVToolNix) binaries', 102 | type=str, 103 | ) 104 | 105 | information_opts = opts.add_argument_group('Information') 106 | information_opts.add_argument( 107 | '--version', 108 | dest='version', 109 | action='store_true', 110 | help='Display 
def main(args: typing.Optional[typing.List[str]] = None) -> None:
    """Execute main function for entry point.

    :param args: command line arguments without the program name. ``None``
        (the default) means "use ``sys.argv[1:]``". An explicit empty list is
        honoured as "no arguments" instead of falling back to the process
        arguments.
    """
    argument_parser = build_argument_parser()
    # Only fall back to the process arguments when nothing was passed at all;
    # ``args or sys.argv[1:]`` also replaced an explicit ``[]``.
    if args is None:
        args = sys.argv[1:]
    options = argument_parser.parse_args(args)

    if options.debug:
        logger.setLevel(logging.DEBUG)
        logging.getLogger('enzyme').setLevel(logging.INFO)
    else:
        logger.setLevel(logging.WARNING)

    paths = recurse_paths(options.videopath)

    if not paths:
        # Nothing to analyse: either show version details or the usage text.
        if options.version:
            console.info(api.debug_info())
        else:
            argument_parser.print_help()
        return

    # Shared across all videos so unknown values accumulate in one place.
    # Values are nested mappings, hence ``Any`` rather than ``str``.
    report: typing.MutableMapping[str, typing.Any] = {}
    for i, video_path in enumerate(paths):
        try:
            # Options that were not given (None) must not shadow defaults.
            context = {k: v for k, v in vars(options).items() if v is not None}
            if options.report:
                context['report'] = report
            else:
                # The presence of the key is what enables reporting, so the
                # boolean option must not leak into the context.
                context.pop('report', None)
            knowit(video_path, options, context)
        except ProviderError:
            logger.exception('Error when processing video')
        except OSError:
            logger.exception('OS error when processing video')
        except UnicodeError:
            logger.exception('Character encoding error when processing video')
        except api.KnowitException as e:
            logger.error(e)

        # Periodic progress output: after every 20th video.
        if options.report and i % 20 == 19 and report:
            console.info('Unknown values so far:')
            console.info(dumps(report, options, vars(options)))

    if options.report:
        if report:
            console.info('Knowit %s found unknown values:', __version__)
            console.info(dumps(report, options, vars(options)))
            console.info('Please report them at %s', __url__)
        else:
            console.info('Knowit %s knows everything. :-)', __version__)
context['profile'])) 60 | 61 | def default_quantity_representer(self, data): 62 | """Convert quantity to string.""" 63 | return self.default_representer(format_quantity(data, context['profile'])) 64 | 65 | def default_duration_representer(self, data): 66 | """Convert quantity to string.""" 67 | return self.default_representer(format_duration(data, context['profile'])) 68 | 69 | CustomDumper.add_representer(babelfish.Language, CustomDumper.default_language_representer) 70 | CustomDumper.add_representer(timedelta, CustomDumper.default_duration_representer) 71 | CustomDumper.add_representer(units.Quantity, CustomDumper.default_quantity_representer) 72 | CustomDumper.add_representer(Decimal, CustomDumper.default_representer) 73 | 74 | return CustomDumper 75 | 76 | 77 | def get_yaml_loader(constructors=None): 78 | """Return a yaml loader that handles sequences as python lists.""" 79 | constructors = constructors or {} 80 | custom_yaml_implicit_resolvers = { 81 | ch: [(tag, regexp) for tag, regexp in vs if not tag.endswith('float')] 82 | for ch, vs in DefaultResolver.yaml_implicit_resolvers.items() 83 | } 84 | 85 | class Resolver(DefaultResolver): 86 | """Custom YAML Resolver.""" 87 | 88 | yaml_implicit_resolvers = custom_yaml_implicit_resolvers 89 | 90 | Resolver.add_implicit_resolver( # regex copied from yaml source 91 | '!decimal', 92 | re.compile(r'''^(?: 93 | [-+]?(?:[0-9][0-9_]*)\.[0-9_]*(?:[eE][-+][0-9]+)? 94 | |\.[0-9_]+(?:[eE][-+][0-9]+)? 
def format_duration(
    duration: datetime.timedelta,
    profile='default',
) -> typing.Union[str, Decimal]:
    """Format a duration according to the given profile.

    :param duration: the duration to format
    :param profile: ``'technical'`` returns ``str(duration)``; ``'code'``
        returns the duration in seconds as a rounded ``Decimal``; ``'human'``
        returns e.g. ``'1 hours 02 minutes 03 seconds'``; anything else
        returns ``'H:MM:SS'``
    :return: the formatted duration. Sub-second precision is dropped for the
        ``'human'`` and default profiles, and negative durations are rendered
        as a leading ``-`` followed by the absolute value.
    """
    if profile == 'technical':
        return str(duration)

    if profile == 'code':
        return round_decimal(
            Decimal((duration.days * 86400 + duration.seconds) * 10 ** 6 + duration.microseconds) / 10**6, min_digits=1
        )

    # Work on the absolute value with integer arithmetic. Floor division on a
    # negative float produced results such as '-1:58:30' for -90 seconds.
    magnitude = abs(duration)
    whole_seconds = magnitude.days * 86400 + magnitude.seconds
    # timedelta normalizes negative values to ``days < 0``. No sign is shown
    # when the value truncates to zero seconds.
    sign = '-' if duration.days < 0 and whole_seconds else ''

    hours, remainder = divmod(whole_seconds, 3600)
    minutes, seconds = divmod(remainder, 60)
    if profile == 'human':
        if hours > 0:
            return f'{sign}{hours} hours {minutes:02d} minutes {seconds:02d} seconds'
        if minutes > 0:
            return f'{sign}{minutes} minutes {seconds:02d} seconds'
        return f'{sign}{seconds} seconds'

    return f'{sign}{hours}:{minutes:02d}:{seconds:02d}'
def format_quantity(
    quantity,
    profile='default',
) -> str:
    """Render a quantity for display.

    The ``'code'`` profile returns the bare magnitude. Otherwise hertz values
    and values whose root unit is ``bit`` or ``bit / second`` are scaled with
    a unit prefix (powers of 1024 for the ``'technical'`` profile, powers of
    1000 for every other profile). Everything else falls back to
    ``str(quantity)``.
    """
    if profile == 'code':
        return quantity.magnitude

    unit = quantity.units
    if unit != 'bit':
        is_technical = profile == 'technical'
        # Pick the unit label and the non-technical precision; the technical
        # profile always uses three decimals.
        label = None
        precision = None
        if unit == 'hertz':
            label, precision = 'Hz', 1
        else:
            root_unit = quantity.to_root_units().units
            if root_unit == 'bit':
                label, precision = 'B', 2
            elif root_unit == 'bit / second':
                label, precision = 'bps', 1

        if label is not None:
            return _format_quantity(
                quantity.magnitude,
                unit=label,
                binary=is_technical,
                precision=3 if is_technical else precision,
            )

    return str(quantity)


def _format_quantity(
    num,
    unit: str = 'B',
    binary: bool = False,
    precision: int = 2,
) -> str:
    """Scale ``num`` down by the chosen factor and append prefix, affix and unit."""
    factor, affix = (1024, 'i') if binary else (1000, '')
    prefixes = ('', 'K', 'M', 'G', 'T', 'P', 'E', 'Z')

    steps = 0
    while steps < len(prefixes) and abs(num) >= factor:
        num /= factor
        steps += 1

    # Once every listed prefix has been used up, the value is expressed in 'Y'.
    prefix = prefixes[steps] if steps < len(prefixes) else 'Y'

    return f'{num:3.{precision}f} {prefix}{affix}{unit}'
class EnzymeProvider(Provider):
    """Enzyme Provider.

    Describes MKV files using the pure-python ``enzyme`` parser. The property
    mapping and the rules below are fixed; only ``config`` is configurable.
    """

    def __init__(self, config, *args, **kwargs):
        """Init method.

        :param config: knowit configuration, forwarded to the configurable
            properties (scan type and codecs)
        """
        super().__init__(config, {
            'general': {
                'title': Property('title', description='media title'),
                'duration': Duration('duration', description='media duration'),
            },
            'video': {
                'id': Basic('number', data_type=int, description='video track number'),
                'name': Property('name', description='video track name'),
                'language': Language('language', description='video language'),
                'width': Quantity('width', unit=units.pixel),
                'height': Quantity('height', unit=units.pixel),
                'scan_type': YesNo('interlaced', yes='Interlaced', no='Progressive', default='Progressive',
                                   config=config, config_key='ScanType',
                                   description='video scan type'),
                'resolution': None,  # populated with ResolutionRule
                # 'bit_depth', Property('bit_depth', Integer('video bit depth')),
                'codec': VideoCodec(config, 'codec_id', description='video codec'),
                'forced': YesNo('forced', hide_value=False, description='video track forced'),
                'default': YesNo('default', hide_value=False, description='video track default'),
                'enabled': YesNo('enabled', hide_value=True, description='video track enabled'),
            },
            'audio': {
                'id': Basic('number', data_type=int, description='audio track number'),
                'name': Property('name', description='audio track name'),
                'language': Language('language', description='audio language'),
                'codec': AudioCodec(config, 'codec_id', description='audio codec'),
                'channels_count': Basic('channels', data_type=int, description='audio channels count'),
                'channels': None,  # populated with AudioChannelsRule
                'forced': YesNo('forced', hide_value=False, description='audio track forced'),
                'default': YesNo('default', hide_value=False, description='audio track default'),
                'enabled': YesNo('enabled', hide_value=True, description='audio track enabled'),
            },
            'subtitle': {
                'id': Basic('number', data_type=int, description='subtitle track number'),
                'name': Property('name', description='subtitle track name'),
                'language': Language('language', description='subtitle language'),
                'hearing_impaired': None,  # populated with HearingImpairedRule
                'closed_caption': None,  # populated with ClosedCaptionRule
                'forced': YesNo('forced', hide_value=False, description='subtitle track forced'),
                'default': YesNo('default', hide_value=False, description='subtitle track default'),
                'enabled': YesNo('enabled', hide_value=True, description='subtitle track enabled'),
            },
        }, {
            'video': {
                'guessed': GuessTitleRule('guessed properties', private=True),
                'language': LanguageRule('video language', override=True),
                'resolution': ResolutionRule('video resolution'),
            },
            'audio': {
                'guessed': GuessTitleRule('guessed properties', private=True),
                'language': LanguageRule('audio language', override=True),
                'channels': AudioChannelsRule('audio channels'),
            },
            'subtitle': {
                'guessed': GuessTitleRule('guessed properties', private=True),
                'language': LanguageRule('subtitle language', override=True),
                'hearing_impaired': HearingImpairedRule('subtitle hearing impaired', override=True),
                'closed_caption': ClosedCaptionRule('closed caption', override=True),
            }
        })

    def loaded(self) -> bool:
        """Return always True as enzyme is always imported."""
        return True

    def accepts(self, video_path):
        """Accept only MKV files."""
        return video_path.lower().endswith('.mkv')

    @classmethod
    def extract_info(cls, video_path):
        """Extract info from the video.

        :param video_path: path of the MKV file, opened in binary mode
        :return: the parsed MKV structure converted with ``to_dict``
        """
        with open(video_path, 'rb') as f:
            return to_dict(enzyme.MKV(f))

    def describe(self, video_path, context):
        """Return video metadata.

        :param video_path: path of the MKV file to describe
        :param context: mutable context; a ``debug_data`` callable returning
            the raw data as JSON text is stored in it
        :return: the described tracks plus a ``provider`` entry, or an empty
            dict when the parsed file carries an ``info`` section that is None
        :raises MalformedFileError: if enzyme cannot parse the file or nothing
            could be described
        """
        # Only the parsing step can raise MalformedMKVError, so keep the
        # guarded region that small and chain the original cause.
        try:
            raw_data = self.extract_info(video_path)
        except enzyme.MalformedMKVError as error:  # pragma: no cover
            raise MalformedFileError from error

        def debug_data():
            """Debug data."""
            return json.dumps(raw_data, cls=get_json_encoder(context), indent=4, ensure_ascii=False)

        context['debug_data'] = debug_data

        # Serialising is expensive, so only do it when the message is emitted.
        # The callable must be invoked here: passing ``debug_data`` itself
        # logged the function object. The second, str.format-style debug call
        # is dropped; it handed keyword arguments to ``logger.debug``, which
        # raises TypeError.
        if logger.isEnabledFor(logging.DEBUG):
            logger.debug('Video %r scanned using enzyme %r has raw data:\n%s',
                         video_path, enzyme.__version__, debug_data())

        data = defaultdict(dict)
        data.update(raw_data)
        if 'info' in data and data['info'] is None:
            return {}

        result = self._describe_tracks(video_path, data.get('info', {}), data.get('video_tracks'),
                                       data.get('audio_tracks'), data.get('subtitle_tracks'), context)

        if not result:
            raise MalformedFileError

        result['provider'] = {
            'name': 'enzyme',
            'version': self.version
        }

        return result

    @property
    def version(self):
        """Return enzyme version information."""
        return {'enzyme': enzyme.__version__}
-------------------------------------------------------------------------------- 1 | { 2 | "media":{ 3 | "@ref":"tests/data/7.1-dts-hd-ma-speaker-mapping-test-file.mkv", 4 | "track":[ 5 | { 6 | "@type":"General", 7 | "Count":"331", 8 | "StreamCount":"1", 9 | "StreamKind":"General", 10 | "StreamKind_String":"General", 11 | "StreamKindID":"0", 12 | "UniqueID":"233259075675158394332089080443022607862", 13 | "UniqueID_String":"233259075675158394332089080443022607862 (0xAF7C105968F28EDE95280D4670BC05F6)", 14 | "VideoCount":"1", 15 | "AudioCount":"1", 16 | "Video_Format_List":"AVC", 17 | "Video_Format_WithHint_List":"AVC", 18 | "Video_Codec_List":"AVC", 19 | "Audio_Format_List":"DTS XLL", 20 | "Audio_Format_WithHint_List":"DTS XLL", 21 | "Audio_Codec_List":"DTS XLL", 22 | "Audio_Language_List":"English", 23 | "CompleteName":"tests/data/7.1-dts-hd-ma-speaker-mapping-test-file.mkv", 24 | "FolderName":"tests/data", 25 | "FileNameExtension":"7.1-dts-hd-ma-speaker-mapping-test-file.mkv", 26 | "FileName":"7.1-dts-hd-ma-speaker-mapping-test-file", 27 | "FileExtension":"mkv", 28 | "Format":"Matroska", 29 | "Format_String":"Matroska", 30 | "Format_Url":"https://matroska.org/downloads/windows.html", 31 | "Format_Extensions":"mkv mk3d mka mks", 32 | "Format_Commercial":"Matroska", 33 | "Format_Version":"4", 34 | "FileSize":"40772443", 35 | "FileSize_String":"38.9 MiB", 36 | "FileSize_String1":"39 MiB", 37 | "FileSize_String2":"39 MiB", 38 | "FileSize_String3":"38.9 MiB", 39 | "FileSize_String4":"38.88 MiB", 40 | "Duration":"97.931", 41 | "Duration_String":"1 min 37 s", 42 | "Duration_String1":"1 min 37 s 931 ms", 43 | "Duration_String2":"1 min 37 s", 44 | "Duration_String3":"00:01:37.931", 45 | "Duration_String4":"00:01:37;22", 46 | "Duration_String5":"00:01:37.931 (00:01:37;22)", 47 | "OverallBitRate_Mode":"VBR", 48 | "OverallBitRate_Mode_String":"Variable", 49 | "OverallBitRate":"3330708", 50 | "OverallBitRate_String":"3 331 kb/s", 51 | "FrameRate":"23.976", 52 | 
"FrameRate_String":"23.976 FPS", 53 | "FrameCount":"2348", 54 | "IsStreamable":"Yes", 55 | "Title":"7.1Ch DTS-HD MA - Speaker Mapping Test File", 56 | "Movie":"7.1Ch DTS-HD MA - Speaker Mapping Test File", 57 | "Encoded_Date":"UTC 2013-12-13 17:49:28", 58 | "File_Modified_Date":"UTC 2016-04-10 07:47:08", 59 | "File_Modified_Date_Local":"2016-04-10 09:47:08", 60 | "Encoded_Application":"mkvmerge v6.6.0 ('The Edge Of The In Between') built on Dec 1 2013 17:55:00", 61 | "Encoded_Application_String":"mkvmerge v6.6.0 ('The Edge Of The In Between') built on Dec 1 2013 17:55:00", 62 | "Encoded_Library":"libebml v1.3.0 + libmatroska v1.4.1", 63 | "Encoded_Library_String":"libebml v1.3.0 + libmatroska v1.4.1" 64 | }, 65 | { 66 | "@type":"Video", 67 | "Count":"379", 68 | "StreamCount":"1", 69 | "StreamKind":"Video", 70 | "StreamKind_String":"Video", 71 | "StreamKindID":"0", 72 | "StreamOrder":"0", 73 | "ID":"1", 74 | "ID_String":"1", 75 | "UniqueID":"11835337130358454411", 76 | "Format":"AVC", 77 | "Format_String":"AVC", 78 | "Format_Info":"Advanced Video Codec", 79 | "Format_Url":"http://developers.videolan.org/x264.html", 80 | "Format_Commercial":"AVC", 81 | "Format_Profile":"Main", 82 | "Format_Level":"4", 83 | "Format_Settings":"CABAC / 4 Ref Frames", 84 | "Format_Settings_CABAC":"Yes", 85 | "Format_Settings_CABAC_String":"Yes", 86 | "Format_Settings_RefFrames":"4", 87 | "Format_Settings_RefFrames_String":"4 frames", 88 | "InternetMediaType":"video/H264", 89 | "CodecID":"V_MPEG4/ISO/AVC", 90 | "CodecID_Url":"http://ffdshow-tryout.sourceforge.net/", 91 | "Duration":"97.931", 92 | "Duration_String":"1 min 37 s", 93 | "Duration_String1":"1 min 37 s 931 ms", 94 | "Duration_String2":"1 min 37 s", 95 | "Duration_String3":"00:01:37.931", 96 | "Duration_String4":"00:01:37;22", 97 | "Duration_String5":"00:01:37.931 (00:01:37;22)", 98 | "BitRate_Mode":"VBR", 99 | "BitRate_Mode_String":"Variable", 100 | "BitRate_Maximum":"40000000", 101 | "BitRate_Maximum_String":"40.0 Mb/s", 102 | 
"Width":"1920", 103 | "Width_String":"1 920 pixels", 104 | "Height":"1080", 105 | "Height_String":"1 080 pixels", 106 | "Stored_Height":"1088", 107 | "Sampled_Width":"1920", 108 | "Sampled_Height":"1080", 109 | "PixelAspectRatio":"1.000", 110 | "DisplayAspectRatio":"1.778", 111 | "DisplayAspectRatio_String":"16:9", 112 | "FrameRate_Mode":"CFR", 113 | "FrameRate_Mode_String":"Constant", 114 | "FrameRate":"23.976", 115 | "FrameRate_String":"23.976 FPS", 116 | "FrameCount":"2348", 117 | "ColorSpace":"YUV", 118 | "ChromaSubsampling":"4:2:0", 119 | "ChromaSubsampling_String":"4:2:0", 120 | "BitDepth":"8", 121 | "BitDepth_String":"8 bits", 122 | "ScanType":"Progressive", 123 | "ScanType_String":"Progressive", 124 | "Delay":"0.000", 125 | "Delay_String3":"00:00:00.000", 126 | "Delay_Source":"Container", 127 | "Delay_Source_String":"Container", 128 | "Default":"Yes", 129 | "Default_String":"Yes", 130 | "Forced":"No", 131 | "Forced_String":"No", 132 | "BufferSize":"30000000", 133 | "extra":{ 134 | "FrameCount_Source":"General_Duration", 135 | "Duration_Source":"General_Duration" 136 | } 137 | }, 138 | { 139 | "@type":"Audio", 140 | "Count":"282", 141 | "StreamCount":"1", 142 | "StreamKind":"Audio", 143 | "StreamKind_String":"Audio", 144 | "StreamKindID":"0", 145 | "StreamOrder":"1", 146 | "ID":"2", 147 | "ID_String":"2", 148 | "UniqueID":"11679026580382524291", 149 | "Format":"DTS", 150 | "Format_String":"DTS XLL", 151 | "Format_Info":"Digital Theater Systems", 152 | "Format_Url":"https://en.wikipedia.org/wiki/DTS_(sound_system)", 153 | "Format_Commercial":"DTS-HD Master Audio", 154 | "Format_Commercial_IfAny":"DTS-HD Master Audio", 155 | "Format_Settings_Mode":"16", 156 | "Format_Settings_Endianness":"Big", 157 | "Format_AdditionalFeatures":"XLL", 158 | "CodecID":"A_DTS", 159 | "Duration":"97.931", 160 | "Duration_String":"1 min 37 s", 161 | "Duration_String1":"1 min 37 s 931 ms", 162 | "Duration_String2":"1 min 37 s", 163 | "Duration_String3":"00:01:37.931", 164 | 
"Duration_String5":"00:01:37.931", 165 | "BitRate_Mode":"VBR", 166 | "BitRate_Mode_String":"Variable", 167 | "Channels":"6", 168 | "Channels_String":"6 channels", 169 | "Channels_Original":"8", 170 | "Channels_Original_String":"8 channels", 171 | "ChannelPositions_Original":"Front: L C R, Side: L R, Back: L R, LFE", 172 | "ChannelLayout_Original":"C L R LFE Lsr Rsr Lss Rss", 173 | "SamplesPerFrame":"512", 174 | "SamplingRate":"48000", 175 | "SamplingRate_String":"48.0 kHz", 176 | "SamplingCount":"4700688", 177 | "FrameRate":"93.750", 178 | "FrameRate_String":"93.750 FPS (512 SPF)", 179 | "BitDepth":"24", 180 | "BitDepth_String":"24 bits", 181 | "Compression_Mode":"Lossless", 182 | "Compression_Mode_String":"Lossless", 183 | "Delay":"0.000", 184 | "Delay_String3":"00:00:00.000", 185 | "Delay_Source":"Container", 186 | "Delay_Source_String":"Container", 187 | "Video_Delay":"0.000", 188 | "Video_Delay_String3":"00:00:00.000", 189 | "Title":"7.1Ch DTS-HD MA", 190 | "Language":"en", 191 | "Language_String":"English", 192 | "Language_String1":"English", 193 | "Language_String2":"en", 194 | "Language_String3":"eng", 195 | "Language_String4":"en", 196 | "Default":"Yes", 197 | "Default_String":"Yes", 198 | "Forced":"No", 199 | "Forced_String":"No", 200 | "extra":{ 201 | "SamplingCount_Source":"General_Duration", 202 | "Duration_Source":"General_Duration" 203 | } 204 | } 205 | ] 206 | } 207 | } -------------------------------------------------------------------------------- /knowit/core.py: -------------------------------------------------------------------------------- 1 | import typing 2 | from logging import NullHandler, getLogger 3 | 4 | logger = getLogger(__name__) 5 | logger.addHandler(NullHandler()) 6 | 7 | T = typing.TypeVar('T') 8 | 9 | _visible_chars_table = dict.fromkeys(range(32)) 10 | 11 | 12 | def _is_unknown(value: typing.Any) -> bool: 13 | return isinstance(value, str) and (not value or value.lower() == 'unknown') 14 | 15 | 16 | class 
Reportable(typing.Generic[T]): 17 | """Reportable abstract class.""" 18 | 19 | def __init__( 20 | self, 21 | *args: str, 22 | description: typing.Optional[str] = None, 23 | reportable: bool = True, 24 | ): 25 | """Initialize the object.""" 26 | self.names = args 27 | self._description = description 28 | self.reportable = reportable 29 | 30 | @property 31 | def description(self) -> str: 32 | """Rule description.""" 33 | return self._description or '|'.join(self.names) 34 | 35 | def report(self, value: typing.Union[str, T], context: typing.MutableMapping) -> None: 36 | """Report unknown value.""" 37 | if not value or not self.reportable: 38 | return 39 | 40 | if 'report' in context: 41 | report_map = context['report'].setdefault(self.description, {}) 42 | if value not in report_map: 43 | report_map[value] = context['path'] 44 | logger.info('Invalid %s: %r', self.description, value) 45 | 46 | 47 | class Property(Reportable[T]): 48 | """Property class.""" 49 | 50 | def __init__( 51 | self, 52 | *args: str, 53 | default: typing.Optional[T] = None, 54 | private: bool = False, 55 | description: typing.Optional[str] = None, 56 | delimiter: str = ' / ', 57 | **kwargs, 58 | ): 59 | """Init method.""" 60 | super().__init__(*args, description=description, **kwargs) 61 | self.default = default 62 | self.private = private 63 | # Used to detect duplicated values. 
e.g.: en / en or High@L4.0 / High@L4.0 or Progressive / Progressive 64 | self.delimiter = delimiter 65 | 66 | @classmethod 67 | def _extract_value(cls, 68 | track: typing.Mapping, 69 | name: str, 70 | names: typing.List[str]): 71 | if len(names) == 2: 72 | parent_value = track.get(names[0], track.get(names[0].upper(), {})) 73 | return parent_value.get(names[1], parent_value.get(names[1].upper())) 74 | 75 | return track.get(name, track.get(name.upper())) 76 | 77 | def extract_value( 78 | self, 79 | track: typing.Mapping, 80 | context: typing.MutableMapping, 81 | ) -> typing.Optional[T]: 82 | """Extract the property value from a given track.""" 83 | for name in self.names: 84 | names = name.split('.') 85 | value = self._extract_value(track, name, names) 86 | if value is None: 87 | if self.default is None: 88 | continue 89 | 90 | value = self.default 91 | 92 | if isinstance(value, bytes): 93 | value = value.decode() 94 | 95 | if isinstance(value, str): 96 | value = value.translate(_visible_chars_table).strip() 97 | if _is_unknown(value): 98 | continue 99 | value = self._deduplicate(value) 100 | 101 | result = self.handle(value, context) 102 | if result is not None and not _is_unknown(result): 103 | return result 104 | 105 | return None 106 | 107 | @classmethod 108 | def _deduplicate(cls, value: str) -> str: 109 | values = value.split(' / ') 110 | if len(values) == 2 and values[0] == values[1]: 111 | return values[0] 112 | return value 113 | 114 | def handle(self, value: T, context: typing.MutableMapping) -> typing.Optional[T]: 115 | """Return the value without any modification.""" 116 | return value 117 | 118 | 119 | class Configurable(Property[T]): 120 | """Configurable property where values are in a config mapping.""" 121 | 122 | def __init__(self, config: typing.Mapping[str, typing.Mapping], *args: str, 123 | config_key: typing.Optional[str] = None, **kwargs): 124 | """Init method.""" 125 | super().__init__(*args, **kwargs) 126 | self.mapping = getattr(config, 
config_key or self.__class__.__name__) if config else {} 127 | 128 | @classmethod 129 | def _extract_key(cls, value: str) -> typing.Union[str, bool]: 130 | return value.upper() 131 | 132 | @classmethod 133 | def _extract_fallback_key(cls, value: str, key: str) -> typing.Optional[T]: 134 | return None 135 | 136 | def _lookup( 137 | self, 138 | key: str, 139 | context: typing.MutableMapping, 140 | ) -> typing.Union[T, None, bool]: 141 | result = self.mapping.get(key) 142 | if result is not None: 143 | result = getattr(result, context.get('profile') or 'default') 144 | return result if result != '__ignored__' else False 145 | return None 146 | 147 | def handle(self, value, context): 148 | """Return Variable or Constant.""" 149 | key = self._extract_key(value) 150 | if key is False: 151 | return None 152 | 153 | result = self._lookup(key, context) 154 | if result is False: 155 | return None 156 | 157 | while not result and key: 158 | key = self._extract_fallback_key(value, key) 159 | result = self._lookup(key, context) 160 | if result is False: 161 | return None 162 | 163 | if not result: 164 | self.report(value, context) 165 | 166 | return result 167 | 168 | 169 | class MultiValue(Property): 170 | """Property with multiple values.""" 171 | 172 | def __init__(self, prop: typing.Optional[Property] = None, delimiter='/', single=False, 173 | handler: typing.Optional[ 174 | typing.Callable[[typing.Optional[str], typing.MutableMapping], typing.Optional[str]]] = None, 175 | name=None, **kwargs): 176 | """Init method.""" 177 | super().__init__(*(prop.names if prop else (name,)), **kwargs) 178 | self.prop = prop 179 | self.delimiter = delimiter 180 | self.single = single 181 | self.handler = handler 182 | 183 | def handle( 184 | self, 185 | value: str, 186 | context: typing.MutableMapping, 187 | ) -> typing.Optional[typing.Union[str, typing.List[str]]]: 188 | """Handle properties with multiple values.""" 189 | if self.handler: 190 | call = self.handler 191 | elif self.prop: 
192 | call = self.prop.handle 193 | else: 194 | call = None 195 | 196 | if call is None: 197 | raise NotImplementedError('No handler available') 198 | 199 | result = call(value, context) 200 | if result is not None: 201 | return result 202 | 203 | if isinstance(value, list): 204 | if len(value) == 1: 205 | values = self._split(value[0], self.delimiter) 206 | else: 207 | values = value 208 | else: 209 | values = self._split(value, self.delimiter) 210 | 211 | if values is None: 212 | return call(values, context) 213 | if len(values) > 1 and not self.single: 214 | part_results = [call(item, context) if not _is_unknown(item) else None for item in values] 215 | results = [r for r in part_results if r is not None] 216 | if results: 217 | return results 218 | return call(values[0], context) 219 | 220 | @classmethod 221 | def _split( 222 | cls, 223 | value: typing.Optional[T], 224 | delimiter: str = '/', 225 | ) -> typing.Optional[typing.List[str]]: 226 | if value is None: 227 | return None 228 | 229 | return [x.strip() for x in str(value).split(delimiter)] 230 | 231 | 232 | class Rule(Reportable[T]): 233 | """Rule abstract class.""" 234 | 235 | def __init__(self, name: str, private=False, override=False, **kwargs): 236 | """Initialize the object.""" 237 | super().__init__(name, **kwargs) 238 | self.private = private 239 | self.override = override 240 | 241 | def execute(self, props, pv_props, context: typing.Mapping): 242 | """How to execute a rule.""" 243 | raise NotImplementedError 244 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # KnowIt 2 | 3 | Know better your media files. 
4 | 5 | [![Latest 6 | Version](https://img.shields.io/pypi/v/knowit.svg)](https://pypi.python.org/pypi/knowit) 7 | 8 | [![tests](https://github.com/ratoaq2/knowit/actions/workflows/test.yml/badge.svg)](https://github.com/ratoaq2/knowit/actions/workflows/test.yml) 9 | 10 | [![License](https://img.shields.io/github/license/ratoaq2/knowit.svg)](https://github.com/ratoaq2/knowit/blob/master/LICENSE) 11 | 12 | ![PyPI - Python Version](https://img.shields.io/pypi/pyversions/knowit) 13 | 14 | - Project page 15 | 16 | 17 | ## Usage 18 | 19 | ### CLI 20 | 21 | Extract information from a video file: 22 | 23 | $ knowit /folder/Audio Samples/hd_dtsma_7.1.mkv 24 | For: /folder/Audio Samples/hd_dtsma_7.1.mkv 25 | Knowit 0.4.0 found: 26 | { 27 | "title": "7.1Ch DTS-HD MA - Speaker Mapping Test File", 28 | "path": "/folder/Audio Samples/hd_dtsma_7.1.mkv", 29 | "duration": "0:01:37", 30 | "size": "40.77 MB", 31 | "bit_rate": "3.3 Mbps", 32 | "container": "mkv", 33 | "video": [ 34 | { 35 | "id": 1, 36 | "duration": "0:01:37", 37 | "width": "1920 pixel", 38 | "height": "1080 pixel", 39 | "scan_type": "Progressive", 40 | "aspect_ratio": "1.778", 41 | "pixel_aspect_ratio": "1.0", 42 | "resolution": "1080p", 43 | "frame_rate": "23.976 FPS", 44 | "bit_depth": "8 bit", 45 | "codec": "H.264", 46 | "profile": "Main", 47 | "profile_level": "4", 48 | "media_type": "video/H264", 49 | "default": true 50 | } 51 | ], 52 | "audio": [ 53 | { 54 | "id": 2, 55 | "name": "7.1Ch DTS-HD MA", 56 | "language": "English", 57 | "duration": "0:01:37", 58 | "codec": "DTS-HD", 59 | "profile": "Master Audio", 60 | "channels_count": 8, 61 | "channels": "7.1", 62 | "bit_depth": "24 bit", 63 | "bit_rate_mode": "Variable", 64 | "sampling_rate": "48.0 KHz", 65 | "compression": "Lossless", 66 | "default": true 67 | } 68 | ], 69 | "provider": { 70 | "name": "mediainfo", 71 | "version": { 72 | "pymediainfo": "5.0.3", 73 | "libmediainfo.so.0": "v20.9" 74 | } 75 | } 76 | } 77 | 78 | Extract information from a video file 
using ffmpeg: 79 | 80 | $ knowit --provider ffmpeg /folder/Audio Samples/hd_dtsma_7.1.mkv 81 | For: /folder/Audio Samples/hd_dtsma_7.1.mkv 82 | Knowit 0.4.0 found: 83 | { 84 | "title": "7.1Ch DTS-HD MA - Speaker Mapping Test File", 85 | "path": "/folder/Audio Samples/hd_dtsma_7.1.mkv", 86 | "duration": "0:01:37", 87 | "size": "40.77 MB", 88 | "bit_rate": "3.3 Mbps", 89 | "container": "mkv", 90 | "video": [ 91 | { 92 | "id": 0, 93 | "width": "1920 pixel", 94 | "height": "1080 pixel", 95 | "scan_type": "Progressive", 96 | "aspect_ratio": "1.778", 97 | "pixel_aspect_ratio": "1.0", 98 | "resolution": "1080p", 99 | "frame_rate": "23.976 FPS", 100 | "bit_depth": "8 bit", 101 | "codec": "H.264", 102 | "profile": "Main", 103 | "default": true 104 | } 105 | ], 106 | "audio": [ 107 | { 108 | "id": 1, 109 | "name": "7.1Ch DTS-HD MA", 110 | "language": "English", 111 | "codec": "DTS-HD", 112 | "profile": "Master Audio", 113 | "channels_count": 8, 114 | "channels": "7.1", 115 | "bit_depth": "24 bit", 116 | "sampling_rate": "48.0 KHz", 117 | "default": true 118 | } 119 | ], 120 | "provider": { 121 | "name": "ffmpeg", 122 | "version": { 123 | "ffprobe": "v4.2.4-1ubuntu0.1" 124 | } 125 | } 126 | } 127 | 128 | Using docker: 129 | 130 | docker run -it --rm -v /folder:/folder knowit /folder/Audio Samples/hd_dtsma_7.1.mkv 131 | For: /folder/Audio Samples/hd_dtsma_7.1.mkv 132 | Knowit 0.4.0 found: 133 | { 134 | "title": "7.1Ch DTS-HD MA - Speaker Mapping Test File", 135 | "path": "/folder/Audio Samples/hd_dtsma_7.1.mkv", 136 | "duration": "0:01:37", 137 | "size": "40.77 MB", 138 | "bit_rate": "3.3 Mbps", 139 | "container": "mkv", 140 | "video": [ 141 | { 142 | "id": 1, 143 | "duration": "0:01:37", 144 | "width": "1920 pixel", 145 | "height": "1080 pixel", 146 | "scan_type": "Progressive", 147 | "aspect_ratio": "1.778", 148 | "pixel_aspect_ratio": "1.0", 149 | "resolution": "1080p", 150 | "frame_rate": "23.976 FPS", 151 | "bit_depth": "8 bit", 152 | "codec": "H.264", 153 | "profile": 
"Main", 154 | "profile_level": "4", 155 | "media_type": "video/H264", 156 | "default": true 157 | } 158 | ], 159 | "audio": [ 160 | { 161 | "id": 2, 162 | "name": "7.1Ch DTS-HD MA", 163 | "language": "English", 164 | "duration": "0:01:37", 165 | "codec": "DTS-HD", 166 | "profile": "Master Audio", 167 | "channels_count": 8, 168 | "channels": "7.1", 169 | "bit_depth": "24 bit", 170 | "bit_rate_mode": "Variable", 171 | "sampling_rate": "48.0 KHz", 172 | "compression": "Lossless", 173 | "default": true 174 | } 175 | ], 176 | "provider": { 177 | "name": "mediainfo", 178 | "version": { 179 | "pymediainfo": "5.0.3", 180 | "libmediainfo.so.0": "v20.9" 181 | } 182 | } 183 | } 184 | 185 | All available CLI options: 186 | 187 | $ knowit --help 188 | usage: knowit [-h] [-p PROVIDER] [--debug] [--report] [-y] [-N] [-P PROFILE] [--mediainfo MEDIAINFO] [--ffmpeg FFMPEG] [--mkvmerge MKVMERGE] [--version] [videopath [videopath ...]] 189 | 190 | positional arguments: 191 | videopath Path to the video to introspect 192 | 193 | optional arguments: 194 | -h, --help show this help message and exit 195 | 196 | Providers: 197 | -p PROVIDER, --provider PROVIDER 198 | The provider to be used: mediainfo, ffmpeg, mkvmerge or enzyme. 199 | 200 | Output: 201 | --debug Print information for debugging knowit and for reporting bugs. 202 | --report Parse media and report all non-detected values 203 | -y, --yaml Display output in yaml format 204 | -N, --no-units Display output without units 205 | -P PROFILE, --profile PROFILE 206 | Display values according to specified profile: code, default, human, technical 207 | 208 | Configuration: 209 | --mediainfo MEDIAINFO 210 | The location to search for MediaInfo binaries 211 | --ffmpeg FFMPEG The location to search for ffprobe (FFmpeg) binaries 212 | --mkvmerge MKVMERGE The location to search for mkvmerge (MKVToolNix) binaries 213 | 214 | Information: 215 | --version Display knowit version. 
216 | 217 | ## Installation 218 | 219 | KnowIt can be installed as a regular python module by running: 220 | 221 | $ [sudo] pip install knowit 222 | 223 | For better isolation from your system, you should use a dedicated 224 | virtualenv or install for your user only using the `--user` flag. 225 | 226 | ## External dependencies 227 | 228 | KnowIt can use MediaInfo, ffprobe (FFmpeg) or mkvmerge (MKVToolNix). 229 | 230 | KnowIt supports MKV regardless of whether MediaInfo, FFmpeg or MKVToolNix are 231 | installed. 232 | 233 | MediaInfo, FFmpeg or MKVToolNix increase the number of supported 234 | formats and the amount of extracted information. 235 | 236 | MediaInfo is the default provider. Visit their 237 | [website](http://mediaarea.net/MediaInfo) and install the proper package 238 | for your system. 239 | 240 | ffprobe (FFmpeg) can be downloaded 241 | [here](https://ffmpeg.org/download.html). 242 | 243 | mkvmerge (MKVToolNix) can be downloaded 244 | [here](https://mkvtoolnix.download/downloads.html). 245 | -------------------------------------------------------------------------------- /tests/data/enzyme/issue-24-example-01.mkv.json: -------------------------------------------------------------------------------- 1 | { 2 | "info": { 3 | "title": "The 100 06x09 (What You Take With You) - release by messafan for CasStudio", 4 | "duration": "0:42:05", 5 | "date_utc": "2019-07-26 11:08:51", 6 | "muxing_app": "libebml v1.3.7 + libmatroska v1.5.0", 7 | "writing_app": "mkvmerge v34.0.0 ('Sight and Seen') 64-bit" 8 | }, 9 | "video_tracks": [ 10 | { 11 | "type": 1, 12 | "number": 1, 13 | "language": "und", 14 | "enabled": true, 15 | "default": true, 16 | "forced": false, 17 | "lacing": false, 18 | "codec_id": "V_MPEG4/ISO/AVC", 19 | "width": 1280, 20 | "height": 720, 21 | "interlaced": false, 22 | "stereo_mode": 0, 23 | "crop": {}, 24 | "display_width": 1280, 25 | "display_height": 720, 26 | "aspect_ratio_type": 0 27 | } 28 | ], 29 | "audio_tracks": [ 30 | { 31 | "type": 2, 32 |
"number": 2, 33 | "language": "por", 34 | "enabled": true, 35 | "default": true, 36 | "forced": true, 37 | "lacing": true, 38 | "codec_id": "A_AC3", 39 | "sampling_frequency": 48000.0, 40 | "channels": 2, 41 | "output_sampling_frequency": 48000.0 42 | }, 43 | { 44 | "type": 2, 45 | "number": 3, 46 | "name": "Stereo", 47 | "language": "eng", 48 | "enabled": true, 49 | "default": false, 50 | "forced": false, 51 | "lacing": true, 52 | "codec_id": "A_AC3", 53 | "sampling_frequency": 48000.0, 54 | "channels": 2, 55 | "output_sampling_frequency": 48000.0 56 | } 57 | ], 58 | "subtitle_tracks": [], 59 | "chapters": [], 60 | "tags": [ 61 | { 62 | "targets": [ 63 | { 64 | "id": 26826, 65 | "type": 1, 66 | "name": "TargetTypeValue", 67 | "level": 4, 68 | "position": 652854402, 69 | "size": 1, 70 | "data": 50 71 | } 72 | ], 73 | "simpletags": [ 74 | { 75 | "name": "ENCODER", 76 | "language": "und", 77 | "default": true, 78 | "string": "Lavf58.20.100" 79 | } 80 | ] 81 | }, 82 | { 83 | "targets": [ 84 | { 85 | "id": 26826, 86 | "type": 1, 87 | "name": "TargetTypeValue", 88 | "level": 4, 89 | "position": 652854449, 90 | "size": 1, 91 | "data": 50 92 | }, 93 | { 94 | "id": 25541, 95 | "type": 1, 96 | "name": "TagTrackUID", 97 | "level": 4, 98 | "position": 652854453, 99 | "size": 8, 100 | "data": 12656891187800456645 101 | }, 102 | { 103 | "id": 25546, 104 | "type": 3, 105 | "name": "TargetType", 106 | "level": 4, 107 | "position": 652854464, 108 | "size": 5, 109 | "data": "MOVIE" 110 | } 111 | ], 112 | "simpletags": [ 113 | { 114 | "name": "BPS", 115 | "language": "eng", 116 | "default": true, 117 | "string": "192000" 118 | }, 119 | { 120 | "name": "DURATION", 121 | "language": "eng", 122 | "default": true, 123 | "string": "00:42:05.056000000" 124 | }, 125 | { 126 | "name": "NUMBER_OF_FRAMES", 127 | "language": "eng", 128 | "default": true, 129 | "string": "78908" 130 | }, 131 | { 132 | "name": "NUMBER_OF_BYTES", 133 | "language": "eng", 134 | "default": true, 135 | "string": 
"60601344" 136 | }, 137 | { 138 | "name": "_STATISTICS_WRITING_APP", 139 | "language": "eng", 140 | "default": true, 141 | "string": "mkvmerge v34.0.0 ('Sight and Seen') 64-bit" 142 | }, 143 | { 144 | "name": "_STATISTICS_WRITING_DATE_UTC", 145 | "language": "eng", 146 | "default": true, 147 | "string": "2019-07-26 11:08:51" 148 | }, 149 | { 150 | "name": "_STATISTICS_TAGS", 151 | "language": "eng", 152 | "default": true, 153 | "string": "BPS DURATION NUMBER_OF_FRAMES NUMBER_OF_BYTES" 154 | } 155 | ] 156 | }, 157 | { 158 | "targets": [ 159 | { 160 | "id": 26826, 161 | "type": 1, 162 | "name": "TargetTypeValue", 163 | "level": 4, 164 | "position": 652854836, 165 | "size": 1, 166 | "data": 50 167 | }, 168 | { 169 | "id": 25541, 170 | "type": 1, 171 | "name": "TagTrackUID", 172 | "level": 4, 173 | "position": 652854840, 174 | "size": 1, 175 | "data": 1 176 | }, 177 | { 178 | "id": 25546, 179 | "type": 3, 180 | "name": "TargetType", 181 | "level": 4, 182 | "position": 652854844, 183 | "size": 5, 184 | "data": "MOVIE" 185 | } 186 | ], 187 | "simpletags": [ 188 | { 189 | "name": "BPS", 190 | "language": "eng", 191 | "default": true, 192 | "string": "1749265" 193 | }, 194 | { 195 | "name": "DURATION", 196 | "language": "eng", 197 | "default": true, 198 | "string": "00:42:01.186000000" 199 | }, 200 | { 201 | "name": "NUMBER_OF_FRAMES", 202 | "language": "eng", 203 | "default": true, 204 | "string": "60448" 205 | }, 206 | { 207 | "name": "NUMBER_OF_BYTES", 208 | "language": "eng", 209 | "default": true, 210 | "string": "551278026" 211 | }, 212 | { 213 | "name": "_STATISTICS_WRITING_APP", 214 | "language": "eng", 215 | "default": true, 216 | "string": "mkvmerge v34.0.0 ('Sight and Seen') 64-bit" 217 | }, 218 | { 219 | "name": "_STATISTICS_WRITING_DATE_UTC", 220 | "language": "eng", 221 | "default": true, 222 | "string": "2019-07-26 11:08:51" 223 | }, 224 | { 225 | "name": "_STATISTICS_TAGS", 226 | "language": "eng", 227 | "default": true, 228 | "string": "BPS DURATION 
import json
import logging
import re
from decimal import Decimal
from logging import NullHandler, getLogger
from subprocess import check_output
from typing import Union

from knowit.core import Property
from knowit.properties import (
    AudioCodec,
    Basic,
    Duration,
    Language,
    Quantity,
    VideoCodec,
    VideoDimensions,
    YesNo,
)
from knowit.provider import (
    Executor,
    MalformedFileError,
    NotFoundExecutor,
    Provider,
)
from knowit.rules import (
    AudioChannelsRule,
    ClosedCaptionRule,
    HearingImpairedRule,
    LanguageRule,
    ResolutionRule,
)
from knowit.rules.general import GuessTitleRule
from knowit.serializer import get_json_encoder
from knowit.units import units
from knowit.utils import define_candidate, detect_os

logger = getLogger(__name__)
logger.addHandler(NullHandler())

# Logged by MkvMergeProvider.loaded() when no mkvmerge executable could be located.
WARN_MSG = r'''
=========================================================================================
mkvmerge not found on your system or could not be loaded.
Visit https://mkvtoolnix.download to download it.
If you still have problems, please check if the downloaded version matches your system.
To load mkvmerge from a specific location, please define the location as follow:
  knowit --mkvmerge /usr/local/mkvmerge/bin
  knowit --mkvmerge /usr/local/mkvmerge/bin/mkvmerge
  knowit --mkvmerge "C:\Program Files\mkvmerge"
  knowit --mkvmerge C:\Software\mkvmerge.exe
=========================================================================================
'''


class MkvMergeExecutor(Executor):
    """Executor that knows how to execute mkvmerge."""

    # Captures the text following a ``v`` prefix as the ``version`` group read by _get_version().
    version_re = re.compile(r'\bv(?P<version>[^\b\s]+)')
    locations = {
        'unix': ('/usr/local/mkvmerge/lib', '/usr/local/mkvmerge/bin', '__PATH__'),
        'windows': ('__PATH__', ),
        'macos': ('__PATH__', ),
    }

    def extract_info(self, filename):
        """Extract media info.

        Returns the parsed JSON produced by :meth:`_execute`, or an empty dict when
        the command produced no output.
        """
        json_dump = self._execute(filename)
        return json.loads(json_dump) if json_dump else {}

    def _execute(self, filename):
        """Run mkvmerge on ``filename`` and return its output; implemented by subclasses."""
        raise NotImplementedError

    @classmethod
    def _get_version(cls, output):
        """Return the version found in ``output`` (without the ``v`` prefix), or None."""
        match = cls.version_re.search(output)
        if match:
            version = match.groupdict()['version']
            return version
        return None

    @classmethod
    def get_executor_instance(cls, suggested_path=None) -> Union["MkvMergeExecutor", NotFoundExecutor]:
        """Return executor instance.

        Falls back to a ``NotFoundExecutor`` when no executor class can be created for
        the detected OS family and ``suggested_path``.
        """
        os_family = detect_os()
        logger.debug('Detected os: %s', os_family)
        for exec_cls in (MkvMergeCliExecutor, ):
            executor = exec_cls.create(os_family, suggested_path)
            if executor:
                return executor
        return NotFoundExecutor(suggested_path)


class MkvMergeCliExecutor(MkvMergeExecutor):
    """Executor that uses mkvmerge cli."""

    names = {
        'unix': ('mkvmerge', ),
        'windows': ('mkvmerge.exe', ),
        'macos': ('mkvmerge', ),
    }

    def _execute(self, filename):
        """Return the decoded output of ``mkvmerge -i -F json <filename>``."""
        return check_output([self.location, '-i', '-F', 'json', filename]).decode()

    @classmethod
    def create(cls, os_family=None, suggested_path=None):
        """Create the executor instance.

        Every candidate is probed with ``--version``; the first one whose output
        contains a version is used. Candidates that cannot be started (``OSError``)
        are skipped. Returns None when no candidate works.
        """
        for candidate in define_candidate(cls.locations, cls.names, os_family, suggested_path):
            try:
                output = check_output([candidate, '--version']).decode()
                version = cls._get_version(output)
                if version:
                    logger.debug('MkvMerge cli detected: %s v%s', candidate, version)
                    return MkvMergeCliExecutor(candidate, version.split('.'))
            except OSError:
                pass
        return None


class MkvMergeProvider(Provider):
    """MkvMerge Provider."""

    def __init__(self, config, suggested_path=None, *args, **kwargs):
        """Init method.

        Declares the properties and rules for each track type and locates the
        mkvmerge executor, optionally starting from ``suggested_path``.
        """
        super().__init__(config, {
            'general': {
                'title': Property('title', description='media title'),
                'duration': Duration('duration', resolution=Decimal('0.000001'), description='media duration'),
            },
            'video': {
                'id': Basic('number', data_type=int, description='video track number'),
                'name': Property('name', description='video track name'),
                'language': Language('language_ietf', 'language', description='video language'),
                'width': VideoDimensions('display_dimensions', dimension='width'),
                'height': VideoDimensions('display_dimensions', dimension='height'),
                'scan_type': YesNo('interlaced', yes='Interlaced', no='Progressive', default='Progressive',
                                   config=config, config_key='ScanType',
                                   description='video scan type'),
                'resolution': None,  # populated with ResolutionRule
                # 'bit_depth', Property('bit_depth', Integer('video bit depth')),
                'codec': VideoCodec(config, 'codec_id', description='video codec'),
                'forced': YesNo('forced_track', hide_value=False, description='video track forced'),
                'default': YesNo('default_track', hide_value=False, description='video track default'),
                'enabled': YesNo('enabled_track', hide_value=True, description='video track enabled'),
            },
            'audio': {
                'id': Basic('number', data_type=int, description='audio track number'),
                'name': Property('name', description='audio track name'),
                'language': Language('language_ietf', 'language', description='audio language'),
                'codec': AudioCodec(config, 'codec_id', description='audio codec'),
                'channels_count': Basic('audio_channels', data_type=int, description='audio channels count'),
                'channels': None,  # populated with AudioChannelsRule
                'sampling_rate': Quantity('audio_sampling_frequency', unit=units.Hz,
                                          description='audio sampling rate'),
                'forced': YesNo('forced_track', hide_value=False, description='audio track forced'),
                'default': YesNo('default_track', hide_value=False, description='audio track default'),
                'enabled': YesNo('enabled_track', hide_value=True, description='audio track enabled'),
            },
            'subtitle': {
                'id': Basic('number', data_type=int, description='subtitle track number'),
                'name': Property('name', description='subtitle track name'),
                'language': Language('language_ietf', 'language', description='subtitle language'),
                'hearing_impaired': None,  # populated with HearingImpairedRule
                'closed_caption': None,  # populated with ClosedCaptionRule
                'forced': YesNo('forced_track', hide_value=False, description='subtitle track forced'),
                'default': YesNo('default_track', hide_value=False, description='subtitle track default'),
                'enabled': YesNo('enabled_track', hide_value=True, description='subtitle track enabled'),
            },
        }, {
            'video': {
                'guessed': GuessTitleRule('guessed properties', private=True),
                'language': LanguageRule('video language', override=True),
                'resolution': ResolutionRule('video resolution'),
            },
            'audio': {
                'guessed': GuessTitleRule('guessed properties', private=True),
                'language': LanguageRule('audio language', override=True),
                'channels': AudioChannelsRule('audio channels'),
            },
            'subtitle': {
                'guessed': GuessTitleRule('guessed properties', private=True),
                'language': LanguageRule('subtitle language', override=True),
                'hearing_impaired': HearingImpairedRule('subtitle hearing impaired', override=True),
                'closed_caption': ClosedCaptionRule('closed caption', override=True),
            }
        })
        self.executor = MkvMergeExecutor.get_executor_instance(suggested_path)

    def loaded(self) -> bool:
        """If library or executable was found."""
        # When no executable was found, log the warning once and remember that it was shown.
        if isinstance(self.executor, NotFoundExecutor):
            if not self.executor.warned:
                logger.warning(WARN_MSG)
                self.executor.warned = True
        # check if loaded
        return bool(self.executor)

    def accepts(self, video_path):
        """Accept Matroska videos when mkvmerge is available."""
        return self.loaded() and video_path.lower().endswith(('.mkv', '.mka', '.mks'))

    @classmethod
    def extract_info(cls, video_path):
        """Extract info from the video.

        Runs the ``mkvmerge`` found on the PATH with the same arguments as
        ``MkvMergeCliExecutor._execute`` and returns the parsed JSON.
        """
        # ``-F`` takes the identification format; without ``json`` the path is consumed as the format.
        return json.loads(check_output(['mkvmerge', '-i', '-F', 'json', video_path]).decode())

    def describe(self, video_path, context):
        """Return video metadata.

        Stores a ``debug_data`` callable in ``context``, groups the mkvmerge tracks by
        type, describes them and appends the provider name and version.

        :raises MalformedFileError: when describing the tracks yields nothing.
        """
        data = self.executor.extract_info(video_path)

        def debug_data():
            """Debug data."""
            return json.dumps(data, cls=get_json_encoder(context), indent=4, ensure_ascii=False)

        context['debug_data'] = debug_data

        if logger.isEnabledFor(logging.DEBUG):
            logger.debug('Video %r scanned using mkvmerge %r has raw data:\n%s',
                         video_path, self.executor.location, debug_data())

        def merge_properties(target: dict):
            """Merge properties sub properties into the target container."""
            return {**{k: v for k, v in target.items() if k != 'properties'}, **target.get('properties', {})}

        general_track = merge_properties(data.get('container', {}))
        video_tracks = []
        audio_tracks = []
        subtitle_tracks = []
        # The executor returns an empty dict when mkvmerge prints nothing: no 'tracks' key then.
        for track in data.get('tracks') or []:
            track_type = track.get('type')
            merged = merge_properties(track)
            if track_type == 'video':
                video_tracks.append(merged)
            elif track_type == 'audio':
                audio_tracks.append(merged)
            elif track_type == 'subtitles':
                subtitle_tracks.append(merged)

        result = self._describe_tracks(video_path, general_track, video_tracks, audio_tracks, subtitle_tracks, context)

        if not result:
            raise MalformedFileError

        result['provider'] = {
            'name': 'mkvmerge',
            'version': self.version
        }

        return result

    @property
    def version(self):
        """Return mkvmerge version information.

        A mapping of the executor location to ``v<version>``; empty when no executor
        is available.
        """
        if not self.executor:
            return {}
        version = '.'.join(map(str, self.executor.version))

        return {self.executor.location: f'v{version}'}
def normalize_path(path: str):
    """Round-trip *path* through ``pathlib.Path`` and return it as a string."""
    return os.fspath(pathlib.Path(path))


def _listify(value):
    """Return *value* unchanged when it is a list or tuple, else wrap it in a list."""
    return value if isinstance(value, (tuple, list)) else [value]


def parameters_from_yaml(name, input_key=None, expected_key=None):
    """Build a list of test parameter tuples from a YAML resource.

    *name* is split on its first dot into a directory and a resource stem.
    The first existing ``<directory>/<stem><ext>``, with ``ext`` taken from
    ``YAML_EXTENSIONS``, is loaded with :func:`read_yaml`.

    When both *input_key* and *expected_key* are given, the result holds one
    ``(data[expected_key], data[input_key])`` tuple. Otherwise every
    top-level item of the document contributes tuples:

    * a mapping value yields ``(root_key, expected, properties)`` for each of
      its items;
    * any other value yields ``(root_key, properties)``.

    In both cases a list/tuple of properties is expanded element by element.
    An empty list is returned when no matching file exists.
    """
    package_name, resource_name = name.split('.', 1)

    files = []
    for yaml_ext in YAML_EXTENSIONS:
        yaml_file = os.path.join(package_name, resource_name + yaml_ext)
        if os.path.isfile(yaml_file):
            files.append(yaml_file)
            # Only the first extension that exists is used.
            break

    parameters = []
    for file_path in files:
        data = read_yaml(file_path)

        if input_key and expected_key:
            parameters.append((data[expected_key], data[input_key]))
            continue

        for root_key, root_value in data.items():
            if isinstance(root_value, Mapping):
                for expected, data_input in root_value.items():
                    for properties in _listify(data_input):
                        parameters.append((root_key, expected, properties))
            else:
                for properties in _listify(root_value):
                    parameters.append((root_key, properties))

    return parameters


def read_file(file_path):
    """Return the whole content of *file_path* decoded as UTF-8 text."""
    # Explicit encoding keeps fixture decoding independent of the locale,
    # in line with read_yaml.
    with open(file_path, 'r', encoding='utf-8') as f:
        return f.read()


def read_yaml(file_path):
    """Parse *file_path* (UTF-8) with ``serializer.YAMLLoader`` and return the result."""
    # NOTE(review): yaml.load runs with the project's custom loader; confirm
    # that loader is safe before pointing this helper at untrusted files.
    with open(file_path, 'r', encoding='utf-8') as f:
        return yaml.load(f, Loader=serializer.YAMLLoader)


def read_json(file_path):
    """Parse *file_path* (UTF-8) as JSON and return the decoded object."""
    with open(file_path, 'r', encoding='utf-8') as f:
        return json.load(f)


def id_func(param):
    """Return ``repr(param)``."""
    return repr(param)


class MediaFiles(object):
    """Represent media files in test/data folder."""

    def __init__(self):
        """Initialize the object."""
        self.videos = MediaFiles._videos()
        self.datafiles = MediaFiles._provider_datafiles()

    @staticmethod
    def _videos():
        """Return the paths of all entries in ``tests/data/videos``.

        The matroska test suite archive is downloaded and its ``.mkv``
        members extracted first whenever the folder is missing or does not
        contain exactly 8 entries.
        """
        data_path = os.path.join('tests', 'data', 'videos')

        # download matroska test suite
        if not os.path.exists(data_path) or len(os.listdir(data_path)) != 8:
            r = requests.get('http://downloads.sourceforge.net/project/matroska/test_files/matroska_test_w1_1.zip',
                             timeout=60)
            # Fail with a clear HTTP error instead of an obscure zip error
            # when the download did not succeed.
            r.raise_for_status()
            with ZipFile(BytesIO(r.content), 'r') as f:
                f.extractall(data_path, [m for m in f.namelist() if os.path.splitext(m)[1] == '.mkv'])

        # build the list of video paths
        return [os.path.join(data_path, path) for path in os.listdir(data_path)]

    @staticmethod
    def _provider_datafiles():
        """Return a dict mapping each provider name to its data file paths.

        For every name in ``provider_names`` the folder
        ``tests/data/<provider>`` is listed and each entry that does not end
        with one of ``YAML_EXTENSIONS`` is kept. Providers without such a
        folder get no key at all.
        """
        datafiles = {}
        for provider in provider_names:
            data_path = os.path.join('tests', 'data', provider)
            if not os.path.isdir(data_path):
                continue

            datafiles[provider] = [
                os.path.join(data_path, path)
                for path in os.listdir(data_path)
                if not path.lower().endswith(YAML_EXTENSIONS)
            ]

        return datafiles

    def get_real_media(self, provider_name):
        """Return only real video files."""
        return [Media(f, provider_name) for f in self.videos]

    def get_xml_media(self, provider_name):
        """Return all videos metadata as xml."""
        # No filtering by extension happens here: every data file of the
        # provider is wrapped. Raises KeyError for a provider without files.
        return [XmlMedia(f, provider_name) for f in self.datafiles[provider_name]]

    def get_yaml_media(self, provider_name):
        """Return all videos metadata as yaml."""
        return [YamlMedia(f, provider_name) for f in self.datafiles[provider_name]]

    def get_json_media(self, provider_name):
        """Return all videos metadata as json."""
        return [JsonMedia(f, provider_name) for f in self.datafiles[provider_name]]
154 | class Media(object): 155 | """Represent a media.""" 156 | 157 | def __init__(self, file_path, provider_name): 158 | """Initialize the object.""" 159 | self.file_path = file_path 160 | self.provider_name = provider_name 161 | 162 | @property 163 | def video_path(self): 164 | """Return the video path.""" 165 | return self.file_path 166 | 167 | @property 168 | def expected_data(self): 169 | """Return the expected video metadata.""" 170 | yaml_file = None 171 | yaml_folder = os.path.normpath(os.path.join(os.path.split(self.video_path)[0], os.pardir)) 172 | for yaml_ext in YAML_EXTENSIONS: 173 | yaml_file = os.path.join(yaml_folder, self.provider_name, os.path.basename(self.video_path) + yaml_ext) 174 | if os.path.isfile(yaml_file): 175 | break 176 | 177 | if not yaml_file or not os.path.isfile(yaml_file): 178 | raise IOError('Unable to find expected file for {!r}', self.video_path) 179 | 180 | return read_yaml(yaml_file) 181 | 182 | def __repr__(self): 183 | """Return the media representation.""" 184 | return '<{} [{}]>'.format(self.__class__.__name__, self.video_path) 185 | 186 | def __str__(self): 187 | """Return the media path.""" 188 | return self.video_path 189 | 190 | 191 | class DataMedia(Media): 192 | """Represent a video without the real file, only the video metadata.""" 193 | 194 | @property 195 | def video_path(self): 196 | """Return the video path.""" 197 | return os.path.splitext(self.file_path)[0] 198 | 199 | @property 200 | def expected_data(self): 201 | """Return the expected video metadata.""" 202 | yaml_file = None 203 | for yaml_ext in YAML_EXTENSIONS: 204 | yaml_file = self.video_path + yaml_ext 205 | if os.path.isfile(yaml_file): 206 | break 207 | 208 | if not yaml_file or not os.path.isfile(yaml_file): 209 | raise IOError('Unable to find expected file for {!r}', self.video_path) 210 | 211 | return read_yaml(yaml_file) 212 | 213 | 214 | class XmlMedia(DataMedia): 215 | """Represent a video without the real file, only the video metadata as 
xml.""" 216 | 217 | @property 218 | def input_data(self): 219 | """Return the video metadata as xml.""" 220 | return read_file(self.file_path) 221 | 222 | 223 | class YamlMedia(DataMedia): 224 | """Represent a video without the real file, only the video metadata as yaml.""" 225 | 226 | @property 227 | def input_data(self): 228 | """Return the video metadata as yaml.""" 229 | return read_yaml(self.file_path) 230 | 231 | 232 | class JsonMedia(DataMedia): 233 | """Represent a video without the real file, only the video metadata as json.""" 234 | 235 | @property 236 | def input_data(self): 237 | """Return the video metadata as json.""" 238 | return read_json(self.file_path) 239 | 240 | 241 | def _parse_value(node): 242 | def parse_duration(value): 243 | match = duration_re.match(value) 244 | if match: 245 | h, m, s, ms, mc = match.groups('0') 246 | return timedelta(hours=int(h), minutes=int(m), seconds=int(s), milliseconds=int(ms), microseconds=int(mc)) 247 | return value 248 | 249 | def parse_quantity(value): 250 | if isinstance(value, str): 251 | for unit in ('pixel', 'bit', 'byte', 'FPS', 'bps', 'Hz'): 252 | if value.endswith(' ' + unit): 253 | return units(value[:-len(unit)]) * units(unit) 254 | 255 | return value 256 | 257 | result = node.value 258 | for method in (parse_quantity, parse_duration): 259 | if result and isinstance(result, str): 260 | result = method(node.value) 261 | return result 262 | 263 | 264 | def is_iterable(obj): 265 | return isinstance(obj, (tuple, list)) 266 | 267 | 268 | def to_string(profile: str, value): 269 | formatted_value = format_property(profile, value) 270 | return str(formatted_value) if formatted_value is not None else None 271 | 272 | 273 | def check_equals(expected, actual, different, options, prefix=''): 274 | if isinstance(expected, Mapping): 275 | check_mapping_equals(expected, actual, different=different, options=options, prefix=prefix) 276 | elif is_iterable(expected): 277 | check_sequence_equals(expected, actual, 
different=different, options=options, prefix=prefix) 278 | elif isinstance(expected, timedelta): 279 | check_timedelta_equals(expected, actual, different=different, prefix=prefix) 280 | elif to_string(options['profile'], expected) != to_string(options['profile'], actual): 281 | different.append((prefix, expected, actual)) 282 | 283 | 284 | def check_timedelta_equals(expected, actual, different, prefix=''): 285 | if not isinstance(actual, timedelta) or not (expected - one_ms) <= actual <= (expected + one_ms): 286 | different.append((prefix, expected, actual)) 287 | 288 | 289 | def check_sequence_equals(expected, actual, different, options, prefix=''): 290 | if not is_iterable(actual) or len(expected) != len(actual): 291 | different.append((prefix, expected, actual)) 292 | return 293 | 294 | for i, expected_value in enumerate(expected): 295 | actual_value = actual[i] 296 | key = '{0}[{1}].'.format(prefix, i) 297 | check_equals(expected_value, actual_value, different=different, options=options, prefix=key) 298 | 299 | 300 | def check_mapping_equals(expected, actual, different, options, prefix=''): 301 | if not isinstance(actual, Mapping): 302 | different.append(('', expected, actual)) 303 | return 304 | 305 | for expected_key, expected_value in expected.items(): 306 | if expected_key == 'media_type': 307 | continue 308 | 309 | if expected_key not in actual: 310 | different.append((prefix + expected_key, expected_value, None)) 311 | continue 312 | 313 | actual_value = actual[expected_key] 314 | key = prefix + expected_key 315 | 316 | if expected_key == 'path': 317 | expected_value = normalize_path(expected_value) 318 | actual_value = normalize_path(actual_value) 319 | 320 | check_equals(expected_value, actual_value, different=different, options=options, prefix=key) 321 | 322 | for actual_key, actual_value in actual.items(): 323 | if actual_key not in expected: 324 | different.append((prefix + actual_key, None, actual_value)) 325 | continue 326 | 327 | 328 | def 
assert_expected(expected, actual, options=None): 329 | version = None 330 | if 'provider' in actual: 331 | version = actual['provider']['version'] 332 | del actual['provider']['version'] 333 | 334 | different = [] 335 | check_equals(expected, actual, different=different, options=options or {'profile': 'default'}) 336 | for (key, expected, actual) in different: 337 | print('{0}: Expected {1} got {2}'.format(key, expected, actual), file=sys.stderr) 338 | 339 | if different and options and options.get('debug_data'): 340 | print(f'Version: {version}') 341 | print(options['debug_data']()) 342 | 343 | assert not different 344 | --------------------------------------------------------------------------------