├── tests
    ├── data
    │   ├── enzyme
    │   │   ├── test4.mkv.yml
    │   │   ├── test6.mkv.yml
    │   │   ├── test7.mkv.yml
    │   │   ├── test8.mkv.yml
    │   │   ├── test1.mkv.yml
    │   │   ├── test3.mkv.yml
    │   │   ├── test2.mkv.yml
    │   │   ├── 7.1-dts-hd-ma-speaker-mapping-test-file.mkv.yml
    │   │   ├── issue-24-example-01.mkv.yml
    │   │   ├── test5.mkv.yml
    │   │   ├── 7.1-dts-hd-ma-speaker-mapping-test-file.mkv.json
    │   │   └── issue-24-example-01.mkv.json
    │   ├── mkvmerge
    │   │   ├── test6.mkv.yml
    │   │   ├── test7.mkv.yml
    │   │   ├── test8.mkv.yml
    │   │   ├── test4.mkv.yml
    │   │   ├── test1.mkv.yml
    │   │   ├── test3.mkv.yml
    │   │   ├── test2.mkv.yml
    │   │   ├── test5.mkv.yml
    │   │   ├── media_001.mkv.yml
    │   │   └── media_001.mkv.json
    │   ├── ffmpeg
    │   │   ├── test4.mkv.yml
    │   │   ├── test6.mkv.yml
    │   │   ├── test7.mkv.yml
    │   │   ├── test8.mkv.yml
    │   │   ├── test1.mkv.yml
    │   │   ├── test2.mkv.yml
    │   │   ├── test3.mkv.yml
    │   │   ├── 7.1-dts-hd-ma-speaker-mapping-test-file.mkv.yml
    │   │   ├── issue-39-example-02.mkv.yml
    │   │   ├── test5-ffmpeg-v2.8.15.mkv.yml
    │   │   ├── test5.mkv.yml
    │   │   ├── issue-39-example-01.mkv.yml
    │   │   ├── media_001.mkv.yml
    │   │   ├── 7.1-dts-hd-ma-speaker-mapping-test-file.mkv.json
    │   │   └── issue-39-example-02.mkv.json
    │   └── mediainfo
    │   │   ├── test4.mkv.yml
    │   │   ├── test6.mkv.yml
    │   │   ├── test7.mkv.yml
    │   │   ├── test8.mkv.yml
    │   │   ├── test1.mkv.yml
    │   │   ├── test2.mkv.yml
    │   │   ├── test3.mkv.yml
    │   │   ├── 7.1-dts-hd-ma-speaker-mapping-test-file.mkv.yml
    │   │   ├── test5.mkv.yml
    │   │   ├── media_001.mkv.yml
    │   │   ├── several-tracks.mkv.yml
    │   │   └── 7.1-dts-hd-ma-speaker-mapping-test-file.mkv.json
    ├── test_video_profile.py
    ├── test_serializer.py
    ├── test_units.py
    ├── test_provider.py
    ├── test_properties.py
    ├── test_resolution.py
    ├── test_audiochannels.py
    ├── test_ffmpeg.py
    ├── test_mkvmerge.py
    ├── test_audiochannels.yml
    ├── test_enzyme.py
    ├── test_mediainfo.py
    ├── test_utils.py
    ├── conftest.py
    ├── test_properties.yml
    ├── test_resolution.yml
    └── __init__.py
├── scripts
    └── test.sh
├── knowit
    ├── providers
    │   ├── __init__.py
    │   ├── enzyme.py
    │   └── mkvmerge.py
    ├── rules
    │   ├── __init__.py
    │   ├── subtitle.py
    │   ├── general.py
    │   ├── video.py
    │   └── audio.py
    ├── properties
    │   ├── subtitle.py
    │   ├── __init__.py
    │   ├── audio.py
    │   ├── video.py
    │   └── general.py
    ├── __init__.py
    ├── units.py
    ├── config.py
    ├── utils.py
    ├── api.py
    ├── provider.py
    ├── __main__.py
    ├── serializer.py
    └── core.py
├── .github
    ├── dependabot.yml
    └── workflows
    │   ├── publish.yml
    │   ├── docker-publish.yml
    │   └── test.yml
├── LICENSE
├── setup.cfg
├── Dockerfile
├── .gitignore
├── pyproject.toml
└── README.md


/tests/data/enzyme/test4.mkv.yml:
--------------------------------------------------------------------------------
1 | {}


--------------------------------------------------------------------------------
/scripts/test.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Runs the project's checks in order: flake8, mypy on both source trees,
# then the pytest suite with coverage reporting for the knowit package.

# Stop at the first failing command and echo every command as it runs.
set -o errexit
set -o xtrace

# Lint and static type checks.
flake8
mypy knowit
mypy tests

# Test suite with coverage reported to the terminal and as HTML.
pytest \
    --cov-report term \
    --cov-report html \
    --cov knowit \
    -vv \
    tests


--------------------------------------------------------------------------------
/tests/test_video_profile.py:
--------------------------------------------------------------------------------
1 | from knowit.properties.video import VideoProfileTier
2 | 
3 | 
def test_video_profile_tier_extract_key_when_no_tier():
    """An empty value yields ``False`` from ``VideoProfileTier._extract_key``."""
    extracted = VideoProfileTier._extract_key('')

    assert extracted is False
6 | 


--------------------------------------------------------------------------------
/knowit/providers/__init__.py:
--------------------------------------------------------------------------------
1 | """Provider package."""
2 | 
3 | from knowit.providers.enzyme import EnzymeProvider
4 | from knowit.providers.ffmpeg import FFmpegProvider
5 | from knowit.providers.mediainfo import MediaInfoProvider
6 | from knowit.providers.mkvmerge import MkvMergeProvider
7 | 


--------------------------------------------------------------------------------
/tests/test_serializer.py:
--------------------------------------------------------------------------------
 1 | # Need to import knowit to check if it changes the default behavior of pyyaml
 2 | import yaml
 3 | 
 4 | 
 5 | def test_unchanged_pyyaml() -> None:
 6 |     ret = yaml.safe_load('value: 0.5')
 7 |     assert isinstance(ret, dict)
 8 |     assert "value" in ret
 9 |     assert ret["value"] == 0.5
10 | 


--------------------------------------------------------------------------------
/knowit/rules/__init__.py:
--------------------------------------------------------------------------------
1 | 
2 | from knowit.rules.audio import AtmosRule
3 | from knowit.rules.audio import AudioChannelsRule
4 | from knowit.rules.audio import DtsHdRule
5 | from knowit.rules.general import LanguageRule
6 | from knowit.rules.subtitle import ClosedCaptionRule
7 | from knowit.rules.subtitle import HearingImpairedRule
8 | from knowit.rules.video import ResolutionRule
9 | 


--------------------------------------------------------------------------------
/knowit/properties/subtitle.py:
--------------------------------------------------------------------------------
 1 | 
 2 | from knowit.core import Configurable
 3 | 
 4 | 
 5 | class SubtitleFormat(Configurable[str]):
 6 |     """Subtitle Format property."""
 7 | 
 8 |     @classmethod
 9 |     def _extract_key(cls, value) -> str:
10 |         key = str(value).upper()
11 |         if key.startswith('S_'):
12 |             key = key[2:]
13 | 
14 |         return key.split('/')[-1]
15 | 


--------------------------------------------------------------------------------
/tests/data/enzyme/test6.mkv.yml:
--------------------------------------------------------------------------------
 1 | duration: '0:01:27.336000'
 2 | path: tests/data/videos/test6.mkv
 3 | container: mkv
 4 | size: 23343928 byte
 5 | video:
 6 | - id: 1
 7 |   language: und
 8 |   width: 854 pixel
 9 |   height: 480 pixel
10 |   scan_type: PROGRESSIVE
11 |   resolution: 480p
12 | audio:
13 | - id: 2
14 |   language: und
15 |   codec: MP3
16 |   channels_count: 2
17 |   channels: '2.0'
18 | provider:
19 |   name: enzyme


--------------------------------------------------------------------------------
/tests/data/mkvmerge/test6.mkv.yml:
--------------------------------------------------------------------------------
 1 | duration: 87.336
 2 | path: tests/data/videos/test6.mkv
 3 | container: mkv
 4 | size: 23343928
 5 | video:
 6 | - id: 1
 7 |   language: und
 8 |   width: 854
 9 |   height: 480
10 |   scan_type: PROGRESSIVE
11 |   resolution: 480p
12 | audio:
13 | - id: 2
14 |   language: und
15 |   codec: MP3
16 |   channels_count: 2
17 |   channels: 2.0
18 |   sampling_rate: 48000
19 | provider:
20 |   name: mkvmerge
21 | 


--------------------------------------------------------------------------------
/tests/data/enzyme/test7.mkv.yml:
--------------------------------------------------------------------------------
 1 | duration: '0:00:37.043000'
 2 | path: tests/data/videos/test7.mkv
 3 | container: mkv
 4 | size: 21848518 byte
 5 | video:
 6 | - id: 1
 7 |   language: und
 8 |   width: 1024 pixel
 9 |   height: 576 pixel
10 |   scan_type: PROGRESSIVE
11 |   resolution: 576p
12 |   codec: H264
13 | audio:
14 | - id: 2
15 |   language: und
16 |   codec: AAC
17 |   channels_count: 2
18 |   channels: '2.0'
19 | provider:
20 |   name: enzyme


--------------------------------------------------------------------------------
/tests/data/enzyme/test8.mkv.yml:
--------------------------------------------------------------------------------
 1 | duration: '0:00:47.341000'
 2 | path: tests/data/videos/test8.mkv
 3 | container: mkv
 4 | size: 21224737 byte
 5 | video:
 6 | - id: 1
 7 |   language: und
 8 |   width: 1024 pixel
 9 |   height: 576 pixel
10 |   scan_type: PROGRESSIVE
11 |   resolution: 576p
12 |   codec: H264
13 | audio:
14 | - id: 2
15 |   language: und
16 |   codec: AAC
17 |   channels_count: 2
18 |   channels: '2.0'
19 | provider:
20 |   name: enzyme


--------------------------------------------------------------------------------
/tests/data/mkvmerge/test7.mkv.yml:
--------------------------------------------------------------------------------
 1 | duration: 37.043
 2 | path: tests/data/videos/test7.mkv
 3 | container: mkv
 4 | size: 21848518
 5 | video:
 6 | - id: 1
 7 |   language: und
 8 |   width: 1024
 9 |   height: 576
10 |   scan_type: PROGRESSIVE
11 |   resolution: 576p
12 |   codec: H264
13 | audio:
14 | - id: 2
15 |   language: und
16 |   codec: AAC
17 |   channels_count: 2
18 |   channels: 2.0
19 |   sampling_rate: 48000
20 | provider:
21 |   name: mkvmerge
22 | 


--------------------------------------------------------------------------------
/tests/data/mkvmerge/test8.mkv.yml:
--------------------------------------------------------------------------------
 1 | duration: 47.341
 2 | path: tests/data/videos/test8.mkv
 3 | container: mkv
 4 | size: 21224737
 5 | video:
 6 | - id: 1
 7 |   language: und
 8 |   width: 1024
 9 |   height: 576
10 |   scan_type: PROGRESSIVE
11 |   resolution: 576p
12 |   codec: H264
13 | audio:
14 | - id: 2
15 |   language: und
16 |   codec: AAC
17 |   channels_count: 2
18 |   channels: 2.0
19 |   sampling_rate: 48000
20 | provider:
21 |   name: mkvmerge
22 | 


--------------------------------------------------------------------------------
/tests/data/mkvmerge/test4.mkv.yml:
--------------------------------------------------------------------------------
 1 | path: tests/data/videos/test4.mkv
 2 | container: mkv
 3 | size: 21313902
 4 | video:
 5 | - id: 1
 6 |   language: und
 7 |   width: 1280
 8 |   height: 720
 9 |   scan_type: PROGRESSIVE
10 |   resolution: 720p
11 |   default: true
12 | audio:
13 | - id: 2
14 |   language: und
15 |   codec: VORBIS
16 |   channels_count: 2
17 |   channels: 2.0
18 |   sampling_rate: 48000
19 |   default: true
20 | provider:
21 |   name: mkvmerge
22 | 


--------------------------------------------------------------------------------
/tests/data/enzyme/test1.mkv.yml:
--------------------------------------------------------------------------------
 1 | duration: '0:01:27.336000'
 2 | path: tests/data/videos/test1.mkv
 3 | container: mkv
 4 | size: 23339337 byte
 5 | video:
 6 | - id: 1
 7 |   language: und
 8 |   width: 854 pixel
 9 |   height: 480 pixel
10 |   scan_type: PROGRESSIVE
11 |   resolution: 480p
12 |   default: true
13 | audio:
14 | - id: 2
15 |   language: und
16 |   codec: MP3
17 |   channels_count: 2
18 |   channels: '2.0'
19 |   default: true
20 | provider:
21 |   name: enzyme


--------------------------------------------------------------------------------
/tests/data/enzyme/test3.mkv.yml:
--------------------------------------------------------------------------------
 1 | duration: '0:00:49.064000'
 2 | path: tests/data/videos/test3.mkv
 3 | container: mkv
 4 | size: 21061472 byte
 5 | video:
 6 | - id: 1
 7 |   language: und
 8 |   width: 1024 pixel
 9 |   height: 576 pixel
10 |   scan_type: PROGRESSIVE
11 |   resolution: 576p
12 |   codec: H264
13 |   default: true
14 | audio:
15 | - id: 2
16 |   codec: MP3
17 |   channels_count: 2
18 |   channels: '2.0'
19 |   default: true
20 | provider:
21 |   name: enzyme


--------------------------------------------------------------------------------
/tests/data/mkvmerge/test1.mkv.yml:
--------------------------------------------------------------------------------
 1 | duration: 87.336
 2 | path: tests/data/videos/test1.mkv
 3 | container: mkv
 4 | size: 23339337
 5 | video:
 6 | - id: 1
 7 |   language: und
 8 |   width: 854
 9 |   height: 480
10 |   scan_type: PROGRESSIVE
11 |   resolution: 480p
12 |   default: true
13 | audio:
14 | - id: 2
15 |   language: und
16 |   codec: MP3
17 |   channels_count: 2
18 |   channels: 2.0
19 |   sampling_rate: 48000
20 |   default: true
21 | provider:
22 |   name: mkvmerge
23 | 


--------------------------------------------------------------------------------
/tests/test_units.py:
--------------------------------------------------------------------------------
 1 | from knowit.units import NullRegistry
 2 | 
 3 | 
 4 | def test_null_registry_is_falsey():
 5 |     registry = NullRegistry()
 6 |     assert not registry
 7 | 
 8 | 
 9 | def test_null_registry_can_define():
10 |     registry = NullRegistry()
11 |     registry.define('FPS = 1 * hertz')
12 | 
13 | 
def test_null_registry_attribute_is_a_scalar_1():
    """Attributes read from a ``NullRegistry`` compare equal to ``1``."""
    null_registry = NullRegistry()

    for attribute in ('fps', 'some_attribute'):
        assert getattr(null_registry, attribute) == 1
18 | 


--------------------------------------------------------------------------------
/tests/data/enzyme/test2.mkv.yml:
--------------------------------------------------------------------------------
 1 | duration: '0:00:47.509000'
 2 | path: tests/data/videos/test2.mkv
 3 | container: mkv
 4 | size: 21142764 byte
 5 | video:
 6 | - id: 1
 7 |   language: und
 8 |   width: 1024 pixel
 9 |   height: 576 pixel
10 |   scan_type: PROGRESSIVE
11 |   resolution: 576p
12 |   codec: H264
13 |   default: true
14 | audio:
15 | - id: 2
16 |   language: und
17 |   codec: AAC
18 |   channels_count: 2
19 |   channels: '2.0'
20 |   default: true
21 | provider:
22 |   name: enzyme


--------------------------------------------------------------------------------
/tests/test_provider.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | from knowit.provider import Provider
 4 | from knowit.units import units
 5 | 
 6 | 
 7 | @pytest.mark.parametrize(
 8 |     'frame_rate', [
 9 |         pytest.param(3.4 * units.fps, id='Frame rate with magnitude'),
10 |         pytest.param(1, id='Frame rate without magnitude'),
11 |     ],
12 | )
13 | def test_provider_validate_track_frame_rate(frame_rate):
14 |     track = {'frame_rate': 0}
15 |     Provider._validate_track('video', track)
16 | 


--------------------------------------------------------------------------------
/tests/data/mkvmerge/test3.mkv.yml:
--------------------------------------------------------------------------------
 1 | duration: 49.064
 2 | path: tests/data/videos/test3.mkv
 3 | container: mkv
 4 | size: 21061472
 5 | video:
 6 | - id: 1
 7 |   language: und
 8 |   width: 1024
 9 |   height: 576
10 |   scan_type: PROGRESSIVE
11 |   resolution: 576p
12 |   codec: H264
13 |   default: true
14 | audio:
15 | - id: 2
16 |   language: en
17 |   codec: MP3
18 |   channels_count: 2
19 |   channels: 2.0
20 |   sampling_rate: 48000
21 |   default: true
22 | provider:
23 |   name: mkvmerge
24 | 


--------------------------------------------------------------------------------
/tests/data/mkvmerge/test2.mkv.yml:
--------------------------------------------------------------------------------
 1 | duration: 47.509
 2 | path: tests/data/videos/test2.mkv
 3 | container: mkv
 4 | size: 21142764
 5 | video:
 6 | - id: 1
 7 |   language: und
 8 |   width: 1354
 9 |   height: 576
10 |   scan_type: PROGRESSIVE
11 |   resolution: 1080p
12 |   codec: H264
13 |   default: true
14 | audio:
15 | - id: 2
16 |   language: und
17 |   codec: AAC
18 |   channels_count: 2
19 |   channels: 2.0
20 |   sampling_rate: 48000
21 |   default: true
22 | provider:
23 |   name: mkvmerge
24 | 


--------------------------------------------------------------------------------
/tests/data/ffmpeg/test4.mkv.yml:
--------------------------------------------------------------------------------
 1 | path: tests/data/videos/test4.mkv
 2 | size: 21313902 byte
 3 | container: mkv
 4 | video:
 5 | - id: 0
 6 |   width: 1280 pixel
 7 |   height: 720 pixel
 8 |   scan_type: PROGRESSIVE
 9 |   aspect_ratio: 1.778
10 |   pixel_aspect_ratio: 1.0
11 |   resolution: 720p
12 |   frame_rate: 24.0 FPS
13 |   default: true
14 | audio:
15 | - id: 1
16 |   codec: VORBIS
17 |   channels_count: 2
18 |   channels: '2.0'
19 |   sampling_rate: 48000
20 |   default: true
21 | provider:
22 |   name: ffmpeg


--------------------------------------------------------------------------------
/tests/test_properties.py:
--------------------------------------------------------------------------------
 1 | 
 2 | import pytest
 3 | 
 4 | from knowit import properties
 5 | 
 6 | from . import parameters_from_yaml
 7 | 
 8 | 
 9 | @pytest.mark.parametrize('name,expected,input', parameters_from_yaml(__name__))
10 | def test_resolution(config, context, name, expected, input):
11 |     # Given
12 |     prop_class = getattr(properties, name)
13 |     sut = prop_class(config, name)
14 |     track = {name: input}
15 | 
16 |     # When
17 |     actual = sut.extract_value(track, context)
18 | 
19 |     # Then
20 |     assert expected == actual
21 | 


--------------------------------------------------------------------------------
/tests/data/enzyme/7.1-dts-hd-ma-speaker-mapping-test-file.mkv.yml:
--------------------------------------------------------------------------------
 1 | title: 7.1Ch DTS-HD MA - Speaker Mapping Test File
 2 | path: tests/data/enzyme/7.1-dts-hd-ma-speaker-mapping-test-file.mkv
 3 | duration: 0:01:37
 4 | container: mkv
 5 | video:
 6 | - id: 1
 7 |   language: und
 8 |   width: 1920 pixel
 9 |   height: 1080 pixel
10 |   scan_type: PROGRESSIVE
11 |   resolution: 1080p
12 |   codec: H264
13 |   default: true
14 | audio:
15 | - id: 2
16 |   name: 7.1Ch DTS-HD MA
17 |   codec: DTS
18 |   channels_count: 6
19 |   channels: '5.1'
20 |   default: true
21 | provider:
22 |   name: enzyme
23 | 


--------------------------------------------------------------------------------
/tests/data/ffmpeg/test6.mkv.yml:
--------------------------------------------------------------------------------
 1 | title: Big Buck Bunny - test 6
 2 | path: tests/data/videos/test6.mkv
 3 | duration: '0:01:27.336000'
 4 | size: 23343928 byte
 5 | bit_rate: 2138309
 6 | container: mkv
 7 | video:
 8 | - id: 0
 9 |   width: 854 pixel
10 |   height: 480 pixel
11 |   scan_type: PROGRESSIVE
12 |   aspect_ratio: 1.779
13 |   pixel_aspect_ratio: 1.0
14 |   resolution: 480p
15 |   frame_rate: 24.0 FPS
16 |   codec: MSMPEG4V2
17 | audio:
18 | - id: 1
19 |   codec: MP3
20 |   channels_count: 2
21 |   channels: '2.0'
22 |   bit_rate: 208000
23 |   sampling_rate: 48000
24 | provider:
25 |   name: ffmpeg


--------------------------------------------------------------------------------
/tests/test_resolution.py:
--------------------------------------------------------------------------------
 1 | 
 2 | import pytest
 3 | 
 4 | from knowit.rules import ResolutionRule
 5 | 
 6 | from . import (
 7 |     assert_expected,
 8 |     parameters_from_yaml,
 9 | )
10 | 
11 | 
@pytest.fixture
def resolution_rule():
    """Provide a ``ResolutionRule`` constructed with the name 'resolution'."""
    rule = ResolutionRule('resolution')
    return rule
15 | 
16 | 
@pytest.mark.parametrize('expected,input', parameters_from_yaml(__name__))
def test_resolution(resolution_rule, context, expected, input):
    """Executing the resolution rule on ``input`` yields ``expected``.

    ``input`` is passed as both the first and second argument of ``execute``.
    """
    # When / Then
    assert_expected(
        expected,
        resolution_rule.execute(input, input, context),
    )
26 | 


--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
 1 | # To get started with Dependabot version updates, you'll need to specify which
 2 | # package ecosystems to update and where the package manifests are located.
 3 | # Please see the documentation for all configuration options:
 4 | # https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
 5 | 
 6 | version: 2
 7 | updates:
 8 |   - package-ecosystem: "pip"
 9 |     directory: "/"
10 |     schedule:
11 |       interval: "daily"
12 | 
13 |   - package-ecosystem: "github-actions"
14 |     directory: "/"
15 |     schedule:
16 |       interval: "daily"
17 | 


--------------------------------------------------------------------------------
/tests/data/mediainfo/test4.mkv.yml:
--------------------------------------------------------------------------------
 1 | path: tests/data/videos/test4.mkv
 2 | size: 21313902 byte
 3 | container: mkv
 4 | video:
 5 | - id: 1
 6 |   width: 1280 pixel
 7 |   height: 720 pixel
 8 |   scan_type: PROGRESSIVE
 9 |   aspect_ratio: 1.778
10 |   pixel_aspect_ratio: 1.0
11 |   resolution: 720p
12 |   frame_rate: 24.0 FPS
13 |   bit_rate: 2500000
14 |   default: true
15 | audio:
16 | - id: 2
17 |   codec: VORBIS
18 |   channels_count: 2
19 |   channels: '2.0'
20 |   bit_rate: 192000
21 |   bit_rate_mode: VBR
22 |   sampling_rate: 48000
23 |   compression: LOSSY
24 |   default: true
25 | provider:
26 |   name: mediainfo
27 | 


--------------------------------------------------------------------------------
/tests/test_audiochannels.py:
--------------------------------------------------------------------------------
 1 | 
 2 | import pytest
 3 | from knowit.rules import AudioChannelsRule
 4 | 
 5 | from . import (
 6 |     assert_expected,
 7 |     parameters_from_yaml,
 8 | )
 9 | 
10 | 
@pytest.fixture
def audiochannels_rule():
    """Provide an ``AudioChannelsRule`` constructed with the name 'audio channels'."""
    rule = AudioChannelsRule('audio channels')
    return rule
14 | 
15 | 
@pytest.mark.parametrize('expected,input', parameters_from_yaml(__name__))
def test_resolution(audiochannels_rule, context, expected, input):
    """Executing the audio channels rule on ``input`` yields ``expected``.

    ``input`` is passed as both the first and second argument of ``execute``.
    """
    # When / Then
    assert_expected(
        expected,
        audiochannels_rule.execute(input, input, context),
    )
25 | 


--------------------------------------------------------------------------------
/tests/data/ffmpeg/test7.mkv.yml:
--------------------------------------------------------------------------------
 1 | title: Big Buck Bunny - test 7
 2 | path: tests/data/videos/test7.mkv
 3 | duration: '0:00:37.043000'
 4 | size: 21848518 byte
 5 | bit_rate: 4718520
 6 | container: mkv
 7 | video:
 8 | - id: 0
 9 |   width: 1024 pixel
10 |   height: 576 pixel
11 |   scan_type: PROGRESSIVE
12 |   aspect_ratio: 1.778
13 |   pixel_aspect_ratio: 1.0
14 |   resolution: 576p
15 |   frame_rate: 24.0 FPS
16 |   bit_depth: 8 bit
17 |   codec: H264
18 |   profile: MAIN
19 | audio:
20 | - id: 1
21 |   codec: AAC
22 |   profile: LC
23 |   channels_count: 2
24 |   channels: '2.0'
25 |   sampling_rate: 48000
26 | provider:
27 |   name: ffmpeg


--------------------------------------------------------------------------------
/tests/data/ffmpeg/test8.mkv.yml:
--------------------------------------------------------------------------------
 1 | title: Big Buck Bunny - test 8
 2 | path: tests/data/videos/test8.mkv
 3 | duration: '0:00:47.341000'
 4 | size: 21224737 byte
 5 | bit_rate: 3586698
 6 | container: mkv
 7 | video:
 8 | - id: 0
 9 |   width: 1024 pixel
10 |   height: 576 pixel
11 |   scan_type: PROGRESSIVE
12 |   aspect_ratio: 1.778
13 |   pixel_aspect_ratio: 1.0
14 |   resolution: 576p
15 |   frame_rate: 24.0 FPS
16 |   bit_depth: 8 bit
17 |   codec: H264
18 |   profile: MAIN
19 | audio:
20 | - id: 1
21 |   codec: AAC
22 |   profile: LC
23 |   channels_count: 2
24 |   channels: '2.0'
25 |   sampling_rate: 48000
26 | provider:
27 |   name: ffmpeg


--------------------------------------------------------------------------------
/tests/data/ffmpeg/test1.mkv.yml:
--------------------------------------------------------------------------------
 1 | title: Big Buck Bunny - test 1
 2 | path: tests/data/videos/test1.mkv
 3 | duration: '0:01:27.336000'
 4 | size: 23339337 byte
 5 | bit_rate: 2137889
 6 | container: mkv
 7 | video:
 8 | - id: 0
 9 |   width: 854 pixel
10 |   height: 480 pixel
11 |   scan_type: PROGRESSIVE
12 |   aspect_ratio: 1.779
13 |   pixel_aspect_ratio: 1.0
14 |   resolution: 480p
15 |   frame_rate: 24.0 FPS
16 |   codec: MSMPEG4V2
17 |   default: true
18 | audio:
19 | - id: 1
20 |   codec: MP3
21 |   channels_count: 2
22 |   channels: '2.0'
23 |   bit_rate: 208000
24 |   sampling_rate: 48000
25 |   default: true
26 | provider:
27 |   name: ffmpeg


--------------------------------------------------------------------------------
/knowit/properties/__init__.py:
--------------------------------------------------------------------------------
 1 | from knowit.properties.audio import (
 2 |     AudioChannels,
 3 |     AudioCodec,
 4 |     AudioCompression,
 5 |     AudioProfile,
 6 |     BitRateMode,
 7 | )
 8 | from knowit.properties.general import (
 9 |     Basic,
10 |     Duration,
11 |     Language,
12 |     Quantity,
13 |     YesNo,
14 | )
15 | from knowit.properties.subtitle import (
16 |     SubtitleFormat,
17 | )
18 | from knowit.properties.video import (
19 |     Ratio,
20 |     ScanType,
21 |     VideoCodec,
22 |     VideoDimensions,
23 |     VideoEncoder,
24 |     VideoHdrFormat,
25 |     VideoProfile,
26 |     VideoProfileLevel,
27 |     VideoProfileTier,
28 | )
29 | 


--------------------------------------------------------------------------------
/tests/data/ffmpeg/test2.mkv.yml:
--------------------------------------------------------------------------------
 1 | title: Elephant Dream - test 2
 2 | path: tests/data/videos/test2.mkv
 3 | duration: '0:00:47.509000'
 4 | size: 21142764 byte
 5 | bit_rate: 3560212
 6 | container: mkv
 7 | video:
 8 | - id: 0
 9 |   width: 1024 pixel
10 |   height: 576 pixel
11 |   scan_type: PROGRESSIVE
12 |   aspect_ratio: 2.351
13 |   pixel_aspect_ratio: 1.322
14 |   resolution: 1080p
15 |   frame_rate: 24.0 FPS
16 |   bit_depth: 8 bit
17 |   codec: H264
18 |   profile: MAIN
19 |   default: true
20 | audio:
21 | - id: 1
22 |   codec: AAC
23 |   profile: LC
24 |   channels_count: 2
25 |   channels: '2.0'
26 |   sampling_rate: 48000
27 |   default: true
28 | provider:
29 |   name: ffmpeg


--------------------------------------------------------------------------------
/tests/data/ffmpeg/test3.mkv.yml:
--------------------------------------------------------------------------------
 1 | title: Elephant Dream - test 3
 2 | path: tests/data/videos/test3.mkv
 3 | duration: '0:00:49.064000'
 4 | size: 21061472 byte
 5 | bit_rate: 3434122
 6 | container: mkv
 7 | video:
 8 | - id: 0
 9 |   width: 1024 pixel
10 |   height: 576 pixel
11 |   scan_type: PROGRESSIVE
12 |   aspect_ratio: 1.778
13 |   pixel_aspect_ratio: 1.0
14 |   resolution: 576p
15 |   frame_rate: 24.0 FPS
16 |   bit_depth: 8 bit
17 |   codec: H264
18 |   profile: MAIN
19 |   default: true
20 | audio:
21 | - id: 1
22 |   language: en
23 |   codec: MP3
24 |   channels_count: 2
25 |   channels: '2.0'
26 |   bit_rate: 172001
27 |   sampling_rate: 48000
28 |   default: true
29 | provider:
30 |   name: ffmpeg


--------------------------------------------------------------------------------
/tests/data/enzyme/issue-24-example-01.mkv.yml:
--------------------------------------------------------------------------------
 1 | title: The 100 06x09 (What You Take With You) - release by messafan for CasStudio
 2 | path: tests/data/enzyme/issue-24-example-01.mkv
 3 | duration: 0:42:05
 4 | container: mkv
 5 | video:
 6 | - id: 1
 7 |   language: und
 8 |   width: 1280 pixel
 9 |   height: 720 pixel
10 |   scan_type: PROGRESSIVE
11 |   resolution: 720p
12 |   codec: H264
13 |   default: true
14 | audio:
15 | - id: 2
16 |   codec: AC3
17 |   channels_count: 2
18 |   channels: '2.0'
19 |   language: pt
20 |   forced: true
21 |   default: true
22 | - id: 3
23 |   name: Stereo
24 |   codec: AC3
25 |   channels_count: 2
26 |   channels: '2.0'
27 |   language: en
28 | provider:
29 |   name: enzyme
30 | 


--------------------------------------------------------------------------------
/tests/data/mediainfo/test6.mkv.yml:
--------------------------------------------------------------------------------
 1 | title: Big Buck Bunny - test 6
 2 | path: tests/data/videos/test6.mkv
 3 | duration: '0:01:27.336000'
 4 | size: 23343928 byte
 5 | bit_rate: 2138310
 6 | container: mkv
 7 | video:
 8 | - id: 1
 9 |   duration: '0:01:27.333000'
10 |   width: 854 pixel
11 |   height: 480 pixel
12 |   scan_type: PROGRESSIVE
13 |   aspect_ratio: 1.779
14 |   pixel_aspect_ratio: 1.0
15 |   resolution: 480p
16 |   frame_rate: 24.0 FPS
17 |   codec: MSMPEG4V2
18 |   media_type: video/MP4V-ES
19 | audio:
20 | - id: 2
21 |   duration: '0:01:27.336000'
22 |   codec: MP3
23 |   profile: LAYER3
24 |   channels_count: 2
25 |   channels: '2.0'
26 |   bit_rate_mode: VBR
27 |   sampling_rate: 48000
28 |   compression: LOSSY
29 | provider:
30 |   name: mediainfo


--------------------------------------------------------------------------------
/tests/data/mediainfo/test7.mkv.yml:
--------------------------------------------------------------------------------
 1 | title: Big Buck Bunny - test 7
 2 | path: tests/data/videos/test7.mkv
 3 | duration: '0:00:37.043000'
 4 | size: 21848518 byte
 5 | bit_rate: 4718520
 6 | container: mkv
 7 | video:
 8 | - id: 1
 9 |   duration: '0:00:37.042000'
10 |   width: 1024 pixel
11 |   height: 576 pixel
12 |   scan_type: PROGRESSIVE
13 |   aspect_ratio: 1.778
14 |   pixel_aspect_ratio: 1.0
15 |   resolution: 576p
16 |   frame_rate: 24.0 FPS
17 |   bit_depth: 8 bit
18 |   codec: H264
19 |   profile: MAIN
20 |   profile_level: '3.1'
21 |   media_type: video/H264
22 | audio:
23 | - id: 2
24 |   duration: '0:00:37.043000'
25 |   codec: AAC
26 |   profile: LC
27 |   channels_count: 2
28 |   channels: '2.0'
29 |   sampling_rate: 48000
30 |   compression: LOSSY
31 | provider:
32 |   name: mediainfo


--------------------------------------------------------------------------------
/tests/data/mediainfo/test8.mkv.yml:
--------------------------------------------------------------------------------
 1 | title: Big Buck Bunny - test 8
 2 | path: tests/data/videos/test8.mkv
 3 | duration: '0:00:47.341000'
 4 | size: 21224737 byte
 5 | bit_rate: 3586699
 6 | container: mkv
 7 | video:
 8 | - id: 1
 9 |   duration: '0:00:47.333000'
10 |   width: 1024 pixel
11 |   height: 576 pixel
12 |   scan_type: PROGRESSIVE
13 |   aspect_ratio: 1.778
14 |   pixel_aspect_ratio: 1.0
15 |   resolution: 576p
16 |   frame_rate: 24.0 FPS
17 |   bit_depth: 8 bit
18 |   codec: H264
19 |   profile: MAIN
20 |   profile_level: '3.1'
21 |   media_type: video/H264
22 | audio:
23 | - id: 2
24 |   duration: '0:00:47.341000'
25 |   codec: AAC
26 |   profile: LC
27 |   channels_count: 2
28 |   channels: '2.0'
29 |   sampling_rate: 48000
30 |   compression: LOSSY
31 | provider:
32 |   name: mediainfo


--------------------------------------------------------------------------------
/tests/data/mediainfo/test1.mkv.yml:
--------------------------------------------------------------------------------
 1 | title: Big Buck Bunny - test 1
 2 | path: tests/data/videos/test1.mkv
 3 | duration: '0:01:27.336000'
 4 | size: 23339337 byte
 5 | bit_rate: 2137889
 6 | container: mkv
 7 | video:
 8 | - id: 1
 9 |   duration: '0:01:27.333000'
10 |   width: 854 pixel
11 |   height: 480 pixel
12 |   scan_type: PROGRESSIVE
13 |   aspect_ratio: 1.779
14 |   pixel_aspect_ratio: 1.0
15 |   resolution: 480p
16 |   frame_rate: 24.0 FPS
17 |   codec: MSMPEG4V2
18 |   media_type: video/MP4V-ES
19 |   default: true
20 | audio:
21 | - id: 2
22 |   duration: '0:01:27.336000'
23 |   codec: MP3
24 |   profile: LAYER3
25 |   channels_count: 2
26 |   channels: '2.0'
27 |   bit_rate_mode: VBR
28 |   sampling_rate: 48000
29 |   compression: LOSSY
30 |   default: true
31 | provider:
32 |   name: mediainfo


--------------------------------------------------------------------------------
/tests/data/mediainfo/test2.mkv.yml:
--------------------------------------------------------------------------------
 1 | title: Elephant Dream - test 2
 2 | path: tests/data/videos/test2.mkv
 3 | duration: '0:00:47.509000'
 4 | size: 21142764 byte
 5 | bit_rate: 3560212
 6 | container: mkv
 7 | video:
 8 | - id: 1
 9 |   duration: '0:00:47.500000'
10 |   width: 1024 pixel
11 |   height: 576 pixel
12 |   scan_type: PROGRESSIVE
13 |   aspect_ratio: 2.351
14 |   pixel_aspect_ratio: 1.322
15 |   resolution: 1080p
16 |   frame_rate: 24.0 FPS
17 |   bit_depth: 8 bit
18 |   codec: H264
19 |   profile: MAIN
20 |   profile_level: '3.1'
21 |   media_type: video/H264
22 |   default: true
23 | audio:
24 | - id: 2
25 |   duration: '0:00:47.509000'
26 |   codec: AAC
27 |   profile: LC
28 |   channels_count: 2
29 |   channels: '2.0'
30 |   sampling_rate: 48000
31 |   compression: LOSSY
32 |   default: true
33 | provider:
34 |   name: mediainfo


--------------------------------------------------------------------------------
/tests/data/ffmpeg/7.1-dts-hd-ma-speaker-mapping-test-file.mkv.yml:
--------------------------------------------------------------------------------
 1 | title: 7.1Ch DTS-HD MA - Speaker Mapping Test File
 2 | path: tests/data/ffmpeg/7.1-dts-hd-ma-speaker-mapping-test-file.mkv
 3 | duration: '0:01:37.931000'
 4 | size: 40772443 byte
 5 | bit_rate: 3330707
 6 | container: mkv
 7 | video:
 8 | - id: 0
 9 |   width: 1920 pixel
10 |   height: 1080 pixel
11 |   scan_type: PROGRESSIVE
12 |   aspect_ratio: 1.778
13 |   pixel_aspect_ratio: 1.0
14 |   resolution: 1080p
15 |   frame_rate: 23.976 FPS
16 |   bit_depth: 8 bit
17 |   codec: H264
18 |   profile: MAIN
19 |   default: true
20 | audio:
21 | - id: 1
22 |   name: 7.1Ch DTS-HD MA
23 |   language: en
24 |   codec: DTSHD
25 |   profile: MA
26 |   channels_count: 8
27 |   channels: '7.1'
28 |   bit_depth: 24 bit
29 |   sampling_rate: 48000
30 |   default: true
31 | provider:
32 |   name: ffmpeg


--------------------------------------------------------------------------------
/tests/data/enzyme/test5.mkv.yml:
--------------------------------------------------------------------------------
 1 | duration: '0:00:46.665000'
 2 | path: tests/data/videos/test5.mkv
 3 | container: mkv
 4 | size: 31762747 byte
 5 | video:
 6 | - id: 1
 7 |   language: und
 8 |   width: 1024 pixel
 9 |   height: 576 pixel
10 |   scan_type: PROGRESSIVE
11 |   resolution: 576p
12 |   codec: H264
13 |   default: true
14 | audio:
15 | - id: 2
16 |   language: und
17 |   codec: AAC
18 |   channels_count: 2
19 |   channels: '2.0'
20 |   default: true
21 | - id: 10
22 |   name: Commentary
23 |   codec: AAC
24 |   channels_count: 1
25 |   channels: '1.0'
26 | subtitle:
27 | - id: 3
28 |   default: true
29 | - id: 4
30 |   language: hu
31 | - id: 5
32 |   language: de
33 | - id: 6
34 |   language: fr
35 | - id: 8
36 |   language: es
37 | - id: 9
38 |   language: it
39 | - id: 11
40 |   language: ja
41 | - id: 7
42 |   language: und
43 | provider:
44 |   name: enzyme


--------------------------------------------------------------------------------
/tests/data/mediainfo/test3.mkv.yml:
--------------------------------------------------------------------------------
 1 | title: Elephant Dream - test 3
 2 | path: tests/data/videos/test3.mkv
 3 | duration: '0:00:49.064000'
 4 | size: 21061472 byte
 5 | bit_rate: 3434122
 6 | container: mkv
 7 | video:
 8 | - id: 1
 9 |   duration: '0:00:49.083000'
10 |   width: 1024 pixel
11 |   height: 576 pixel
12 |   scan_type: PROGRESSIVE
13 |   aspect_ratio: 1.778
14 |   pixel_aspect_ratio: 1.0
15 |   resolution: 576p
16 |   frame_rate: 24.0 FPS
17 |   bit_depth: 8 bit
18 |   codec: H264
19 |   profile: MAIN
20 |   profile_level: '3.1'
21 |   media_type: video/H264
22 |   default: true
23 | audio:
24 | - id: 2
25 |   language: en
26 |   duration: '0:00:49.064000'
27 |   codec: MP3
28 |   profile: LAYER3
29 |   channels_count: 2
30 |   channels: '2.0'
31 |   bit_rate_mode: VBR
32 |   sampling_rate: 48000
33 |   compression: LOSSY
34 |   default: true
35 | provider:
36 |   name: mediainfo


--------------------------------------------------------------------------------
/knowit/rules/subtitle.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | 
 3 | from knowit.core import Rule
 4 | 
 5 | 
 6 | class ClosedCaptionRule(Rule):
 7 |     """Closed caption rule."""
 8 | 
 9 |     cc_re = re.compile(r'(\bcc\d\b)', re.IGNORECASE)
10 | 
11 |     def execute(self, props, pv_props, context):
12 |         """Execute closed caption rule."""
13 |         if '_closed_caption' in pv_props and self.cc_re.search(pv_props['_closed_caption']):
14 |             return True
15 | 
16 |         if 'guessed' in pv_props:
17 |             guessed = pv_props['guessed']
18 |             return guessed.get('closed_caption')
19 | 
20 | 
class HearingImpairedRule(Rule):
    """Rule reporting the hearing impaired flag of a track."""

    def execute(self, props, pv_props, context):
        """Return the ``hearing_impaired`` entry of the guessed properties.

        Returns ``None`` when ``pv_props`` has no ``'guessed'`` entry or that
        entry has no ``'hearing_impaired'`` key.
        """
        if 'guessed' not in pv_props:
            return None
        return pv_props['guessed'].get('hearing_impaired')
29 | 


--------------------------------------------------------------------------------
/tests/data/mkvmerge/test5.mkv.yml:
--------------------------------------------------------------------------------
 1 | duration: 46.665
 2 | path: tests/data/videos/test5.mkv
 3 | container: mkv
 4 | size: 31762747
 5 | video:
 6 | - id: 1
 7 |   language: und
 8 |   width: 1024
 9 |   height: 576
10 |   scan_type: PROGRESSIVE
11 |   resolution: 576p
12 |   codec: H264
13 |   default: true
14 | audio:
15 | - id: 2
16 |   language: und
17 |   codec: AAC
18 |   channels_count: 2
19 |   channels: 2.0
20 |   sampling_rate: 48000
21 |   default: true
22 | - id: 10
23 |   language: en
24 |   codec: AAC
25 |   channels_count: 1
26 |   channels: 1.0
27 |   sampling_rate: 22050
28 | subtitle:
29 | - id: 3
30 |   language: en
31 |   default: true
32 | - id: 4
33 |   language: hu
34 | - id: 5
35 |   language: de
36 | - id: 6
37 |   language: fr
38 | - id: 8
39 |   language: es
40 | - id: 9
41 |   language: it
42 | - id: 11
43 |   language: ja
44 | - id: 7
45 |   language: und
46 | provider:
47 |   name: mkvmerge
48 | 


--------------------------------------------------------------------------------
/tests/data/ffmpeg/issue-39-example-02.mkv.yml:
--------------------------------------------------------------------------------
 1 | path: 'Z:\Videos\Shows (Prospective)\Band of Brothers (2001)\01x01 - Currahee.mkv'
 2 | duration: '1:13:14.162000'
 3 | size: 983004345 byte
 4 | bit_rate: 1789655
 5 | container: mkv
 6 | video:
 7 |   - id: 0
 8 |     duration: '1:13:14.056708'
 9 |     width: 1920 pixel
10 |     height: 1080 pixel
11 |     scan_type: PROGRESSIVE
12 |     aspect_ratio: '1.778'
13 |     pixel_aspect_ratio: '1.0'
14 |     resolution: 1080p
15 |     frame_rate: 23.976 FPS
16 |     bit_rate: 1500697
17 |     codec: H265
18 |     profile: MAIN
19 |     default: true
20 | audio:
21 |   - id: 1
22 |     name: Surround
23 |     language: en
24 |     duration: '1:13:12.106000'
25 |     codec: AAC
26 |     profile: HEAAC
27 |     channels_count: 6
28 |     channels: '5.1'
29 |     bit_rate: 256002
30 |     sampling_rate: 48000
31 |     default: true
32 | subtitle:
33 |   - id: 2
34 |     language: en
35 |     format: PGS
36 | provider:
37 |   name: ffmpeg
38 | 


--------------------------------------------------------------------------------
/tests/data/mkvmerge/media_001.mkv.yml:
--------------------------------------------------------------------------------
 1 | title: Media 001
 2 | duration: '0:57:08.352000'
 3 | path: tests/data/mkvmerge/media_001.mkv
 4 | container: mkv
 5 | video:
 6 | - id: 1
 7 |   language: en
 8 |   width: 3840
 9 |   height: 2160
10 |   resolution: 2160p
11 |   scan_type: PROGRESSIVE
12 |   codec: H265
13 | audio:
14 | - id: 2
15 |   language: en
16 |   codec: TRUEHD
17 |   channels_count: 8
18 |   channels: 7.1
19 |   sampling_rate: 48000
20 |   default: true
21 | - id: 3
22 |   language: en
23 |   codec: AC3
24 |   channels_count: 6
25 |   channels: 5.1
26 |   sampling_rate: 48000
27 | - id: 4
28 |   language: de
29 |   codec: AC3
30 |   channels_count: 6
31 |   channels: 5.1
32 |   sampling_rate: 48000
33 | - id: 5
34 |   language: pt-BR
35 |   codec: DTS
36 |   channels_count: 2
37 |   channels: 2.0
38 |   sampling_rate: 48000
39 | subtitle:
40 | - id: 6
41 |   language: en
42 | - id: 7
43 |   language: de
44 | - id: 8
45 |   language: pt-BR
46 | provider:
47 |   name: mkvmerge
48 | 


--------------------------------------------------------------------------------
/tests/data/mediainfo/7.1-dts-hd-ma-speaker-mapping-test-file.mkv.yml:
--------------------------------------------------------------------------------
 1 | title: 7.1Ch DTS-HD MA - Speaker Mapping Test File
 2 | path: tests/data/7.1-dts-hd-ma-speaker-mapping-test-file.mkv
 3 | duration: '0:01:37.931000'
 4 | size: 40772443 byte
 5 | bit_rate: 3330708
 6 | container: mkv
 7 | video:
 8 | - id: 1
 9 |   duration: '0:01:37.931000'
10 |   width: 1920 pixel
11 |   height: 1080 pixel
12 |   scan_type: PROGRESSIVE
13 |   aspect_ratio: 1.778
14 |   pixel_aspect_ratio: 1.0
15 |   resolution: 1080p
16 |   frame_rate: 23.976 FPS
17 |   bit_depth: 8 bit
18 |   codec: H264
19 |   profile: MAIN
20 |   profile_level: '4'
21 |   media_type: video/H264
22 |   default: true
23 | audio:
24 | - id: 2
25 |   name: 7.1Ch DTS-HD MA
26 |   language: en
27 |   duration: '0:01:37.931000'
28 |   codec: DTSHD
29 |   profile: MA
30 |   channels_count: 8
31 |   channels: '7.1'
32 |   bit_depth: 24 bit
33 |   bit_rate_mode: VBR
34 |   sampling_rate: 48000
35 |   compression: LOSSLESS
36 |   default: true
37 | provider:
38 |   name: mediainfo


--------------------------------------------------------------------------------
/tests/test_ffmpeg.py:
--------------------------------------------------------------------------------
 1 | import pickle
 2 | 
 3 | import pytest
 4 | 
 5 | from knowit import know
 6 | 
 7 | from . import (
 8 |     assert_expected,
 9 |     id_func,
10 |     mediafiles
11 | )
12 | 
13 | 
@pytest.mark.parametrize('media', mediafiles.get_json_media('ffmpeg'), ids=id_func)
def test_ffmpeg_provider(ffmpeg, media, options):
    """Check ``know`` output against the expected data for each JSON ffmpeg sample."""
    # Arrange: store the sample's input data in the fixture under its video path
    video_path = media.video_path
    ffmpeg[video_path] = media.input_data

    # Act
    result = know(video_path, options)

    # Assert: the result matches expectations and survives a pickle round trip
    assert_expected(media.expected_data, result, options)
    restored = pickle.loads(pickle.dumps(result))
    assert restored == result
25 | 
26 | 
@pytest.mark.parametrize('media', mediafiles.get_real_media('ffmpeg'), ids=id_func)
def test_ffmpeg_provider_real_media(media, options):
    """Check ``know`` output for each real media file with the ffmpeg provider selected."""
    # Arrange: select the provider through the options
    options['provider'] = 'ffmpeg'

    # Act
    result = know(media.video_path, options)

    # Assert: the result matches expectations and survives a pickle round trip
    assert_expected(media.expected_data, result, options)
    restored = pickle.loads(pickle.dumps(result))
    assert restored == result
38 | 


--------------------------------------------------------------------------------
/tests/test_mkvmerge.py:
--------------------------------------------------------------------------------
 1 | import pickle
 2 | 
 3 | import pytest
 4 | 
 5 | from knowit import know
 6 | 
 7 | from . import (
 8 |     assert_expected,
 9 |     id_func,
10 |     mediafiles
11 | )
12 | 
13 | 
@pytest.mark.parametrize('media', mediafiles.get_json_media('mkvmerge'), ids=id_func)
def test_mkvmerge_provider(mkvmerge, media, options):
    """Check ``know`` output against the expected data for each JSON mkvmerge sample."""
    # Arrange: store the sample's input data in the fixture under its video path
    video_path = media.video_path
    mkvmerge[video_path] = media.input_data

    # Act
    result = know(video_path, options)

    # Assert: the result matches expectations and survives a pickle round trip
    assert_expected(media.expected_data, result, options)
    restored = pickle.loads(pickle.dumps(result))
    assert restored == result
25 | 
26 | 
@pytest.mark.parametrize('media', mediafiles.get_real_media('mkvmerge'), ids=id_func)
def test_mkvmerge_provider_real_media(media, options):
    """Check ``know`` output for each real media file with the mkvmerge provider selected."""
    # Arrange: select the provider through the options
    options['provider'] = 'mkvmerge'

    # Act
    result = know(media.video_path, options)

    # Assert: the result matches expectations and survives a pickle round trip
    assert_expected(media.expected_data, result, options)
    restored = pickle.loads(pickle.dumps(result))
    assert restored == result
38 | 


--------------------------------------------------------------------------------
/knowit/rules/general.py:
--------------------------------------------------------------------------------
 1 | from logging import NullHandler, getLogger
 2 | 
 3 | from trakit.api import trakit
 4 | 
 5 | from knowit.core import Rule
 6 | 
# Module-level logger named after this module. A NullHandler (a handler that
# discards every record) is attached to it.
logger = getLogger(__name__)
logger.addHandler(NullHandler())
 9 | 
10 | 
class GuessTitleRule(Rule):
    """Rule guessing track properties from the track name."""

    def execute(self, props, pv_props, context):
        """Run ``trakit`` on ``props['name']`` and return its result.

        When ``props`` carries a truthy ``'language'``, it is passed to
        ``trakit`` as the ``expected_language`` option. Returns ``None`` when
        there is no ``'name'`` in ``props`` or when the guess is empty.
        """
        if 'name' not in props:
            return None

        trakit_options = {}
        expected = props.get('language')
        if expected:
            trakit_options['expected_language'] = expected

        result = trakit(props['name'], trakit_options)
        return result if result else None
22 | 
23 | 
class LanguageRule(Rule):
    """Rule exposing the language found in the guessed properties."""

    def execute(self, props, pv_props, context):
        """Return ``pv_props['guessed']['language']`` when both keys exist.

        Returns ``None`` otherwise.
        """
        if 'guessed' in pv_props:
            guessed = pv_props['guessed']
            if 'language' in guessed:
                return guessed['language']
        return None
35 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2016 Rato
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/tests/test_audiochannels.yml:
--------------------------------------------------------------------------------
 1 | '1.0':
 2 |   - channels_count: 1
 3 |   - channels_count: 1
 4 |     channel_positions: 1/0/0
 5 | 
 6 | '2.0':
 7 |   - channels_count: 2
 8 |   - channels_count: 2
 9 |     channel_positions: 2/0/0
10 | 
11 | '2.1':
12 |   - channels_count: 3
13 |     channel_positions: 2/0/0.1
14 | 
15 | '3.0':
16 |   - channels_count: 3
17 |     channel_positions: 3/0/0
18 | 
19 | '3.1':
20 |   - channels_count: 4
21 |     channel_positions: 3/0/0.1
22 | 
23 | '4.0':
24 |   - channels_count: 4
25 |     channel_positions: 2/2/0
26 |   - channels_count: 4
27 |     channel_positions: 2/0/2
28 | 
29 | '4.1':
30 |   - channels_count: 5
31 |     channel_positions: 2/2/0.1
32 |   - channels_count: 5
33 |     channel_positions: 2/0/2.1
34 | 
35 | '5.0':
36 |   - channels_count: 5
37 |     channel_positions: 3/2/0
38 | 
39 | '5.1':
40 |   - channels_count: 6
41 |   - channels_count: 6
42 |     channel_positions: 3/2/0.1
43 | 
44 | '7.1':
45 |   - channels_count: 8
46 |   - channels_count: 8
47 |     channel_positions: [3/2/2.1, 3/2/0.1]
48 | 
49 | '6.1':
50 |    - channels_count: 7
51 |      channel_positions: 3/3/0.1
52 | 
53 | '20.1':
54 |   - channels_count: 21
55 |     channel_positions: 5/4/11.1
56 | 


--------------------------------------------------------------------------------
/.github/workflows/publish.yml:
--------------------------------------------------------------------------------
 1 | name: Publish
 2 | 
 3 | on:
 4 |   release:
 5 |     types:
 6 |       - created
 7 | 
 8 | jobs:
 9 |   publish:
10 |     runs-on: ubuntu-latest
11 |     steps:
12 |       - name: Check out the repo
13 |         uses: actions/checkout@v4
14 | 
15 |       - name: Setup python
16 |         uses: actions/setup-python@v5
17 |         with:
18 |           python-version: 3.13
19 | 
20 |       - name: Install poetry
21 |         uses: snok/install-poetry@v1.4.1
22 |         with:
23 |           virtualenvs-in-project: true
24 | 
25 |       - name: Load cached venv
26 |         id: cached-poetry-dependencies
27 |         uses: actions/cache@v4
28 |         with:
29 |           path: .venv
30 |           key: venv-${{ runner.os }}-${{ hashFiles('pyproject.toml') }}
31 | 
32 |       - name: Install dependencies
33 |         if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
34 |         run: poetry install --no-interaction --no-root
35 | 
36 |       - name: Install library
37 |         run: poetry install --no-interaction
38 | 
39 |       - name: Publish
40 |         env:
41 |           POETRY_PYPI_TOKEN_PYPI: ${{ secrets.PYPI_API_TOKEN }}
42 |         run: poetry publish --build


--------------------------------------------------------------------------------
/.github/workflows/docker-publish.yml:
--------------------------------------------------------------------------------
 1 | name: Publish Docker image
 2 | 
 3 | on:
 4 |   release:
 5 |     types:
 6 |       - created
 7 | 
 8 | jobs:
 9 |   publish_to_docker_hub:
10 |     name: Publish Docker image to Docker Hub
11 |     runs-on: ubuntu-latest
12 |     steps:
13 |       - name: Check out the repo
14 |         uses: actions/checkout@v4
15 | 
16 |       - name: Log in to Docker Hub
17 |         uses: docker/login-action@v3
18 |         with:
19 |           username: ${{ secrets.DOCKER_USERNAME }}
20 |           password: ${{ secrets.DOCKER_PASSWORD }}
21 | 
22 |       - name: Extract metadata (tags, labels) for Docker
23 |         id: meta
24 |         uses: docker/metadata-action@v5
25 |         with:
26 |           images: ratoaq2/knowit
27 | 
28 |       - name: Set up QEMU
29 |         uses: docker/setup-qemu-action@v3
30 | 
31 |       - name: Set up Docker Buildx
32 |         uses: docker/setup-buildx-action@v3
33 | 
34 |       - name: Build and push Docker image
35 |         uses: docker/build-push-action@v6
36 |         with:
37 |           context: .
38 |           platforms: linux/amd64,linux/arm64
39 |           push: true
40 |           tags: ${{ steps.meta.outputs.tags }}
41 |           labels: ${{ steps.meta.outputs.labels }}
42 | 


--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
 1 | [flake8]
 2 | exclude = .git,.github,.pytest_cache,.venv,dist
 3 | import-order-style = cryptography
 4 | application-import-names = knowit
 5 | max-line-length = 120
 6 | ignore =
 7 |     # D100 Missing docstring in public module
 8 |     D100
 9 |     # D103 Missing docstring in public function
10 |     D103
11 |     # I201 Missing newline between import groups
12 |     I201
13 | per-file-ignores =
14 |     __init__.py:
15 |         # D104 Missing docstring in public package
16 |         D104
17 |         # F401 Imported but unused
18 |         F401
19 |     knowit/__init__.py:
20 |         # E402 Module level import not at top of file
21 |         E402
22 |         # F401 Imported but unused
23 |         F401
24 |     knowit/api.py:
25 |         # N818 error suffix in exception names
26 |         N818
27 | 
28 | [mypy]
29 | 
30 | [mypy-pint.*]
31 | ignore_missing_imports = True
32 | 
33 | [mypy-babelfish.*]
34 | ignore_missing_imports = True
35 | 
36 | [mypy-enzyme.*]
37 | ignore_missing_imports = True
38 | 
39 | [mypy-pkg_resources.*]
40 | ignore_missing_imports = True
41 | 
42 | [mypy-pymediainfo.*]
43 | ignore_missing_imports = True
44 | 
45 | [mypy-trakit.*]
46 | ignore_missing_imports = True
47 | 
48 | [mypy-yaml.*]
49 | ignore_missing_imports = True


--------------------------------------------------------------------------------
/tests/test_enzyme.py:
--------------------------------------------------------------------------------
 1 | import pickle
 2 | 
 3 | import pytest
 4 | from knowit import KnowitException, know
 5 | 
 6 | from . import (
 7 |     assert_expected,
 8 |     id_func,
 9 |     mediafiles
10 | )
11 | 
12 | 
@pytest.mark.parametrize('media', mediafiles.get_json_media('enzyme'), ids=id_func)
def test_enzyme_provider(enzyme, media, options):
    """Check ``know`` output against the expected data for each JSON enzyme sample."""
    # Arrange: store the sample's input data in the fixture under its video path
    video_path = media.video_path
    enzyme[video_path] = media.input_data

    # Act
    result = know(video_path, options)

    # Assert: the result matches expectations and survives a pickle round trip
    assert_expected(media.expected_data, result, options)
    restored = pickle.loads(pickle.dumps(result))
    assert restored == result
24 | 
25 | 
@pytest.mark.parametrize('media', mediafiles.get_real_media('enzyme'), ids=id_func)
def test_enzyme_provider_real_media(media, options):
    """Check ``know`` on real media files with the enzyme provider selected.

    Samples without expected data must make ``know`` raise
    ``KnowitException``; all others must match their expected data.
    """
    # Arrange
    options['provider'] = 'enzyme'
    options['fail_on_error'] = False

    if media.expected_data:
        # Act
        result = know(media.video_path, options)

        # Assert: the result matches expectations and survives a pickle round trip
        assert_expected(media.expected_data, result, options)
        assert pickle.loads(pickle.dumps(result)) == result
    else:
        # No expected data: the call itself is expected to fail
        with pytest.raises(KnowitException):
            know(media.video_path, options)
42 | 


--------------------------------------------------------------------------------
/tests/data/ffmpeg/test5-ffmpeg-v2.8.15.mkv.yml:
--------------------------------------------------------------------------------
 1 | title: Big Buck Bunny - test 8
 2 | path: test5.mkv
 3 | duration: '0:00:46.665000'
 4 | size: 31762747 byte
 5 | bit_rate: 5445236
 6 | container: mkv
 7 | video:
 8 | - id: 0
 9 |   width: 1024 pixel
10 |   height: 576 pixel
11 |   scan_type: PROGRESSIVE
12 |   aspect_ratio: 1.778
13 |   pixel_aspect_ratio: 1.0
14 |   resolution: 576p
15 |   frame_rate: 24.0 FPS
16 |   bit_depth: 8 bit
17 |   codec: H264
18 |   profile: MAIN
19 |   default: true
20 | audio:
21 | - id: 1
22 |   codec: AAC
23 |   profile: LC
24 |   channels_count: 2
25 |   channels: '2.0'
26 |   sampling_rate: 48000
27 |   default: true
28 | - id: 8
29 |   name: Commentary
30 |   language: en
31 |   codec: AAC
32 |   profile: LC
33 |   channels_count: 1
34 |   channels: '1.0'
35 |   sampling_rate: 22050
36 | subtitle:
37 | - id: 2
38 |   language: en
39 |   format: SUBRIP
40 |   default: true
41 | - id: 3
42 |   language: hu
43 |   format: SUBRIP
44 | - id: 4
45 |   language: de
46 |   format: SUBRIP
47 | - id: 5
48 |   language: fr
49 |   format: SUBRIP
50 | - id: 6
51 |   language: es
52 |   format: SUBRIP
53 | - id: 7
54 |   language: it
55 |   format: SUBRIP
56 | - id: 9
57 |   language: ja
58 |   format: SUBRIP
59 | - id: 10
60 |   format: SUBRIP
61 | provider:
62 |   name: ffmpeg


--------------------------------------------------------------------------------
/tests/data/ffmpeg/test5.mkv.yml:
--------------------------------------------------------------------------------
 1 | title: Big Buck Bunny - test 8
 2 | path: tests/data/videos/test5.mkv
 3 | duration: '0:00:46.665000'
 4 | size: 31762747 byte
 5 | bit_rate: 5445236
 6 | container: mkv
 7 | video:
 8 | - id: 0
 9 |   width: 1024 pixel
10 |   height: 576 pixel
11 |   scan_type: PROGRESSIVE
12 |   aspect_ratio: 1.778
13 |   pixel_aspect_ratio: 1.0
14 |   resolution: 576p
15 |   frame_rate: 24.0 FPS
16 |   bit_depth: 8 bit
17 |   codec: H264
18 |   profile: MAIN
19 |   default: true
20 | audio:
21 | - id: 1
22 |   codec: AAC
23 |   profile: LC
24 |   channels_count: 2
25 |   channels: '2.0'
26 |   sampling_rate: 48000
27 |   default: true
28 | - id: 8
29 |   name: Commentary
30 |   language: en
31 |   codec: AAC
32 |   profile: LC
33 |   channels_count: 1
34 |   channels: '1.0'
35 |   sampling_rate: 22050
36 | subtitle:
37 | - id: 2
38 |   language: en
39 |   format: SUBRIP
40 |   default: true
41 | - id: 3
42 |   language: hu
43 |   format: SUBRIP
44 | - id: 4
45 |   language: de
46 |   format: SUBRIP
47 | - id: 5
48 |   language: fr
49 |   format: SUBRIP
50 | - id: 6
51 |   language: es
52 |   format: SUBRIP
53 | - id: 7
54 |   language: it
55 |   format: SUBRIP
56 | - id: 9
57 |   language: ja
58 |   format: SUBRIP
59 | - id: 10
60 |   format: SUBRIP
61 | provider:
62 |   name: ffmpeg


--------------------------------------------------------------------------------
/knowit/__init__.py:
--------------------------------------------------------------------------------
 1 | """Know your media files better."""
 2 | from importlib import metadata
 3 | 
 4 | __title__ = metadata.metadata(__package__)['name']
 5 | __version__ = metadata.version(__package__)
 6 | __short_version__ = '.'.join(__version__.split('.')[:2])
 7 | __author__ = metadata.metadata(__package__)['author']
 8 | __license__ = metadata.metadata(__package__)['license']
 9 | __url__ = 'https://github.com/ratoaq2/knowit'
10 | 
11 | del metadata
12 | 
13 | #: Video extensions
14 | VIDEO_EXTENSIONS = ('.3g2', '.3gp', '.3gp2', '.3gpp', '.60d', '.ajp', '.asf', '.asx', '.avchd', '.avi', '.bik',
15 |                     '.bix', '.box', '.cam', '.dat', '.divx', '.dmf', '.dv', '.dvr-ms', '.evo', '.flc', '.fli',
16 |                     '.flic', '.flv', '.flx', '.gvi', '.gvp', '.h264', '.m1v', '.m2p', '.m2ts', '.m2v', '.m4e',
17 |                     '.m4v', '.mjp', '.mjpeg', '.mjpg', '.mk3d', '.mkv', '.moov', '.mov', '.movhd', '.movie', '.movx',
18 |                     '.mp4', '.mpe', '.mpeg', '.mpg', '.mpv', '.mpv2', '.mxf', '.nsv', '.nut', '.ogg', '.ogm', '.ogv',
19 |                     '.omf', '.ps', '.qt', '.ram', '.rm', '.rmvb', '.swf', '.ts', '.vfw', '.vid', '.video', '.viv',
20 |                     '.vivo', '.vob', '.vro', '.webm', '.wm', '.wmv', '.wmx', '.wrap', '.wvx', '.wx', '.x264', '.xvid')
21 | 
22 | from knowit.api import KnowitException, know
23 | 


--------------------------------------------------------------------------------
/tests/data/ffmpeg/issue-39-example-01.mkv.yml:
--------------------------------------------------------------------------------
 1 | path: 'Z:\Videos\Shows\Heroes (2006)\01x23 - How to Stop an Exploding Man.mkv'
 2 | duration: '0:44:56.704000'
 3 | size: 369338385 byte
 4 | bit_rate: 1095673
 5 | container: mkv
 6 | video:
 7 |   - id: 0
 8 |     duration: '0:44:56.485708'
 9 |     width: 1920 pixel
10 |     height: 1080 pixel
11 |     scan_type: PROGRESSIVE
12 |     aspect_ratio: '1.778'
13 |     pixel_aspect_ratio: '1.0'
14 |     resolution: 1080p
15 |     frame_rate: 23.976 FPS
16 |     bit_rate: 900213
17 |     codec: H265
18 |     profile: MAIN
19 |     default: true
20 | audio:
21 |   - id: 1
22 |     name: Surround
23 |     language: und
24 |     duration: '0:44:56.661000'
25 |     codec: AAC
26 |     profile: HEAAC
27 |     channels_count: 6
28 |     channels: '5.1'
29 |     bit_rate: 192003
30 |     sampling_rate: 48000
31 |     default: true
32 | subtitle:
33 |   - id: 2
34 |     language: ar
35 |     format: ASS
36 |   - id: 3
37 |     language: da
38 |     format: ASS
39 |   - id: 4
40 |     language: nl
41 |     format: ASS
42 |   - id: 5
43 |     language: en
44 |     format: ASS
45 |   - id: 6
46 |     language: fr
47 |     format: ASS
48 |   - id: 7
49 |     language: de
50 |     format: ASS
51 |   - id: 8
52 |     language: fa
53 |     format: ASS
54 |   - id: 9
55 |     language: es
56 |     format: ASS
57 |   - id: 10
58 |     language: vi
59 |     format: ASS
60 | provider:
61 |   name: ffmpeg
62 | 


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM python:3.13-slim as builder
 2 | 
 3 | ENV PYTHONFAULTHANDLER=1 \
 4 |     PYTHONUNBUFFERED=1 \
 5 |     PYTHONHASHSEED=random \
 6 |     PYTHONDONTWRITEBYTECODE=1 \
 7 |     PIP_NO_CACHE_DIR=off \
 8 |     PIP_DISABLE_PIP_VERSION_CHECK=on \
 9 |     PIP_DEFAULT_TIMEOUT=100 \
10 |     POETRY_VERSION=1.8.3 \
11 |     POETRY_VIRTUALENVS_CREATE=0
12 | 
13 | RUN apt-get update \
14 |     && apt-get install -y --no-install-recommends python3-distutils python3-venv \
15 |     && apt-get clean \
16 |     && rm -rf /var/lib/apt/lists/*
17 | 
18 | RUN pip install "poetry==$POETRY_VERSION"
19 | 
20 | WORKDIR /app
21 | COPY poetry.lock pyproject.toml README.md /app/
22 | RUN poetry install --no-interaction --no-ansi --only main
23 | RUN pip install platformdirs
24 | COPY knowit/ /app/knowit/
25 | RUN poetry build --no-interaction --no-ansi
26 | 
27 | 
28 | FROM python:3.13-slim
29 | 
30 | ENV PYTHONFAULTHANDLER=1 \
31 |     PYTHONUNBUFFERED=1 \
32 |     PYTHONHASHSEED=random \
33 |     PYTHONDONTWRITEBYTECODE=1 \
34 |     PIP_NO_CACHE_DIR=off \
35 |     PIP_DISABLE_PIP_VERSION_CHECK=on \
36 |     PIP_DEFAULT_TIMEOUT=100
37 | 
38 | RUN apt-get update \
39 |  && apt-get install -y --no-install-recommends mediainfo ffmpeg mkvtoolnix \
40 |  && apt-get clean \
41 |  && rm -rf /var/lib/apt/lists/*
42 | 
43 | COPY --from=builder /app/dist /usr/src/dist
44 | 
45 | RUN pip install /usr/src/dist/knowit-*.tar.gz
46 | 
47 | WORKDIR /
48 | 
49 | ENTRYPOINT ["knowit"]
50 | CMD ["--help"]
51 | 


--------------------------------------------------------------------------------
/.github/workflows/test.yml:
--------------------------------------------------------------------------------
 1 | name: Test
 2 | 
 3 | on:
 4 |   push:
 5 |     branches:
 6 |       - main
 7 |   pull_request:
 8 | 
 9 | jobs:
10 |   test:
11 |     runs-on: ubuntu-latest
12 |     strategy:
13 |       matrix:
14 |         python-version: [3.9, '3.10', 3.11, 3.12, 3.13]
15 |     steps:
16 |       - name: Check out the repo
17 |         uses: actions/checkout@v4
18 | 
19 |       - name: Install system dependencies
20 |         run: |
21 |           sudo apt-get update
22 |           sudo apt-get install -y mediainfo ffmpeg mkvtoolnix
23 | 
24 |       - name: Setup python
25 |         uses: actions/setup-python@v5
26 |         with:
27 |           python-version: ${{ matrix.python-version }}
28 | 
29 |       - name: Install poetry
30 |         uses: snok/install-poetry@v1.4.1
31 |         with:
32 |           virtualenvs-in-project: true
33 | 
34 |       - name: Load cached venv
35 |         id: cached-poetry-dependencies
36 |         uses: actions/cache@v4
37 |         with:
38 |           path: .venv
39 |           key: venv-${{ runner.os }}-${{ hashFiles('pyproject.toml') }}
40 | 
41 |       - name: Install dependencies
42 |         if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
43 |         run: poetry install --no-interaction --no-root
44 | 
45 |       - name: Install library
46 |         run: poetry install --no-interaction
47 | 
48 |       - name: Run tests
49 |         run: |
50 |           source .venv/bin/activate
51 |           bash scripts/test.sh
52 | 


--------------------------------------------------------------------------------
/tests/data/enzyme/7.1-dts-hd-ma-speaker-mapping-test-file.mkv.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "info": {
 3 |         "duration": "0:01:37",
 4 |         "muxing_app": "libebml v1.3.0 + libmatroska v1.4.1",
 5 |         "date_utc": "2013-12-13 17:49:28",
 6 |         "writing_app": "mkvmerge v6.6.0 ('The Edge Of The In Between') built on Dec  1 2013 17:55:00",
 7 |         "title": "7.1Ch DTS-HD MA - Speaker Mapping Test File"
 8 |     },
 9 |     "video_tracks": [
10 |         {
11 |             "forced": false,
12 |             "display_height": 1080,
13 |             "language": "und",
14 |             "default": true,
15 |             "enabled": true,
16 |             "number": 1,
17 |             "crop": {},
18 |             "height": 1080,
19 |             "width": 1920,
20 |             "codec_id": "V_MPEG4/ISO/AVC",
21 |             "lacing": false,
22 |             "type": 1,
23 |             "display_width": 1920,
24 |             "interlaced": false
25 |         }
26 |     ],
27 |     "tags": [],
28 |     "chapters": [],
29 |     "audio_tracks": [
30 |         {
31 |             "forced": false,
32 |             "name": "7.1Ch DTS-HD MA",
33 |             "default": true,
34 |             "enabled": true,
35 |             "number": 2,
36 |             "sampling_frequency": 48000.0,
37 |             "channels": 6,
38 |             "codec_id": "A_DTS",
39 |             "lacing": true,
40 |             "type": 2
41 |         }
42 |     ],
43 |     "recurse_seek_head": false,
44 |     "subtitle_tracks": []
45 | }


--------------------------------------------------------------------------------
/tests/data/mediainfo/test5.mkv.yml:
--------------------------------------------------------------------------------
 1 | title: Big Buck Bunny - test 8
 2 | path: tests/data/videos/test5.mkv
 3 | duration: '0:00:46.665000'
 4 | size: 31762747 byte
 5 | bit_rate: 5445237
 6 | container: mkv
 7 | video:
 8 | - id: 1
 9 |   duration: '0:00:46.667000'
10 |   width: 1024 pixel
11 |   height: 576 pixel
12 |   scan_type: PROGRESSIVE
13 |   aspect_ratio: 1.778
14 |   pixel_aspect_ratio: 1.0
15 |   resolution: 576p
16 |   frame_rate: 24.0 FPS
17 |   bit_depth: 8 bit
18 |   codec: H264
19 |   profile: MAIN
20 |   profile_level: '3.1'
21 |   media_type: video/H264
22 |   default: true
23 | audio:
24 | - id: 2
25 |   duration: '0:00:46.665000'
26 |   codec: AAC
27 |   profile: LC
28 |   channels_count: 2
29 |   channels: '2.0'
30 |   sampling_rate: 48000
31 |   compression: LOSSY
32 |   default: true
33 | - id: 10
34 |   name: Commentary
35 |   language: en
36 |   duration: '0:00:46.665000'
37 |   codec: AAC
38 |   profile: LC
39 |   channels_count: 1
40 |   channels: '1.0'
41 |   sampling_rate: 22050
42 |   compression: LOSSY
43 | subtitle:
44 | - id: 3
45 |   language: en
46 |   format: SUBRIP
47 |   default: true
48 | - id: 4
49 |   language: hu
50 |   format: SUBRIP
51 | - id: 5
52 |   language: de
53 |   format: SUBRIP
54 | - id: 6
55 |   language: fr
56 |   format: SUBRIP
57 | - id: 8
58 |   language: es
59 |   format: SUBRIP
60 | - id: 9
61 |   language: it
62 |   format: SUBRIP
63 | - id: 11
64 |   language: ja
65 |   format: SUBRIP
66 | - id: 7
67 |   format: SUBRIP
68 | provider:
69 |   name: mediainfo


--------------------------------------------------------------------------------
/tests/test_mediainfo.py:
--------------------------------------------------------------------------------
 1 | import pickle
 2 | 
 3 | import pytest
 4 | 
 5 | from tests import mediafiles
 6 | from knowit import know
 7 | 
 8 | from . import assert_expected, id_func
 9 | 
10 | 
@pytest.mark.parametrize('media', mediafiles.get_json_media('mediainfo'), ids=id_func)
def test_mediainfo_provider(mediainfo, media, options):
    """Check the mediainfo provider against canned input registered for the video path."""
    # Arrange: register the canned input for this video path
    mediainfo.update({media.video_path: media.input_data})

    # Act
    result = know(media.video_path, options)

    # Assert: expected content, and the result is unchanged by a pickle round-trip
    assert_expected(media.expected_data, result, options)
    restored = pickle.loads(pickle.dumps(result))
    assert restored == result
22 | 
23 | 
@pytest.mark.parametrize('media', mediafiles.get_real_media('mediainfo'), ids=id_func)
def test_mediainfo_provider_real_media(media, options):
    """Check the mediainfo provider on the entries returned by ``mediafiles.get_real_media``."""
    # Arrange: force the provider under test
    options.update(provider='mediainfo')

    # Act
    result = know(media.video_path, options)

    # Assert: expected content, and the result is unchanged by a pickle round-trip
    assert_expected(media.expected_data, result, options)
    restored = pickle.loads(pickle.dumps(result))
    assert restored == result
35 | 
36 | 
@pytest.mark.parametrize('media', mediafiles.get_real_media('mediainfo'), ids=id_func)
def test_mediainfo_provider_real_media_cli(mediainfo_cli, media, options):
    """Same check as the real-media test, with the ``mediainfo_cli`` fixture active."""
    # Arrange: force the provider under test
    options.update(provider='mediainfo')

    # Act
    result = know(media.video_path, options)

    # Assert: expected content, and the result is unchanged by a pickle round-trip
    assert_expected(media.expected_data, result, options)
    restored = pickle.loads(pickle.dumps(result))
    assert restored == result
48 | 


--------------------------------------------------------------------------------
/knowit/units.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | import typing
 3 | 
 4 | logger = logging.getLogger(__name__)
 5 | 
 6 | 
 7 | class NullRegistry:
 8 |     """A NullRegistry that masquerades as a pint.UnitRegistry."""
 9 | 
10 |     def __init__(self):
11 |         """Initialize a null registry."""
12 | 
13 |     def __getattr__(self, item: typing.Any) -> int:
14 |         """Return a Scalar 1 to simulate a unit."""
15 |         return 1
16 | 
17 |     def __call__(self, value: str) -> float:
18 |         """Try converting to int, to float and fallback to a scalar 1.0."""
19 |         try:
20 |             return int(value)
21 |         except ValueError:
22 |             try:
23 |                 return float(value)
24 |             except ValueError:
25 |                 pass
26 |         return 1
27 | 
28 |     def __bool__(self):
29 |         """Return False since a NullRegistry is not a pint.UnitRegistry."""
30 |         return False
31 | 
32 |     def define(self, *args, **kwargs):
33 |         """Pretend to add unit to the registry."""
34 | 
35 | 
def _build_unit_registry():
    """Build the unit registry, falling back to a NullRegistry when pint cannot be used.

    A pint registry gets the extra ``FPS`` unit and is installed as pint's
    application registry. A missing pint package is silently tolerated; any
    other failure is logged before falling back.
    """
    try:
        import pint

        ureg = pint.UnitRegistry()
        ureg.define('FPS = 1 * hertz')

        pint.set_application_registry(ureg)
    except ModuleNotFoundError:
        # pint is not installed: fall back without any noise
        return NullRegistry()
    except Exception:
        logger.exception("Cannot import the pint package")
        return NullRegistry()
    else:
        return ureg
51 | 
52 | 
# Registry built once at import time: a pint.UnitRegistry when pint is usable,
# otherwise a NullRegistry (which evaluates as falsy).
units = _build_unit_registry()
54 | 


--------------------------------------------------------------------------------
/knowit/properties/audio.py:
--------------------------------------------------------------------------------
 1 | import typing
 2 | 
 3 | from knowit.core import Configurable, Property
 4 | 
 5 | 
class BitRateMode(Configurable[str]):
    """Bit Rate mode property.

    Adds no behaviour of its own: everything is inherited from ``Configurable``.
    """
 8 | 
 9 | 
class AudioCompression(Configurable[str]):
    """Audio Compression property.

    Adds no behaviour of its own: everything is inherited from ``Configurable``.
    """
12 | 
13 | 
class AudioProfile(Configurable[str]):
    """Audio profile property.

    Adds no behaviour of its own: everything is inherited from ``Configurable``.
    """
16 | 
17 | 
class AudioChannels(Property[int]):
    """Audio Channels property."""

    # Lower-cased textual values that are skipped without being reported.
    ignored = {
        'object based',  # Dolby Atmos
    }

    def handle(self, value: typing.Union[int, str], context: typing.MutableMapping) -> typing.Optional[int]:
        """Convert the raw channel value to an int.

        Integers are returned untouched. Strings listed in ``ignored``
        (case-insensitive) give ``None``; any other string is parsed with
        ``int`` and, when that fails, reported and turned into ``None``.
        """
        if isinstance(value, int):
            return value

        if value.lower() in self.ignored:
            return None

        try:
            return int(value)
        except ValueError:
            self.report(value, context)
            return None
36 | 
37 | 
class AudioCodec(Configurable[str]):
    """Audio codec property."""

    @classmethod
    def _extract_key(cls, value) -> str:
        """Build the lookup key: upper-cased, without a leading ``A_``, cut at the first space."""
        text = str(value).upper()
        if text.startswith('A_'):
            text = text[len('A_'):]

        # keep only the first word. E.g.: 'AAC LC' => 'AAC'
        first_word, _, _ = text.partition(' ')
        return first_word

    @classmethod
    def _extract_fallback_key(cls, value, key) -> typing.Optional[str]:
        """Return the part of ``key`` before its first ``/``, or None when it has no ``/``."""
        return key.partition('/')[0] if '/' in key else None
56 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | env/
 12 | .venv/
 13 | .pytest_cache/
 14 | .mypy_cache/
 15 | build/
 16 | develop-eggs/
 17 | dist/
 18 | downloads/
 19 | eggs/
 20 | .eggs/
 21 | lib/
 22 | lib64/
 23 | parts/
 24 | sdist/
 25 | var/
 26 | *.egg-info/
 27 | .installed.cfg
 28 | *.egg
 29 | 
 30 | # PyInstaller
 31 | #  Usually these files are written by a python script from a template
 32 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 33 | *.manifest
 34 | *.spec
 35 | 
 36 | # Installer logs
 37 | pip-log.txt
 38 | pip-delete-this-directory.txt
 39 | 
 40 | # Unit test / coverage reports
 41 | htmlcov/
 42 | .tox/
 43 | .coverage
 44 | .coverage.*
 45 | .cache
 46 | nosetests.xml
 47 | coverage.xml
 48 | *,cover
 49 | .hypothesis/
 50 | 
 51 | # Translations
 52 | *.mo
 53 | *.pot
 54 | 
 55 | # Django stuff:
 56 | *.log
 57 | local_settings.py
 58 | 
 59 | # Flask stuff:
 60 | instance/
 61 | .webassets-cache
 62 | 
 63 | # Scrapy stuff:
 64 | .scrapy
 65 | 
 66 | # Sphinx documentation
 67 | docs/_build/
 68 | 
 69 | # PyBuilder
 70 | target/
 71 | 
 72 | # IPython Notebook
 73 | .ipynb_checkpoints
 74 | 
 75 | # pyenv
 76 | .python-version
 77 | 
 78 | # celery beat schedule file
 79 | celerybeat-schedule
 80 | 
 81 | # dotenv
 82 | .env
 83 | 
 84 | # virtualenv
 85 | .venv*/
 86 | venv*/
 87 | ENV/
 88 | 
 89 | # Spyder project settings
 90 | .spyderproject
 91 | 
 92 | # Rope project settings
 93 | .ropeproject
 94 | 
 95 | # IDE
 96 | .idea
 97 | 
 98 | # binaries
 99 | *.mkv
100 | 


--------------------------------------------------------------------------------
/tests/data/mediainfo/media_001.mkv.yml:
--------------------------------------------------------------------------------
 1 | title: Media 001
 2 | path: tests/data/mediainfo/media_001.mkv
 3 | duration: 3261.384
 4 | size: 28071815971
 5 | bit_rate: 68858659
 6 | container: mkv
 7 | video:
 8 | - id: 1
 9 |   language: en
10 |   duration: 3259.381
11 |   size: 25355039372
12 |   width: 3840
13 |   height: 2160
14 |   scan_type: PROGRESSIVE
15 |   aspect_ratio: 1.778
16 |   pixel_aspect_ratio: 1.0
17 |   resolution: 2160p
18 |   frame_rate: 23.976
19 |   bit_rate: 62232772
20 |   bit_depth: 10
21 |   codec: H265
22 |   profile: MAIN10
23 |   profile_level: 5.1
24 |   hdr_format:
25 |     - DV
26 |     - HDR10
27 |   media_type: video/H265
28 | audio:
29 | - id: 2
30 |   language: en
31 |   duration: 3259.381
32 |   size: 1912453744
33 |   codec:
34 |   - TRUEHD
35 |   - ATMOS
36 |   channels_count: 8
37 |   channels: 7.1
38 |   bit_rate: 4694029
39 |   bit_rate_mode: VBR
40 |   sampling_rate: 48000
41 |   compression: LOSSLESS
42 |   default: true
43 | - id: 3
44 |   language: en
45 |   duration: 3259.392
46 |   size: 260751360
47 |   codec: AC3
48 |   channels_count: 6
49 |   channels: 5.1
50 |   bit_rate: 640000
51 |   bit_rate_mode: CBR
52 |   sampling_rate: 48000
53 |   compression: LOSSY
54 | - id: 4
55 |   language: de
56 |   duration: 3258.272
57 |   size: 182463232
58 |   codec: AC3
59 |   channels_count: 6
60 |   channels: 5.1
61 |   bit_rate: 448000
62 |   bit_rate_mode: CBR
63 |   sampling_rate: 48000
64 |   compression: LOSSY
65 | - id: 5
66 |   language: pt
67 |   duration: 3259.382
68 |   size: 312900608
69 |   codec: DTS
70 |   channels_count: 2
71 |   channels: 2.0
72 |   bit_depth: 24
73 |   bit_rate: 768000
74 |   bit_rate_mode: CBR
75 |   sampling_rate: 48000
76 |   compression: LOSSY
77 | subtitle:
78 | - id: 6
79 |   language: en
80 |   format: PGS
81 | - id: 7
82 |   language: de
83 |   format: PGS
84 | - id: 8
85 |   language: pt
86 |   format: PGS
87 | provider:
88 |   name: mediainfo
89 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [tool.poetry]
 2 | name = "knowit"
 3 | version = "0.5.11"
 4 | description = "Know better your media files"
 5 | authors = [
 6 |     "Rato",
 7 | ]
 8 | license = "MIT"
 9 | readme = "README.md"
10 | repository = "https://github.com/ratoaq2/knowit"
11 | keywords = [
12 |     "video",
13 |     "mkv",
14 |     "mp4",
15 |     "mediainfo",
16 |     "metadata",
17 |     "movie",
18 |     "episode",
19 |     "tv",
20 |     "shows",
21 |     "series",
22 | ]
23 | classifiers = [
24 |     "Development Status :: 5 - Production/Stable",
25 |     "Intended Audience :: Developers",
26 |     "License :: OSI Approved :: MIT License",
27 |     "Operating System :: OS Independent",
28 |     "Programming Language :: Python",
29 |     "Programming Language :: Python :: 3",
30 |     "Programming Language :: Python :: 3 :: Only",
31 |     "Programming Language :: Python :: 3.9",
32 |     "Programming Language :: Python :: 3.10",
33 |     "Programming Language :: Python :: 3.11",
34 |     "Programming Language :: Python :: 3.12",
35 |     "Programming Language :: Python :: 3.13",
36 |     "Topic :: Software Development :: Libraries :: Python Modules",
37 |     "Topic :: Multimedia :: Video",
38 | ]
39 | include = [
40 |     { path = "scripts/**", format = "sdist" },
41 |     { path = "tests/**", format = "sdist" },
42 |     { path = "setup.cfg", format = "sdist" },
43 | ]
44 | 
45 | [tool.poetry.scripts]
46 | knowit = "knowit.__main__:main"
47 | 
48 | [tool.poetry.dependencies]
49 | python = ">=3.9,<4.0"
50 | babelfish = "^0.6.1"
51 | enzyme = "^0.5.2"
52 | pint = { version = ">=0.20.1,<0.25.0", optional = true }
53 | pymediainfo = "^7.0.1"
54 | pyyaml = "^6.0"
55 | trakit = "^0.2.2"
56 | 
57 | [tool.poetry.group.dev.dependencies]
58 | pytest = "^8.4.1"
59 | pytest-cov = "^6.2.1"
60 | flake8 = "^7.3.0"
61 | requests = "^2.32.4"
62 | flake8-docstrings = "^1.7.0"
63 | flake8-import-order = "^0.19.2"
64 | pep8-naming = "^0.15.1"
65 | pydocstyle = "^6.3.0"
66 | mypy = "^1.17.0"
67 | types-requests = "^2.32.4.20250611"
68 | types-mock = "^5.2.0.20250516"
69 | typing-extensions = "^4.14.1"
70 | 
71 | [tool.poetry.extras]
72 | pint = ["pint"]
73 | 
74 | [build-system]
75 | requires = ["poetry-core"]
76 | build-backend = "poetry.core.masonry.api"
77 | 


--------------------------------------------------------------------------------
/knowit/config.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import typing
 3 | from logging import NullHandler, getLogger
 4 | 
 5 | import yaml
 6 | 
 7 | try:
 8 |     from importlib.resources import files
 9 | except ImportError:
10 |     from importlib_resources import files  # type: ignore[assignment,no-redef,import-not-found]
11 | 
12 | from knowit.serializer import get_yaml_loader
13 | 
14 | logger = getLogger(__name__)
15 | logger.addHandler(NullHandler())
16 | 
17 | 
class _Value(typing.NamedTuple):
    """One configured value, with one label per alias."""

    # Config.build fills missing aliases in cascade:
    # code -> default -> human -> technical.
    code: str
    default: str
    human: str
    technical: str
23 | 
24 | 
# Field names of _Value; Config.build drops any profile key that is not listed here.
_valid_aliases = _Value._fields
26 | 
27 | 
class Config:
    """Application config class."""

    @classmethod
    def build(cls, path: typing.Optional[typing.Union[str, os.PathLike]] = None) -> 'Config':
        """Build config instance.

        The packaged ``defaults.yml`` is always loaded; when ``path`` is given,
        that file is loaded afterwards and its top-level ``profiles`` and
        ``knowledge`` entries override the defaults key by key (shallow update).

        The instance exposes one attribute per knowledge class name (plus an
        always-present ``general``), each mapping a detection value, as a
        string, to its ``_Value``.

        :param path: optional path to a user configuration file.
        :return: the populated Config instance.
        """
        # NOTE(review): whether loading a user-supplied file is safe depends on
        # the loader returned by get_yaml_loader() - confirm it is a safe loader.
        loader = get_yaml_loader()
        config_file = files(__package__).joinpath('defaults.yml')
        with config_file.open('rb') as stream:
            # An empty YAML document loads as None: treat it as an empty mapping.
            cfgs = [yaml.load(stream, Loader=loader) or {}]

        if path:
            with open(path, 'rb') as stream:
                cfgs.append(yaml.load(stream, Loader=loader) or {})

        profiles_data = {}
        knowledge_data = {}
        for cfg in cfgs:
            # A section declared without content (``profiles:``) loads as None.
            if 'profiles' in cfg:
                profiles_data.update(cfg['profiles'] or {})
            if 'knowledge' in cfg:
                knowledge_data.update(cfg['knowledge'] or {})

        data: typing.Dict[str, typing.MutableMapping] = {'general': {}}
        for class_name, data_map in knowledge_data.items():
            data.setdefault(class_name, {})
            for code, detection_values in data_map.items():
                # When the profile entry exists this is the loaded dict itself,
                # so the setdefault calls below fill it in place.
                alias_map = (profiles_data.get(class_name) or {}).get(code) or {}
                alias_map.setdefault('code', code)
                alias_map.setdefault('default', alias_map['code'])
                alias_map.setdefault('human', alias_map['default'])
                alias_map.setdefault('technical', alias_map['human'])
                value = _Value(**{k: v for k, v in alias_map.items() if k in _valid_aliases})
                for detection_value in detection_values:
                    data[class_name][str(detection_value)] = value

        config = Config()
        config.__dict__ = data
        return config
69 | 


--------------------------------------------------------------------------------
/knowit/rules/video.py:
--------------------------------------------------------------------------------
 1 | from decimal import Decimal
 2 | 
 3 | from knowit.core import Rule
 4 | 
 5 | 
 6 | class ResolutionRule(Rule):
 7 |     """Resolution rule."""
 8 | 
 9 |     standard_resolutions = (
10 |         480,
11 |         720,
12 |         1080,
13 |         2160,
14 |         4320,
15 |     )
16 |     uncommon_resolutions = (
17 |         240,
18 |         288,
19 |         360,
20 |         576,
21 |     )
22 |     resolutions = list(sorted(standard_resolutions + uncommon_resolutions))
23 |     square = 4. / 3
24 |     wide = 16. / 9
25 | 
26 |     def execute(self, props, pv_props, context):
27 |         """Return the resolution for the video.
28 | 
29 |         The resolution is based on a widescreen TV (16:9)
30 |         1920x800 will be considered 1080p since the TV will use 1920x1080 with vertical black bars
31 |         1426x1080 is considered 1080p since the TV will use 1920x1080 with horizontal black bars
32 | 
33 |         The calculation considers the display aspect ratio and the pixel aspect ratio (not only width and height).
34 |         The upper resolution is selected if there's no perfect match with the following list of resolutions:
35 |             240, 288, 360, 480, 576, 720, 1080, 2160, 4320
36 |         If no interlaced information is available, resolution will be considered Progressive.
37 |         """
38 |         width = props.get('width')
39 |         height = props.get('height')
40 |         if not width or not height:
41 |             return
42 | 
43 |         try:
44 |             width = width.magnitude
45 |             height = height.magnitude
46 |         except AttributeError:
47 |             pass
48 | 
49 |         dar = props.get('aspect_ratio', Decimal(width) / height)
50 |         par = props.get('pixel_aspect_ratio', 1)
51 |         scan_type = props.get('scan_type', 'p')[0].lower()
52 | 
53 |         # selected DAR must be between 4:3 and 16:9
54 |         selected_dar = max(min(dar, self.wide), self.square)
55 | 
56 |         # mod-16
57 |         stretched_width = int(round(width * par / 16)) * 16
58 | 
59 |         # mod-8
60 |         calculated_height = int(round(stretched_width / selected_dar / 8)) * 8
61 | 
62 |         selected_resolution = None
63 |         for r in reversed(self.resolutions):
64 |             if r < calculated_height:
65 |                 break
66 | 
67 |             selected_resolution = r
68 | 
69 |         if selected_resolution:
70 |             return f'{selected_resolution}{scan_type}'
71 | 
72 |         msg = f'{width}x{height} - scan_type: {scan_type}, aspect_ratio: {dar}, pixel_aspect_ratio: {par}'
73 |         self.report(msg, context)
74 | 


--------------------------------------------------------------------------------
/tests/test_utils.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import typing
 3 | from unittest.mock import patch
 4 | import pytest
 5 | 
 6 | from knowit.utils import build_path_candidates, detect_os
 7 | 
 8 | 
 9 | @pytest.mark.parametrize(
10 |     'os_name, sys_platform, expected', [
11 |         ('nt', None, 'windows'),
12 |         ('dos', None, 'windows'),
13 |         ('os2', None, 'windows'),
14 |         ('ce', None, 'windows'),
15 |         (None, 'darwin', 'macos'),
16 |         (None, None, 'unix'),
17 |     ]
18 | )
19 | def test_detect_os(os_name, sys_platform, expected):
20 |     with patch('knowit.utils.os') as mock_os:
21 |         mock_os.name = os_name
22 |         with patch('knowit.utils.sys') as mock_sys:
23 |             mock_sys.platform = sys_platform
24 |             assert detect_os() == expected
25 | 
26 | 
@pytest.mark.parametrize(
    'os_family, path, names, expected', [
        (
            'windows',
            r'C:\Application;C:\Program Files\Application',
            ('some.dll', 'some.exe', 'another.exe'),
            [
                r'C:\Application\some.dll',
                r'C:\Application\some.exe',
                r'C:\Application\another.exe',
                r'C:\Program Files\Application\some.dll',
                r'C:\Program Files\Application\some.exe',
                r'C:\Program Files\Application\another.exe',
                r'some.dll',
                r'some.exe',
                r'another.exe',
            ],
        ),
        (
            'macos',
            '/usr/sbin:/usr/bin:/sbin:/bin',
            ('some.dll', 'binary', 'another_binary'),
            [
                'some.dll',
                'binary',
                'another_binary',
            ],
        ),
        (
            'linux',
            '/usr/sbin:/usr/bin:/sbin:/bin',
            ('some.dll', 'binary', 'another_binary'),
            [
                'some.dll',
                'binary',
                'another_binary',
            ],
        ),
    ],
)
def test_build_path_candidates_for_specified_os(names, os_family, path, expected):
    """Check the candidates built for an OS family from a patched ``PATH``."""
    def with_forward_slashes(paths: typing.Iterable[str]):
        """Replace all slashes to a forward slash for comparison purposes."""
        return [entry.replace('\\', '/') for entry in paths]

    with patch('knowit.utils.os') as mock_os:
        mock_os.environ = {'PATH': path}
        mock_os.path = os.path  # don't mock os.path functions
        candidates = build_path_candidates(names, os_family)

        # compared while the patch is still active, as candidates may be lazy
        assert with_forward_slashes(candidates) == with_forward_slashes(expected)
78 | 


--------------------------------------------------------------------------------
/tests/data/ffmpeg/media_001.mkv.yml:
--------------------------------------------------------------------------------
  1 | title: Super Title
  2 | path: videofile.mkv
  3 | container: mkv
  4 | duration: '1:23:45.670000'
  5 | size: 12345678901 byte
  6 | bit_rate: 1231233 bps
  7 | video:
  8 | - id: 0
  9 |   name: Super Title
 10 |   width: 1920 pixel
 11 |   height: 1080 pixel
 12 |   scan_type: PROGRESSIVE
 13 |   aspect_ratio: 1.778
 14 |   pixel_aspect_ratio: 1.0
 15 |   resolution: 1080p
 16 |   frame_rate: 23.976 FPS
 17 |   bit_depth: 8 bit
 18 |   codec: H264
 19 |   profile: HIGH
 20 |   default: true
 21 | audio:
 22 | - id: 1
 23 |   name: Super Title
 24 |   language: en
 25 |   codec: DTSHD
 26 |   profile: MA
 27 |   channels_count: 8
 28 |   channels: '7.1'
 29 |   bit_depth: 24 bit
 30 |   sampling_rate: 48000 Hz
 31 |   default: true
 32 | - id: 2
 33 |   name: Super Title
 34 |   language: en
 35 |   codec: DTS
 36 |   channels_count: 6
 37 |   channels: '5.1'
 38 |   bit_rate: 1536000 bps
 39 |   sampling_rate: 48000 Hz
 40 | - id: 3
 41 |   name: Super Title
 42 |   language: en
 43 |   codec: AC3
 44 |   channels_count: 2
 45 |   channels: '2.0'
 46 |   bit_rate: 320000 bps
 47 |   sampling_rate: 48000 Hz
 48 | - id: 4
 49 |   name: Super Title
 50 |   language: fr
 51 |   codec: DTSHD
 52 |   profile: HRA
 53 |   channels_count: 6
 54 |   channels: '5.1'
 55 |   sampling_rate: 48000 Hz
 56 | - id: 5
 57 |   name: Super Title
 58 |   language: fr
 59 |   codec: DTS
 60 |   channels_count: 6
 61 |   channels: '5.1'
 62 |   bit_rate: 1536000 bps
 63 |   sampling_rate: 48000 Hz
 64 | - id: 6
 65 |   name: Super Title
 66 |   language: cs
 67 |   codec: AC3
 68 |   channels_count: 6
 69 |   channels: '5.1'
 70 |   bit_rate: 640000 bps
 71 |   sampling_rate: 48000 Hz
 72 | - id: 7
 73 |   name: Super Title
 74 |   language: hi
 75 |   codec: AC3
 76 |   channels_count: 6
 77 |   channels: '5.1'
 78 |   bit_rate: 640000 bps
 79 |   sampling_rate: 48000 Hz
 80 | subtitle:
 81 | - id: 8
 82 |   name: English-PGS
 83 |   language: en
 84 |   format: PGS
 85 | - id: 9
 86 |   name: French-PGS
 87 |   language: fr
 88 |   format: PGS
 89 | - id: 10
 90 |   name: Czech-PGS
 91 |   language: cs
 92 |   format: PGS
 93 | - id: 11
 94 |   name: Dutch-PGS
 95 |   language: nl
 96 |   format: PGS
 97 | - id: 12
 98 |   name: Arabic-PGS
 99 |   language: ar
100 |   format: PGS
101 | - id: 13
102 |   name: Danish-PGS
103 |   language: da
104 |   format: PGS
105 | - id: 14
106 |   name: Finnish-PGS
107 |   language: fi
108 |   format: PGS
109 | - id: 15
110 |   name: Norwegian-PGS
111 |   language: "no"
112 |   format: PGS
113 | - id: 16
114 |   name: Swedish-PGS
115 |   language: sv
116 |   format: PGS
117 | - id: 17
118 |   name: French-FORCED-PGS
119 |   language: fr
120 |   format: PGS
121 | - id: 18
122 |   name: Czech-FORCED-PGS
123 |   language: cs
124 |   format: PGS
125 | - id: 19
126 |   name: Hindi-FORCED-PGS
127 |   language: hi
128 |   format: PGS
129 | provider:
130 |   name: ffmpeg
131 | 
132 | 


--------------------------------------------------------------------------------
/tests/conftest.py:
--------------------------------------------------------------------------------
 1 | from unittest.mock import Mock
 2 | 
 3 | import pytest
 4 | 
 5 | from knowit import api
 6 | from knowit.config import Config
 7 | from knowit.providers import EnzymeProvider
 8 | from knowit.providers.ffmpeg import FFmpegCliExecutor, FFmpegExecutor
 9 | from knowit.providers.mediainfo import MediaInfoCTypesExecutor, MediaInfoCliExecutor, MediaInfoExecutor
10 | from knowit.providers.mkvmerge import MkvMergeCliExecutor, MkvMergeExecutor
11 | 
12 | 
@pytest.fixture
def context():
    """Provide a fresh context mapping selecting the ``default`` profile."""
    return dict(profile='default')
18 | 
19 | 
@pytest.fixture
def config():
    """Provide a Config built without a user-supplied path."""
    built = Config.build()
    return built
23 | 
24 | 
@pytest.fixture
def options():
    """Provide a fresh options mapping selecting the ``code`` profile."""
    return dict(profile='code')
28 | 
29 | 
def setup_mediainfo(executor, monkeypatch, options):
    """Route mediainfo through ``executor`` and return the mapping of canned results.

    Selects the mediainfo provider in ``options``, clears the registered
    providers and makes ``MediaInfoExecutor.get_executor_instance`` return
    ``executor``. File names present in the returned mapping are answered from
    it; any other file name goes to the executor's original ``extract_info``.
    """
    assert executor
    options['provider'] = 'mediainfo'
    api.available_providers.clear()
    monkeypatch.setattr(MediaInfoExecutor, 'get_executor_instance', Mock(return_value=executor))

    canned = {}
    real_extract_info = executor.extract_info

    def fake_extract_info(filename):
        if filename in canned:
            return canned[filename]
        return real_extract_info(filename)

    monkeypatch.setattr(executor, 'extract_info', fake_extract_info)
    return canned
43 | 
44 | 
@pytest.fixture
def mediainfo_cli(monkeypatch, options):
    """Set up mediainfo with the executor created by ``MediaInfoCliExecutor``."""
    cli_executor = MediaInfoCliExecutor.create()
    return setup_mediainfo(cli_executor, monkeypatch, options)
48 | 
49 | 
@pytest.fixture
def mediainfo(monkeypatch, options):
    """Set up mediainfo with the executor created by ``MediaInfoCTypesExecutor``."""
    ctypes_executor = MediaInfoCTypesExecutor.create()
    return setup_mediainfo(ctypes_executor, monkeypatch, options)
53 | 
54 | 
@pytest.fixture
def ffmpeg(monkeypatch, options):
    """Route ffmpeg through a CLI executor and return the mapping of canned results.

    File names present in the returned mapping are answered from it; any other
    file name goes to the executor's original ``extract_info``.
    """
    options['provider'] = 'ffmpeg'
    api.available_providers.clear()
    cli_executor = FFmpegCliExecutor.create()
    monkeypatch.setattr(FFmpegExecutor, 'get_executor_instance', Mock(return_value=cli_executor))

    canned = {}
    real_extract_info = cli_executor.extract_info

    def fake_extract_info(filename):
        if filename in canned:
            return canned[filename]
        return real_extract_info(filename)

    monkeypatch.setattr(cli_executor, 'extract_info', fake_extract_info)
    return canned
69 | 
70 | 
@pytest.fixture
def mkvmerge(monkeypatch, options):
    """Route mkvmerge through a CLI executor and return the mapping of canned results.

    File names present in the returned mapping are answered from it; any other
    file name goes to the executor's original ``extract_info``.
    """
    options['provider'] = 'mkvmerge'
    api.available_providers.clear()
    cli_executor = MkvMergeCliExecutor.create()
    monkeypatch.setattr(MkvMergeExecutor, 'get_executor_instance', Mock(return_value=cli_executor))

    canned = {}
    real_extract_info = cli_executor.extract_info

    def fake_extract_info(filename):
        if filename in canned:
            return canned[filename]
        return real_extract_info(filename)

    monkeypatch.setattr(cli_executor, 'extract_info', fake_extract_info)
    return canned
85 | 
86 | 
@pytest.fixture
def enzyme(monkeypatch, options):
    """Select the enzyme provider and return the mapping of canned results.

    ``EnzymeProvider.extract_info`` is replaced on the class: file names present
    in the returned mapping are answered from it, any other file name goes to
    the original ``extract_info``.
    """
    options['provider'] = 'enzyme'

    canned = {}
    real_extract_info = EnzymeProvider.extract_info

    def fake_extract_info(cls, filename):
        if filename in canned:
            return canned[filename]
        return real_extract_info(filename)

    monkeypatch.setattr(EnzymeProvider, 'extract_info', fake_extract_info)

    return canned
97 | 


--------------------------------------------------------------------------------
/tests/data/mediainfo/several-tracks.mkv.yml:
--------------------------------------------------------------------------------
  1 | path: tests/data/several-tracks.mkv
  2 | duration: 2353.6
  3 | size: 5796819279
  4 | bit_rate: 19703669
  5 | container: mkv
  6 | video:
  7 | - id: 1
  8 |   language: en
  9 |   duration: 2353.584
 10 |   size: 5569279175
 11 |   width: 3840
 12 |   height: 2160
 13 |   scan_type: PROGRESSIVE
 14 |   aspect_ratio: 1.778
 15 |   pixel_aspect_ratio: 1.0
 16 |   resolution: 2160p
 17 |   frame_rate: 24.0
 18 |   bit_rate: 18930377
 19 |   bit_depth: 8
 20 |   codec: H265
 21 |   profile: MAIN
 22 |   profile_level: '5'
 23 |   encoder: X265
 24 |   media_type: video/H265
 25 |   default: true
 26 | audio:
 27 | - id: 2
 28 |   language: en
 29 |   duration: 2353.6
 30 |   size: 225945600
 31 |   codec:
 32 |   - EAC3
 33 |   - ATMOS
 34 |   channels_count: 6
 35 |   channels: 5.1
 36 |   bit_rate: 768000
 37 |   bit_rate_mode: CBR
 38 |   sampling_rate: 48000
 39 |   compression: LOSSY
 40 |   default: true
 41 | subtitle:
 42 | - id: 3
 43 |   name: English (SDH)
 44 |   language: en
 45 |   hearing_impaired: true
 46 |   format: SUBRIP
 47 | - id: 4
 48 |   name: Čeština
 49 |   language: cs
 50 |   format: SUBRIP
 51 | - id: 5
 52 |   name: Dansk
 53 |   language: da
 54 |   format: SUBRIP
 55 | - id: 6
 56 |   name: Deutsch
 57 |   language: de
 58 |   format: SUBRIP
 59 | - id: 7
 60 |   name: Greek
 61 |   language: el
 62 |   format: SUBRIP
 63 | - id: 8
 64 |   name: Español
 65 |   language: es
 66 |   format: SUBRIP
 67 | - id: 9
 68 |   name: Español (Latinoamericano)
 69 |   language: es-419
 70 |   format: SUBRIP
 71 | - id: 10
 72 |   name: Suomi
 73 |   language: fi
 74 |   format: SUBRIP
 75 | - id: 11
 76 |   name: Français (Canadien)
 77 |   language: fr-CA
 78 |   format: SUBRIP
 79 | - id: 12
 80 |   name: Français
 81 |   language: fr
 82 |   format: SUBRIP
 83 | - id: 13
 84 |   name: Magyar
 85 |   language: hu
 86 |   format: SUBRIP
 87 | - id: 14
 88 |   name: Italiano
 89 |   language: it
 90 |   format: SUBRIP
 91 | - id: 15
 92 |   name: Japanese
 93 |   language: ja
 94 |   format: SUBRIP
 95 | - id: 16
 96 |   name: Korean
 97 |   language: ko
 98 |   format: SUBRIP
 99 | - id: 17
100 |   name: Nederlands
101 |   language: nl
102 |   format: SUBRIP
103 | - id: 18
104 |   name: Norsk
105 |   language: 'no'
106 |   format: SUBRIP
107 | - id: 19
108 |   name: Polski
109 |   language: pl
110 |   format: SUBRIP
111 | - id: 20
112 |   name: Português
113 |   language: pt
114 |   format: SUBRIP
115 | - id: 21
116 |   name: Português (Brasil)
117 |   language: pt-BR
118 |   format: SUBRIP
119 | - id: 22
120 |   name: Română
121 |   language: ro
122 |   format: SUBRIP
123 | - id: 23
124 |   name: Slovenčina
125 |   language: sk
126 |   format: SUBRIP
127 | - id: 24
128 |   name: Svenska
129 |   language: sv
130 |   format: SUBRIP
131 | - id: 25
132 |   name: Türkçe
133 |   language: tr
134 |   format: SUBRIP
135 | - id: 26
136 |   name: Chinese (Hong Kong)
137 |   language: zh-HK
138 |   format: SUBRIP
139 | - id: 27
140 |   name: Chinese (Traditional)
141 |   language: zh-Hant
142 |   format: SUBRIP
143 | - id: 28
144 |   name: Chinese (Simplified)
145 |   language: zh-Hans
146 |   format: SUBRIP
147 | - id: 29
148 |   name: Indonesian
149 |   language: id
150 |   format: SUBRIP
151 | - id: 30
152 |   name: Malay
153 |   language: ms
154 |   format: SUBRIP
155 | - id: 31
156 |   name: Thai
157 |   language: th
158 |   format: SUBRIP
159 | - id: 32
160 |   name: Chinese
161 |   language: zh
162 |   format: SUBRIP
163 | provider:
164 |   name: mediainfo


--------------------------------------------------------------------------------
/knowit/properties/video.py:
--------------------------------------------------------------------------------
  1 | import re
  2 | import typing
  3 | from decimal import Decimal
  4 | 
  5 | from knowit.core import Configurable
  6 | from knowit.core import Property
  7 | from knowit.utils import round_decimal
  8 | 
  9 | 
 10 | class VideoCodec(Configurable[str]):
 11 |     """Video Codec handler."""
 12 | 
 13 |     @classmethod
 14 |     def _extract_key(cls, value) -> str:
 15 |         key = value.upper().split('/')[-1]
 16 |         if key.startswith('V_'):
 17 |             key = key[2:]
 18 | 
 19 |         return key.split(' ')[-1]
 20 | 
 21 | 
 22 | class VideoDimensions(Property[int]):
 23 |     """Dimensions property."""
 24 | 
 25 |     def __init__(self, *args: str, dimension='width' or 'height', **kwargs):
 26 |         """Initialize the object."""
 27 |         super().__init__(*args, **kwargs)
 28 |         self.dimension = dimension
 29 | 
 30 |     dimensions_re = re.compile(r'(?P<width>\d+)x(?P<height>\d+)')
 31 | 
 32 |     def handle(self, value, context) -> typing.Optional[int]:
 33 |         """Handle ratio."""
 34 |         match = self.dimensions_re.match(value)
 35 |         if match:
 36 |             match_dict = match.groupdict()
 37 |             try:
 38 |                 value = match_dict[self.dimension]
 39 |             except KeyError:
 40 |                 pass
 41 |             else:
 42 |                 return int(value)
 43 | 
 44 |         self.report(value, context)
 45 |         return None
 46 | 
 47 | 
 48 | class VideoEncoder(Configurable):
 49 |     """Video Encoder property."""
 50 | 
 51 | 
 52 | class VideoHdrFormat(Configurable):
 53 |     """Video HDR Format property."""
 54 | 
 55 | 
 56 | class VideoProfile(Configurable[str]):
 57 |     """Video Profile property."""
 58 | 
 59 |     @classmethod
 60 |     def _extract_key(cls, value) -> str:
 61 |         return value.upper().split('@')[0]
 62 | 
 63 | 
 64 | class VideoProfileLevel(Configurable[str]):
 65 |     """Video Profile Level property."""
 66 | 
 67 |     @classmethod
 68 |     def _extract_key(cls, value) -> typing.Union[str, bool]:
 69 |         values = str(value).upper().split('@')
 70 |         if len(values) > 1:
 71 |             value = values[1]
 72 |             return value
 73 | 
 74 |         # There's no level, so don't warn or report it
 75 |         return False
 76 | 
 77 | 
 78 | class VideoProfileTier(Configurable[str]):
 79 |     """Video Profile Tier property."""
 80 | 
 81 |     @classmethod
 82 |     def _extract_key(cls, value) -> typing.Union[str, bool]:
 83 |         values = str(value).upper().split('@')
 84 |         if len(values) > 2:
 85 |             return values[2]
 86 | 
 87 |         # There's no tier, so don't warn or report it
 88 |         return False
 89 | 
 90 | 
 91 | class Ratio(Property[Decimal]):
 92 |     """Ratio property."""
 93 | 
 94 |     def __init__(self, *args: str, unit=None, **kwargs):
 95 |         """Initialize the object."""
 96 |         super().__init__(*args, **kwargs)
 97 |         self.unit = unit
 98 | 
 99 |     ratio_re = re.compile(r'(?P<width>\d+)[:/](?P<height>\d+)')
100 | 
101 |     def handle(self, value, context) -> typing.Optional[Decimal]:
102 |         """Handle ratio."""
103 |         match = self.ratio_re.match(value)
104 |         if match:
105 |             width, height = match.groups()
106 |             if (width, height) == ('0', '1'):  # identity
107 |                 return Decimal('1.0')
108 | 
109 |             if height:
110 |                 result = round_decimal(Decimal(width) / Decimal(height), min_digits=1, max_digits=3)
111 |                 if self.unit:
112 |                     result *= self.unit
113 | 
114 |                 return result
115 | 
116 |         self.report(value, context)
117 |         return None
118 | 
119 | 
class ScanType(Configurable[str]):
    """Configurable property for the video scan type; adds no behavior of its own."""
122 | 


--------------------------------------------------------------------------------
/knowit/rules/audio.py:
--------------------------------------------------------------------------------
  1 | import typing
  2 | from decimal import Decimal, InvalidOperation
  3 | from logging import NullHandler, getLogger
  4 | 
  5 | from knowit.core import Rule
  6 | 
  7 | logger = getLogger(__name__)
  8 | logger.addHandler(NullHandler())
  9 | 
 10 | 
 11 | class AtmosRule(Rule):
 12 |     """Atmos rule."""
 13 | 
 14 |     def __init__(self, config: typing.Mapping[str, typing.Mapping], name: str,
 15 |                  **kwargs):
 16 |         """Initialize an Atmos rule."""
 17 |         super().__init__(name, **kwargs)
 18 |         self.audio_codecs = getattr(config, 'AudioCodec')
 19 | 
 20 |     def execute(self, props, pv_props, context):
 21 |         """Execute the rule against properties."""
 22 |         profile = context.get('profile') or 'default'
 23 |         format_commercial = pv_props.get('format_commercial')
 24 |         if 'codec' in props and format_commercial and 'atmos' in format_commercial.lower():
 25 |             props['codec'] = [props['codec'],
 26 |                               getattr(self.audio_codecs['ATMOS'], profile)]
 27 | 
 28 | 
 29 | class AudioChannelsRule(Rule):
 30 |     """Audio Channel rule."""
 31 | 
 32 |     mapping = {
 33 |         1: '1.0',
 34 |         2: '2.0',
 35 |         6: '5.1',
 36 |         8: '7.1',
 37 |     }
 38 | 
 39 |     def execute(self, props, pv_props, context):
 40 |         """Execute the rule against properties."""
 41 |         count = props.get('channels_count')
 42 |         if count is None:
 43 |             return
 44 | 
 45 |         channels = self.mapping.get(count) if isinstance(count, int) else None
 46 |         positions = pv_props.get('channel_positions') or []
 47 |         positions = positions if isinstance(positions, list) else [positions]
 48 |         candidate = 0
 49 |         for position in positions:
 50 |             if not position:
 51 |                 continue
 52 | 
 53 |             c = Decimal('0.0')
 54 |             for i in position.split('/'):
 55 |                 try:
 56 |                     c += Decimal(i.replace('.?', ''))
 57 |                 except (ValueError, InvalidOperation):
 58 |                     logger.debug('Invalid %s: %s', self.description, i)
 59 |                     pass
 60 | 
 61 |             c_count = int(c) + int(round((c - int(c)) * 10))
 62 |             if c_count == count:
 63 |                 return str(c)
 64 | 
 65 |             candidate = max(candidate, c)
 66 | 
 67 |         if channels:
 68 |             return channels
 69 | 
 70 |         if candidate:
 71 |             return candidate
 72 | 
 73 |         self.report(positions, context)
 74 | 
 75 | 
 76 | class DtsHdRule(Rule):
 77 |     """DTS-HD rule."""
 78 | 
 79 |     def __init__(self, config: typing.Mapping[str, typing.Mapping], name: str,
 80 |                  **kwargs):
 81 |         """Initialize a DTS-HD Rule."""
 82 |         super().__init__(name, **kwargs)
 83 |         self.audio_codecs = getattr(config, 'AudioCodec')
 84 |         self.audio_profiles = getattr(config, 'AudioProfile')
 85 | 
 86 |     @classmethod
 87 |     def _redefine(cls, props, name, index):
 88 |         actual = props.get(name)
 89 |         if isinstance(actual, list):
 90 |             value = actual[index]
 91 |             if value is None:
 92 |                 del props[name]
 93 |             else:
 94 |                 props[name] = value
 95 | 
 96 |     def execute(self, props, pv_props, context):
 97 |         """Execute the rule against properties."""
 98 |         profile = context.get('profile') or 'default'
 99 | 
100 |         if props.get('codec') == getattr(self.audio_codecs['DTS'],
101 |                                          profile) and props.get('profile') in (
102 |                 getattr(self.audio_profiles['MA'], profile),
103 |                 getattr(self.audio_profiles['HRA'], profile)):
104 |             props['codec'] = getattr(self.audio_codecs['DTS-HD'], profile)
105 | 


--------------------------------------------------------------------------------
/tests/data/ffmpeg/7.1-dts-hd-ma-speaker-mapping-test-file.mkv.json:
--------------------------------------------------------------------------------
  1 | {
  2 |     "streams": [
  3 |         {
  4 |             "index": 0,
  5 |             "codec_name": "h264",
  6 |             "codec_long_name": "H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10",
  7 |             "profile": "Main",
  8 |             "codec_type": "video",
  9 |             "codec_time_base": "1001/48000",
 10 |             "codec_tag_string": "[0][0][0][0]",
 11 |             "codec_tag": "0x0000",
 12 |             "width": 1920,
 13 |             "height": 1080,
 14 |             "coded_width": 1920,
 15 |             "coded_height": 1080,
 16 |             "has_b_frames": 1,
 17 |             "sample_aspect_ratio": "1:1",
 18 |             "display_aspect_ratio": "16:9",
 19 |             "pix_fmt": "yuv420p",
 20 |             "level": 40,
 21 |             "chroma_location": "left",
 22 |             "field_order": "progressive",
 23 |             "refs": 1,
 24 |             "is_avc": "true",
 25 |             "nal_length_size": "4",
 26 |             "r_frame_rate": "24000/1001",
 27 |             "avg_frame_rate": "24000/1001",
 28 |             "time_base": "1/1000",
 29 |             "start_pts": 0,
 30 |             "start_time": "0:00:00.000000",
 31 |             "bits_per_raw_sample": "8",
 32 |             "disposition": {
 33 |                 "default": 1,
 34 |                 "dub": 0,
 35 |                 "original": 0,
 36 |                 "comment": 0,
 37 |                 "lyrics": 0,
 38 |                 "karaoke": 0,
 39 |                 "forced": 0,
 40 |                 "hearing_impaired": 0,
 41 |                 "visual_impaired": 0,
 42 |                 "clean_effects": 0,
 43 |                 "attached_pic": 0,
 44 |                 "timed_thumbnails": 0
 45 |             }
 46 |         },
 47 |         {
 48 |             "index": 1,
 49 |             "codec_name": "dts",
 50 |             "codec_long_name": "DCA (DTS Coherent Acoustics)",
 51 |             "profile": "DTS-HD MA",
 52 |             "codec_type": "audio",
 53 |             "codec_time_base": "1/48000",
 54 |             "codec_tag_string": "[0][0][0][0]",
 55 |             "codec_tag": "0x0000",
 56 |             "sample_fmt": "s32p",
 57 |             "sample_rate": "48000",
 58 |             "channels": 8,
 59 |             "channel_layout": "7.1",
 60 |             "bits_per_sample": 0,
 61 |             "r_frame_rate": "0/0",
 62 |             "avg_frame_rate": "0/0",
 63 |             "time_base": "1/1000",
 64 |             "start_pts": 0,
 65 |             "start_time": "0:00:00.000000",
 66 |             "bits_per_raw_sample": "24",
 67 |             "disposition": {
 68 |                 "default": 1,
 69 |                 "dub": 0,
 70 |                 "original": 0,
 71 |                 "comment": 0,
 72 |                 "lyrics": 0,
 73 |                 "karaoke": 0,
 74 |                 "forced": 0,
 75 |                 "hearing_impaired": 0,
 76 |                 "visual_impaired": 0,
 77 |                 "clean_effects": 0,
 78 |                 "attached_pic": 0,
 79 |                 "timed_thumbnails": 0
 80 |             },
 81 |             "tags": {
 82 |                 "language": "eng",
 83 |                 "title": "7.1Ch DTS-HD MA"
 84 |             }
 85 |         }
 86 |     ],
 87 |     "format": {
 88 |         "filename": "tests/data/ffmpeg/7.1-dts-hd-ma-speaker-mapping-test-file.mkv",
 89 |         "nb_streams": 2,
 90 |         "nb_programs": 0,
 91 |         "format_name": "matroska,webm",
 92 |         "format_long_name": "Matroska / WebM",
 93 |         "start_time": "0:00:00.000000",
 94 |         "duration": "0:01:37.931000",
 95 |         "size": "40772443",
 96 |         "bit_rate": "3330707",
 97 |         "probe_score": 100,
 98 |         "tags": {
 99 |             "title": "7.1Ch DTS-HD MA - Speaker Mapping Test File",
100 |             "encoder": "libebml v1.3.0 + libmatroska v1.4.1",
101 |             "creation_time": "2013-12-13T17:49:28.000000Z"
102 |         }
103 |     }
104 | }
105 | 


--------------------------------------------------------------------------------
/tests/test_properties.yml:
--------------------------------------------------------------------------------
  1 | VideoCodec:
  2 |   H.264:
  3 |     - V_MPEG4/ISO/AVC
  4 |     - AVC
  5 |   Microsoft MPEG-4 v2: MP42
  6 |   WMV 7: WMV1
  7 |   WebCam JPEG: CJPG
  8 |   H.265: V_MPEGH/ISO/HEVC
  9 |   Xvid: XVID
 10 |   MPEG-1: MPEG-1V
 11 |   VP8: V_VP8
 12 |   VC-1:
 13 |     - VC-1
 14 |     - WMV3
 15 |   WMV 8: WMV2
 16 | 
 17 | VideoEncoder:
 18 |   DivX: DivX
 19 |   x264: x264
 20 |   x265: x265
 21 |   Xvid: XviD
 22 |   Vimeo: Vimeo Encoder
 23 | 
 24 | VideoProfile:
 25 |   Advanced: Advanced@L3
 26 |   Advanced Simple: Advanced Simple@L5
 27 |   Simple:
 28 |     - Simple@L1
 29 |     - Simple@L3
 30 |   Baseline:
 31 |     - Baseline@L1.0
 32 |     - Baseline@L1.1
 33 |     - Baseline@L1.3
 34 |     - Baseline@L2
 35 |     - Baseline@L2.1
 36 |     - Baseline@L3
 37 |     - Baseline@L3.0
 38 |     - Baseline@L3.2
 39 |     - Baseline@L4.1
 40 |   Main:
 41 |     - Main
 42 |     - Main@Main
 43 |     - Main@L1.3
 44 |     - Main@L2.1
 45 |     - Main@L3
 46 |     - Main@L3.0
 47 |     - Main@L3.1
 48 |     - Main@L3.1@Main
 49 |     - Main@L3.2
 50 |     - Main@L4
 51 |     - Main@L4.0
 52 |     - Main@L4@Main
 53 |     - Main@L5@Main
 54 |     - Main@High
 55 |     - Main@High 1440
 56 |   Main 10:
 57 |     - Main 10@L2.1@Main
 58 |     - Main 10@L3@Main
 59 |     - Main 10@L3.1@Main
 60 |     - Main 10@L4@Main
 61 |     - Main 10@L4@High
 62 |     - Main 10@L4.1@Main
 63 |     - Main 10@L5@Main
 64 |     - Main 10@L5.1@Main
 65 |   High:
 66 |     - High@L3
 67 |     - High@L3.0
 68 |     - High@L3.1
 69 |     - High@L3.2
 70 |     - High@L4
 71 |     - High@L4.0 / High@L4.0
 72 |     - High@L4.0
 73 |     - High@L4.2
 74 |     - High@L5
 75 |     - High@L5.1
 76 | 
 77 | VideoProfileLevel:
 78 |   '1':
 79 |     - Baseline@L1.0
 80 |     - Simple@L1
 81 |   '1.1': Baseline@L1.1
 82 |   '1.3':
 83 |     - Baseline@L1.3
 84 |     - Main@L1.3
 85 |   '2': Baseline@L2
 86 |   '2.1':
 87 |     - Baseline@L2.1
 88 |     - Main@L2.1
 89 |     - Main 10@L2.1@Main
 90 |   '2.2':
 91 |     - High@L2.2
 92 |   '3':
 93 |     - Advanced@L3
 94 |     - Simple@L3
 95 |     - Baseline@L3
 96 |     - Baseline@L3.0
 97 |     - Main@L3
 98 |     - Main@L3.0
 99 |     - Main 10@L3@Main
100 |     - High@L3
101 |     - High@L3.0
102 |   '3.1':
103 |     - Main@L3.1
104 |     - Main@L3.1@Main
105 |     - Main 10@L3.1@Main
106 |     - High@L3.1
107 |   '3.2':
108 |     - Baseline@L3.2
109 |     - Main@L3.2
110 |     - High@L3.2
111 |   '4':
112 |     - Main@L4
113 |     - Main@L4.0
114 |     - Main@L4@Main
115 |     - Main 10@L4@Main
116 |     - Main 10@L4@High
117 |     - High@L4
118 |     - High@L4.0
119 |     - High@L4.0 / High@L4.0
120 |   '4.1':
121 |     - Baseline@L4.1
122 |     - Main 10@L4.1@Main
123 |   '4.2':
124 |     - High@L4.2
125 |   '5':
126 |     - Advanced Simple@L5
127 |     - Main@L5@Main
128 |     - Main 10@L5.0
129 |     - Main 10@L5@Main
130 |     - High@L5
131 |   '5.1':
132 |     - High@L5.1
133 |     - Main 10@L5.1@Main
134 |   Main:
135 |     - Main@Main
136 |   High:
137 |     - Main@High
138 |   High 1440:
139 |     - Main@High 1440
140 | 
141 | VideoProfileTier:
142 |   Main:
143 |     - Main@L3.1@Main
144 |     - Main@L4@Main
145 |     - Main@L5@Main
146 |     - Main 10@L2.1@Main
147 |     - Main 10@L3@Main
148 |     - Main 10@L3.1@Main
149 |     - Main 10@L4@Main
150 |     - Main 10@L4.1@Main
151 |     - Main 10@L5@Main
152 |     - Main 10@L5.1@Main
153 |   High:
154 |     - Main 10@L4@High
155 | 
156 | VideoHdrFormat:
157 |   Dolby Vision:
158 |     - Dolby Vision
159 |   HDR10:
160 |     - SMPTE ST 2086
161 | 
162 | ScanType:
163 |   Progressive:
164 |     - Progressive
165 |     - Progressive / Progressive
166 |   Interlaced: MBAFF
167 | 
168 | AudioCodec:
169 |   AAC:
170 |     - AAC LC
171 |     - AAC LC-SBR
172 |     - AAC LC-SBR-PS
173 |   AC-3: AC3
174 |   E-AC-3: AC3+
175 |   Atmos: Atmos
176 |   TrueHD:
177 |     - TrueHD
178 |     - A_TRUEHD
179 |   DTS: DTS
180 |   DTS-HD: DTS-HD
181 |   FLAC: FLAC
182 |   PCM: PCM
183 |   Vorbis: Vorbis
184 |   MP2: MPA1L2
185 |   MP3:
186 |     - MPA1L3
187 |     - MPA2L3
188 |   WMA 2: 161
189 |   WMA Pro: 162
190 |   RT29 MetaSound: 75
191 | 
192 | AudioProfile:
193 |   High Efficiency: HE-AAC
194 |   High Efficiency v2: HE-AACv2
195 |   Master Audio: MA
196 |   Low Complexity: LC
197 |   High Resolution Audio: HRA
198 |   96/24: 96/24
199 |   Extended Surround:
200 |     - ES Discrete
201 |     - ES Matrix
202 |   Layer 2: Layer 2
203 |   Layer 3: Layer 3
204 |   Pro: Pro
205 | 
206 | AudioCompression:
207 |   Lossy: Lossy
208 |   Lossless: Lossless
209 | 
210 | BitRateMode:
211 |   Constant: CBR
212 |   Variable: VBR
213 | 
214 | SubtitleFormat:
215 |   ASS: S_TEXT/ASS
216 |   SSA: S_TEXT/SSA
217 |   VobSub: S_VOBSUB
218 |   PGS: S_HDMV/PGS
219 |   SubRip: S_TEXT/UTF8
220 |   DVBSub: 6
221 | 


--------------------------------------------------------------------------------
/knowit/utils.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import sys
  3 | import typing
  4 | from decimal import Decimal
  5 | 
  6 | from knowit import VIDEO_EXTENSIONS
  7 | 
  8 | OS_FAMILY = typing.Literal['windows', 'macos', 'unix']
  9 | 
 10 | OPTION_MAP = typing.Dict[str, typing.Tuple[str]]
 11 | 
 12 | 
 13 | def recurse_paths(
 14 |         paths: typing.Union[str, typing.Iterable[str]]
 15 | ) -> typing.List[str]:
 16 |     """Return a list of video files."""
 17 |     enc_paths = []
 18 | 
 19 |     if isinstance(paths, str):
 20 |         paths = [p.strip() for p in paths.split(',')] if ',' in paths else paths.split()
 21 | 
 22 |     for path in paths:
 23 |         if os.path.isfile(path):
 24 |             enc_paths.append(path)
 25 |         if os.path.isdir(path):
 26 |             for root, directories, filenames in os.walk(path):
 27 |                 for filename in filenames:
 28 |                     if os.path.splitext(filename)[1] in VIDEO_EXTENSIONS:
 29 |                         full_path = os.path.join(root, filename)
 30 |                         enc_paths.append(full_path)
 31 | 
 32 |     # Lets remove any dupes since mediainfo is rather slow.
 33 |     unique_paths = dict.fromkeys(enc_paths)
 34 |     return list(unique_paths)
 35 | 
 36 | 
 37 | def to_dict(
 38 |         obj: typing.Any,
 39 |         classkey: typing.Optional[typing.Type] = None
 40 | ) -> typing.Union[str, dict, list]:
 41 |     """Transform an object to dict."""
 42 |     if isinstance(obj, str):
 43 |         return obj
 44 |     elif isinstance(obj, dict):
 45 |         data = {}
 46 |         for (k, v) in obj.items():
 47 |             data[k] = to_dict(v, classkey)
 48 |         return data
 49 |     elif hasattr(obj, '_ast'):
 50 |         return to_dict(obj._ast())
 51 |     elif hasattr(obj, '__iter__'):
 52 |         return [to_dict(v, classkey) for v in obj]
 53 |     elif hasattr(obj, '__dict__'):
 54 |         values = [(key, to_dict(value, classkey))
 55 |                   for key, value in obj.__dict__.items() if not callable(value) and not key.startswith('_')]
 56 |         data = {k: v for k, v in values if v is not None}
 57 |         if classkey is not None and hasattr(obj, '__class__'):
 58 |             data[classkey] = obj.__class__.__name__
 59 |         return data
 60 |     return obj
 61 | 
 62 | 
 63 | def detect_os() -> OS_FAMILY:
 64 |     """Detect os family: windows, macos or unix."""
 65 |     if os.name in ('nt', 'dos', 'os2', 'ce'):
 66 |         return 'windows'
 67 |     if sys.platform == 'darwin':
 68 |         return 'macos'
 69 |     return 'unix'
 70 | 
 71 | 
 72 | def define_candidate(
 73 |         locations: OPTION_MAP,
 74 |         names: OPTION_MAP,
 75 |         os_family: typing.Optional[OS_FAMILY] = None,
 76 |         suggested_path: typing.Optional[str] = None,
 77 | ) -> typing.Generator[str, None, None]:
 78 |     """Select family-specific options and generate possible candidates."""
 79 |     os_family = os_family or detect_os()
 80 |     family_names = names[os_family]
 81 |     all_locations = (suggested_path, ) + locations[os_family]
 82 |     yield from build_candidates(all_locations, family_names)
 83 | 
 84 | 
 85 | def build_candidates(
 86 |         locations: typing.Iterable[typing.Optional[str]],
 87 |         names: typing.Iterable[str],
 88 | ) -> typing.Generator[str, None, None]:
 89 |     """Build candidate names."""
 90 |     for location in locations:
 91 |         if not location:
 92 |             continue
 93 |         if location == '__PATH__':
 94 |             yield from build_path_candidates(names)
 95 |         elif os.path.isfile(location):
 96 |             yield location
 97 |         elif os.path.isdir(location):
 98 |             for name in names:
 99 |                 cmd = os.path.join(location, name)
100 |                 if os.path.isfile(cmd):
101 |                     yield cmd
102 | 
103 | 
def build_path_candidates(
    names: typing.Iterable[str],
    os_family: typing.Optional[OS_FAMILY] = None,
) -> typing.Generator[str, None, None]:
    """Build candidate names on environment PATH.

    For non-Windows families the bare names are yielded. For Windows, every
    ``<PATH entry>/<name>`` combination is yielded first, followed by the
    bare names. A missing or empty ``PATH`` variable contributes no
    combinations instead of raising ``KeyError``.
    """
    os_family = os_family or detect_os()
    if os_family != 'windows':
        yield from names
    else:
        raw_path = os.environ.get('PATH')
        # ';' is kept literally: this branch is selected by the requested
        # family, which may differ from the platform actually running.
        paths = raw_path.split(';') if raw_path else []
        yield from (
            os.path.join(path, name)
            for path in paths
            for name in names
        )
        yield from names
120 | 
121 | 
def round_decimal(value: Decimal, min_digits=0, max_digits: typing.Optional[int] = None):
    """Round a Decimal while keeping between ``min_digits`` and ``max_digits`` decimals.

    * A value with no significant decimals, or with at most ``min_digits`` of
      them, is rounded to exactly ``min_digits`` decimals.
    * Otherwise, when ``max_digits`` is given, the value is rounded to at
      most ``max_digits`` decimals.
    * Otherwise the value is returned unchanged.

    ``max_digits=0`` is honored as "no decimals"; only ``None`` means no
    upper limit.
    """
    # normalize() strips trailing zeros, so the exponent reflects only the
    # significant decimal places.
    exponent = int(value.normalize().as_tuple().exponent)
    if exponent >= 0:
        return round(value, min_digits)

    decimal_places = abs(exponent)
    if decimal_places <= min_digits:
        return round(value, min_digits)
    if max_digits is not None:
        return round(value, min(max_digits, decimal_places))
    return value
133 | 


--------------------------------------------------------------------------------
/knowit/api.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import traceback
  3 | import typing
  4 | 
  5 | from knowit import __version__
  6 | from knowit.config import Config
  7 | from knowit.provider import Provider
  8 | from .providers import (
  9 |     EnzymeProvider,
 10 |     FFmpegProvider,
 11 |     MediaInfoProvider,
 12 |     MkvMergeProvider,
 13 | )
 14 | 
# Provider name -> provider class. initialize() and dependencies() iterate
# this mapping, so its insertion order is the order providers are handled in.
_provider_map = {
    'mediainfo': MediaInfoProvider,
    'ffmpeg': FFmpegProvider,
    'mkvmerge': MkvMergeProvider,
    'enzyme': EnzymeProvider,
}

# Key view over the registered provider names.
provider_names = _provider_map.keys()

# Provider instances keyed by provider name; populated by initialize().
available_providers: typing.Dict[str, Provider] = {}
 25 | 
 26 | 
 27 | class KnowitException(Exception):
 28 |     """Exception raised when knowit encounters an internal error."""
 29 | 
 30 | 
 31 | def initialize(context: typing.Optional[typing.Mapping] = None, *, force: bool = False) -> None:
 32 |     """Initialize knowit, reload provider if a new suggested path is given."""
 33 |     context = context or {}
 34 |     config = Config.build(context.get('config'))
 35 |     for name, provider_cls in _provider_map.items():
 36 |         general_config = getattr(config, 'general', {})
 37 |         suggested_path = context.get(name) or general_config.get(name)
 38 |         # create provider if it is not initialized or if it is not loaded and suggesting a new path
 39 |         p = available_providers.get(name)
 40 |         if force or p is None or (
 41 |             not p.loaded() and not p.match_executor_location(suggested_path)
 42 |         ):
 43 |             available_providers[name] = provider_cls(config, suggested_path)
 44 | 
 45 | 
 46 | def know(
 47 |         video_path: typing.Union[str, os.PathLike],
 48 |         context: typing.Optional[typing.MutableMapping] = None
 49 | ) -> typing.Mapping:
 50 |     """Return a mapping of video metadata."""
 51 |     video_path = os.fspath(video_path)
 52 | 
 53 |     try:
 54 |         context = context or {}
 55 |         context.setdefault('profile', 'default')
 56 |         initialize(context)
 57 | 
 58 |         for name, provider in available_providers.items():
 59 |             if name != (context.get('provider') or name):
 60 |                 continue
 61 | 
 62 |             if provider.accepts(video_path):
 63 |                 result = provider.describe(video_path, context)
 64 |                 if result:
 65 |                     return result
 66 | 
 67 |         return {}
 68 |     except Exception:
 69 |         raise KnowitException(debug_info(context=context, exc_info=True))
 70 | 
 71 | 
 72 | def dependencies(context: typing.Optional[typing.Mapping] = None) -> typing.Mapping:
 73 |     """Return all dependencies detected by knowit."""
 74 |     deps = {}
 75 |     try:
 76 |         initialize(context)
 77 |         for name in _provider_map:
 78 |             if name in available_providers:
 79 |                 deps[name] = available_providers[name].version
 80 |             else:
 81 |                 deps[name] = {}
 82 |     except Exception:
 83 |         pass
 84 | 
 85 |     return deps
 86 | 
 87 | 
 88 | def loaded_providers(options: typing.Union[dict[str, typing.Any], None] = None) -> dict[str, bool]:
 89 |     """Return a dict with each provider and if they are installed."""
 90 |     # initialize providers with options
 91 |     initialize(options)
 92 | 
 93 |     # return a dict of providers and the loaded state
 94 |     return {k: p.loaded() for k, p in available_providers.items()}
 95 | 
 96 | 
 97 | def _centered(value: str) -> str:
 98 |     value = value[-52:]
 99 |     return f'| {value:^53} |'
100 | 
101 | 
def debug_info(
        context: typing.Optional[typing.MutableMapping] = None,
        exc_info: bool = False,
) -> str:
    """Build the boxed, multi-line debug report.

    The report holds the KnowIt version, the version info of each provider,
    the truthy context entries, the output of the ``debug_data`` callable if
    the context carries one, the current traceback when ``exc_info`` is
    true, and a footer pointing to the issue tracker.

    Note that ``debug_data`` is popped from the given context.
    """
    separator = '+-------------------------------------------------------+'
    report = [separator, _centered(f'KnowIt {__version__}'), separator]

    for position, info in enumerate(dependencies(context).values()):
        if position:
            # blank row between two providers
            report.append(_centered(''))

        for label, detail in info.items():
            report.extend((_centered(label), _centered(detail)))

    if context:
        debug_data = context.pop('debug_data', None)

        report.append(separator)
        report.extend(
            _centered(f'{k}: {v}')
            for k, v in context.items()
            if v
        )

        if debug_data:
            report.extend((separator, debug_data()))

    if exc_info:
        report.extend((separator, traceback.format_exc()))

    report.extend((
        separator,
        _centered('Please report any bug or feature request at'),
        _centered('https://github.com/ratoaq2/knowit/issues.'),
        separator,
    ))

    return '\n'.join(report)
144 | 


--------------------------------------------------------------------------------
/knowit/properties/general.py:
--------------------------------------------------------------------------------
  1 | import re
  2 | import typing
  3 | from datetime import timedelta
  4 | from decimal import Decimal, InvalidOperation
  5 | 
  6 | import babelfish
  7 | 
  8 | from knowit.core import Configurable, Property, T
  9 | from knowit.utils import round_decimal
 10 | 
 11 | 
 12 | class Basic(Property[T]):
 13 |     """Basic property to handle int, Decimal and other basic types."""
 14 | 
 15 |     def __init__(self, *args: str, data_type: typing.Type,
 16 |                  processor: typing.Optional[typing.Callable[[T], T]] = None,
 17 |                  allow_fallback: bool = False, **kwargs):
 18 |         """Init method."""
 19 |         super().__init__(*args, **kwargs)
 20 |         self.data_type = data_type
 21 |         self.processor = processor or (lambda x: x)
 22 |         self.allow_fallback = allow_fallback
 23 | 
 24 |     def handle(self, value, context: typing.MutableMapping):
 25 |         """Handle value."""
 26 |         if isinstance(value, self.data_type):
 27 |             return self.processor(value)
 28 | 
 29 |         try:
 30 |             return self.processor(self.data_type(value))
 31 |         except ValueError:
 32 |             if not self.allow_fallback:
 33 |                 self.report(value, context)
 34 | 
 35 | 
 36 | class Duration(Property[timedelta]):
 37 |     """Duration property."""
 38 | 
 39 |     duration_re = re.compile(r'(?P<hours>\d{1,2}):'
 40 |                              r'(?P<minutes>\d{1,2}):'
 41 |                              r'(?P<seconds>\d{1,2})(?:\.'
 42 |                              r'(?P<milliseconds>\d{3})'
 43 |                              r'(?P<microseconds>\d{3})?\d*)?')
 44 | 
 45 |     def __init__(self, *args: str, resolution: typing.Union[int, Decimal] = 1, **kwargs):
 46 |         """Initialize a Duration."""
 47 |         super().__init__(*args, **kwargs)
 48 |         self.resolution = resolution
 49 | 
 50 |     def handle(self, value, context: typing.MutableMapping):
 51 |         """Return duration as timedelta."""
 52 |         if isinstance(value, timedelta):
 53 |             return value
 54 |         elif isinstance(value, int):
 55 |             return timedelta(milliseconds=int(value * self.resolution))
 56 |         try:
 57 |             return timedelta(
 58 |                 milliseconds=int(Decimal(value) * self.resolution))
 59 |         except (ValueError, InvalidOperation):
 60 |             pass
 61 | 
 62 |         match = self.duration_re.match(value)
 63 |         if not match:
 64 |             self.report(value, context)
 65 |             return None
 66 | 
 67 |         params = {
 68 |             key: int(value)
 69 |             for key, value in match.groupdict().items()
 70 |             if value
 71 |         }
 72 |         return timedelta(**params)
 73 | 
 74 | 
 75 | class Language(Property[babelfish.Language]):
 76 |     """Language property."""
 77 | 
 78 |     def handle(self, value, context: typing.MutableMapping):
 79 |         """Handle languages."""
 80 |         try:
 81 |             if len(value) == 3:
 82 |                 try:
 83 |                     return babelfish.Language.fromalpha3b(value)
 84 |                 except babelfish.Error:
 85 |                     # Try alpha3t if alpha3b fails
 86 |                     return babelfish.Language.fromalpha3t(value)
 87 | 
 88 |             return babelfish.Language.fromietf(value)
 89 |         except (babelfish.Error, ValueError):
 90 |             pass
 91 | 
 92 |         try:
 93 |             return babelfish.Language.fromname(value)
 94 |         except babelfish.Error:
 95 |             pass
 96 | 
 97 |         self.report(value, context)
 98 |         return babelfish.Language('und')
 99 | 
100 | 
class Quantity(Property):
    """Quantity is a property with unit."""

    def __init__(self, *args: str, unit, data_type=int, **kwargs):
        """Init method.

        :param args: positional arguments forwarded to the parent initializer.
        :param unit: unit the numeric value is multiplied by.
        :param data_type: numeric type used to coerce incoming values.
        :param kwargs: keyword arguments forwarded to the parent initializer.
        """
        super().__init__(*args, **kwargs)
        self.unit = unit
        self.data_type = data_type

    def handle(self, value, context):
        """Handle value with unit.

        :return: the bare number when ``context['no_units']`` is truthy, the
            number multiplied by ``unit`` otherwise, or ``None`` when the
            value cannot be converted to ``data_type``.
        """
        if not isinstance(value, self.data_type):
            try:
                value = self.data_type(value)
            except (ValueError, TypeError, InvalidOperation):
                # ``Decimal`` raises ``InvalidOperation`` (not ``ValueError``)
                # for malformed text and constructors raise ``TypeError`` for
                # unsupported input types; report those as well rather than
                # letting them propagate.
                self.report(value, context)
                return None
        if isinstance(value, Decimal):
            value = round_decimal(value, min_digits=1, max_digits=3)

        return value if context.get('no_units') else value * self.unit
122 | 
123 | 
class YesNo(Configurable[str]):
    """Handler that maps truthy-looking raw values to a yes/no result."""

    # Lower-cased textual forms that count as "yes".
    yes_values = ('yes', 'true', '1')

    def __init__(self, *args: str, yes=True, no=False, hide_value=None,
                 config: typing.Optional[
                     typing.Mapping[str, typing.Mapping]] = None,
                 config_key: typing.Optional[str] = None,
                 **kwargs):
        """Set up the handler.

        :param yes: result used when the raw value is one of ``yes_values``.
        :param no: result used for every other raw value.
        :param hide_value: result that is suppressed (``None`` is returned
            instead of it).
        :param config: optional configuration mapping; an empty one is used
            when omitted.
        :param config_key: key forwarded to the parent initializer.
        """
        super().__init__(config or {}, *args, config_key=config_key, **kwargs)
        self.yes, self.no, self.hide_value = yes, no, hide_value

    def handle(self, value, context):
        """Translate *value* into the configured yes/no result."""
        is_yes = str(value).lower() in self.yes_values
        outcome = self.yes if is_yes else self.no
        if outcome == self.hide_value:
            return None

        # Only defer to the parent handler when a mapping is available.
        if self.mapping:
            return super().handle(outcome, context)
        return outcome
147 | 


--------------------------------------------------------------------------------
/tests/data/mkvmerge/media_001.mkv.json:
--------------------------------------------------------------------------------
  1 | {
  2 |   "attachments": [],
  3 |   "chapters": [
  4 |     {
  5 |       "num_entries": 8
  6 |     }
  7 |   ],
  8 |   "container": {
  9 |     "properties": {
 10 |       "container_type": 17,
 11 |       "date_local": "2020-11-11T07:35:31+01:00",
 12 |       "date_utc": "2020-11-11T06:35:31Z",
 13 |       "duration": 3428352000000,
 14 |       "is_providing_timestamps": true,
 15 |       "muxing_application": "libebml v1.4.0 + libmatroska v1.6.2",
 16 |       "segment_uid": "5c5db0c93c7ebac3c88f7d372288b20e",
 17 |       "title": "Media 001",
 18 |       "writing_application": "mkvmerge v51.0.0 ('I Wish') 64-bit"
 19 |     },
 20 |     "recognized": true,
 21 |     "supported": true,
 22 |     "type": "Matroska"
 23 |   },
 24 |   "errors": [],
 25 |   "file_name": "tests/data/mkvmerge/media_001.mkv",
 26 |   "global_tags": [],
 27 |   "identification_format_version": 14,
 28 |   "track_tags": [],
 29 |   "tracks": [
 30 |     {
 31 |       "codec": "HEVC/H.265/MPEG-H",
 32 |       "id": 0,
 33 |       "properties": {
 34 |         "codec_id": "V_MPEGH/ISO/HEVC",
 35 |         "codec_private_data": "012220000000b0000000000099f000fcfdfafa00000f04600001002240010c01ffff222000000300b0000003000003009914860300000303e900005dc050610001003d420101222000000300b00000030000030099a001e020021c4db1486924294af016a1220136c2000007d20000bb80c5781cdc0004eef80009ddf7cf1e3d62000100074401c172f63b64670002001e4e01891821349baa199608fc8a4839083d13404200989680000003001480000a4e019004000003000080",
 36 |         "codec_private_length": 187,
 37 |         "default_duration": 41708333,
 38 |         "default_track": false,
 39 |         "display_dimensions": "3840x2160",
 40 |         "display_unit": 0,
 41 |         "enabled_track": true,
 42 |         "forced_track": false,
 43 |         "language": "eng",
 44 |         "language_ietf": "en",
 45 |         "minimum_timestamp": 0,
 46 |         "number": 1,
 47 |         "packetizer": "mpegh_p2_video",
 48 |         "pixel_dimensions": "3840x2160",
 49 |         "uid": 1
 50 |       },
 51 |       "type": "video"
 52 |     },
 53 |     {
 54 |       "codec": "TrueHD Atmos",
 55 |       "id": 1,
 56 |       "properties": {
 57 |         "audio_channels": 8,
 58 |         "audio_sampling_frequency": 48000,
 59 |         "codec_id": "A_TRUEHD",
 60 |         "codec_private_length": 0,
 61 |         "default_track": true,
 62 |         "enabled_track": true,
 63 |         "forced_track": false,
 64 |         "language": "eng",
 65 |         "language_ietf": "en",
 66 |         "minimum_timestamp": 0,
 67 |         "number": 2,
 68 |         "uid": 2
 69 |       },
 70 |       "type": "audio"
 71 |     },
 72 |     {
 73 |       "codec": "AC-3 Dolby Surround EX",
 74 |       "id": 2,
 75 |       "properties": {
 76 |         "audio_channels": 6,
 77 |         "audio_sampling_frequency": 48000,
 78 |         "codec_id": "A_AC3",
 79 |         "codec_private_length": 0,
 80 |         "default_duration": 32000000,
 81 |         "default_track": false,
 82 |         "enabled_track": true,
 83 |         "forced_track": false,
 84 |         "language": "eng",
 85 |         "language_ietf": "en",
 86 |         "minimum_timestamp": 0,
 87 |         "number": 3,
 88 |         "uid": 3
 89 |       },
 90 |       "type": "audio"
 91 |     },
 92 |     {
 93 |       "codec": "AC-3",
 94 |       "id": 3,
 95 |       "properties": {
 96 |         "audio_channels": 6,
 97 |         "audio_sampling_frequency": 48000,
 98 |         "codec_id": "A_AC3",
 99 |         "codec_private_length": 0,
100 |         "default_duration": 32000000,
101 |         "default_track": false,
102 |         "enabled_track": true,
103 |         "forced_track": false,
104 |         "language": "ger",
105 |         "language_ietf": "de",
106 |         "minimum_timestamp": 0,
107 |         "number": 4,
108 |         "uid": 4
109 |       },
110 |       "type": "audio"
111 |     },
112 |     {
113 |       "codec": "DTS",
114 |       "id": 4,
115 |       "properties": {
116 |         "audio_bits_per_sample": 24,
117 |         "audio_channels": 2,
118 |         "audio_sampling_frequency": 48000,
119 |         "codec_id": "A_DTS",
120 |         "codec_private_length": 0,
121 |         "default_duration": 10666667,
122 |         "default_track": false,
123 |         "enabled_track": true,
124 |         "forced_track": false,
125 |         "language": "por",
126 |         "language_ietf": "pt-BR",
127 |         "minimum_timestamp": 2002000000,
128 |         "number": 5,
129 |         "uid": 5
130 |       },
131 |       "type": "audio"
132 |     },
133 |     {
134 |       "codec": "HDMV PGS",
135 |       "id": 5,
136 |       "properties": {
137 |         "codec_id": "S_HDMV/PGS",
138 |         "codec_private_length": 0,
139 |         "content_encoding_algorithms": "0",
140 |         "default_track": false,
141 |         "enabled_track": true,
142 |         "forced_track": false,
143 |         "language": "eng",
144 |         "language_ietf": "en",
145 |         "number": 6,
146 |         "uid": 6
147 |       },
148 |       "type": "subtitles"
149 |     },
150 |     {
151 |       "codec": "HDMV PGS",
152 |       "id": 6,
153 |       "properties": {
154 |         "codec_id": "S_HDMV/PGS",
155 |         "codec_private_length": 0,
156 |         "content_encoding_algorithms": "0",
157 |         "default_track": false,
158 |         "enabled_track": true,
159 |         "forced_track": false,
160 |         "language": "ger",
161 |         "language_ietf": "de",
162 |         "number": 7,
163 |         "uid": 11
164 |       },
165 |       "type": "subtitles"
166 |     },
167 |     {
168 |       "codec": "HDMV PGS",
169 |       "id": 7,
170 |       "properties": {
171 |         "codec_id": "S_HDMV/PGS",
172 |         "codec_private_length": 0,
173 |         "content_encoding_algorithms": "0",
174 |         "default_track": false,
175 |         "enabled_track": true,
176 |         "forced_track": false,
177 |         "language": "por",
178 |         "language_ietf": "pt-BR",
179 |         "number": 8,
180 |         "uid": 14
181 |       },
182 |       "type": "subtitles"
183 |     }
184 |   ],
185 |   "warnings": []
186 | }
187 | 


--------------------------------------------------------------------------------
/tests/test_resolution.yml:
--------------------------------------------------------------------------------
  1 | # https://en.wikipedia.org/wiki/Pixel_aspect_ratio
  2 | # https://en.wikipedia.org/wiki/Low-definition_television
  3 | # https://knowledge.kaltura.com/best-practices-multi-device-transcoding
  4 | 240p:
  5 |   - width: 320 pixel
  6 |     height: 250 pixel
  7 |     scan_type: Progressive
  8 |     aspect_ratio: 1.28
  9 |     pixel_aspect_ratio: 1.0
 10 |   - width: 320 pixel
 11 |     height: 250 pixel
 12 |     scan_type: Progressive
 13 |   - width: 320 pixel
 14 |     height: 262 pixel
 15 |     scan_type: Progressive
 16 |     aspect_ratio: 1.221
 17 |     pixel_aspect_ratio: 1.0
 18 |   - width: 320 pixel
 19 |     height: 262 pixel
 20 |     scan_type: Progressive
 21 | 
 22 | 288p:
 23 |   - width: 480 pixel
 24 |     height: 270 pixel
 25 |     scan_type: Progressive
 26 |     aspect_ratio: 1.778
 27 |     pixel_aspect_ratio: 1.0
 28 |   - width: 480 pixel
 29 |     height: 270 pixel
 30 |     scan_type: Progressive
 31 | 
 32 | 360p:
 33 |   - width: 640 pixel
 34 |     height: 360 pixel
 35 |     scan_type: Progressive
 36 |     aspect_ratio: 1.778
 37 |     pixel_aspect_ratio: 1.0
 38 |   - width: 640 pixel
 39 |     height: 360 pixel
 40 |     scan_type: Progressive
 41 |   - width: 640 pixel
 42 |     height: 352 pixel
 43 |     scan_type: Progressive
 44 |     aspect_ratio: 1.818
 45 |     pixel_aspect_ratio: 1.0
 46 |   - width: 640 pixel
 47 |     height: 352 pixel
 48 |     scan_type: Progressive
 49 |   - width: 592 pixel
 50 |     height: 320 pixel
 51 |     scan_type: Progressive
 52 |     aspect_ratio: 1.85
 53 |     pixel_aspect_ratio: 1.0
 54 |   - width: 592 pixel
 55 |     height: 320 pixel
 56 |     scan_type: Progressive
 57 |   - width: 640 pixel
 58 |     height: 320 pixel
 59 |     scan_type: Progressive
 60 |   - width: 640 pixel
 61 |     height: 352 pixel
 62 |     scan_type: Progressive
 63 | 
 64 | 480p:
 65 |   - width: 640 pixel
 66 |     height: 320 pixel
 67 |     scan_type: Progressive
 68 |     aspect_ratio: 2.0
 69 |     pixel_aspect_ratio: 1.092
 70 |   - width: 752 pixel
 71 |     height: 398 pixel
 72 |     scan_type: Progressive
 73 |     aspect_ratio: 1.889
 74 |     pixel_aspect_ratio: 1.0
 75 |   - width: 752 pixel
 76 |     height: 398 pixel
 77 |     scan_type: Progressive
 78 |   - width: 720 pixel
 79 |     height: 400 pixel
 80 |     scan_type: Progressive
 81 |     aspect_ratio: 1.8
 82 |     pixel_aspect_ratio: 1.0
 83 |   - width: 720 pixel
 84 |     height: 400 pixel
 85 |     scan_type: Progressive
 86 | 
 87 | 576p:
 88 |   - width: 720 pixel
 89 |     height: 576 pixel
 90 |     scan_type: Progressive
 91 |     aspect_ratio: 1.333
 92 |     pixel_aspect_ratio: 1.067
 93 |   - width: 720 pixel
 94 |     height: 576 pixel
 95 |     scan_type: Progressive
 96 |   - width: 720 pixel
 97 |     height: 576 pixel
 98 |     scan_type: Progressive
 99 |     aspect_ratio: 1.778
100 |     pixel_aspect_ratio: 1.422  # not sure
101 |   - width: 720 pixel
102 |     height: 576 pixel
103 |     scan_type: Progressive
104 |   - width: 640 pixel
105 |     height: 352 pixel
106 |     scan_type: Progressive
107 |     aspect_ratio: 1.818
108 |     pixel_aspect_ratio: 1.422
109 |   - width: 720 pixel
110 |     height: 596 pixel
111 |     scan_type: Progressive
112 |     aspect_ratio: 1.289
113 |     pixel_aspect_ratio: 1.067
114 |   - width: 720 pixel
115 |     height: 586 pixel
116 |     scan_type: Progressive
117 |     aspect_ratio: 1.311
118 |     pixel_aspect_ratio: 1.067
119 |   - width: 720 pixel
120 |     height: 588 pixel
121 |     scan_type: Progressive
122 |     aspect_ratio: 1.304
123 |     pixel_aspect_ratio: 1.065
124 |   - width: 720 pixel
125 |     height: 590 pixel
126 |     scan_type: Progressive
127 |     aspect_ratio: 1.302
128 |     pixel_aspect_ratio: 1.067
129 | 
130 | 720p:
131 |   - width: 1280 pixel
132 |     height: 720 pixel
133 |     scan_type: Progressive
134 |     aspect_ratio: 1.778
135 |     pixel_aspect_ratio: 1.0
136 |   - width: 1280 pixel
137 |     height: 720 pixel
138 |     scan_type: Progressive
139 |   - width: 1280 pixel
140 |     height: 544 pixel
141 |     scan_type: Progressive
142 |     aspect_ratio: 2.353
143 |     pixel_aspect_ratio: 1.0
144 |   - width: 1280 pixel
145 |     height: 544 pixel
146 |     scan_type: Progressive
147 |   - width: 1280 pixel
148 |     height: 542 pixel
149 |     scan_type: Progressive
150 |     aspect_ratio: 2.362
151 |     pixel_aspect_ratio: 1.0
152 |   - width: 1280 pixel
153 |     height: 542 pixel
154 |     scan_type: Progressive
155 |   - width: 1280 pixel
156 |     height: 526 pixel
157 |     scan_type: Progressive
158 |     aspect_ratio: 2.433
159 |     pixel_aspect_ratio: 1.0
160 |   - width: 1280 pixel
161 |     height: 526 pixel
162 |     scan_type: Progressive
163 |   - width: 1264 pixel
164 |     height: 718 pixel
165 |     scan_type: Progressive
166 |     aspect_ratio: 1.76
167 |     pixel_aspect_ratio: 1.0
168 |   - width: 1264 pixel
169 |     height: 718 pixel
170 |     scan_type: Progressive
171 | 
172 | 1080p:
173 |   - width: 1920 pixel
174 |     height: 1080 pixel
175 |     scan_type: Progressive
176 |     aspect_ratio: 1.778
177 |     pixel_aspect_ratio: 1.0
178 |   - width: 1920 pixel
179 |     height: 1080 pixel
180 |     scan_type: Progressive
181 |   - width: 1426 pixel
182 |     height: 1080 pixel
183 |     scan_type: Progressive
184 |     aspect_ratio: 1.319
185 |   - width: 1426 pixel
186 |     height: 1080 pixel
187 |     scan_type: Progressive
188 |   - width: 1920 pixel
189 |     height: 800 pixel
190 |     scan_type: Progressive
191 |     aspect_ratio: 2.4
192 |     pixel_aspect_ratio: 1.0
193 |   - width: 1920 pixel
194 |     height: 800 pixel
195 |     scan_type: Progressive
196 |   - width: 1920 pixel
197 |     height: 796 pixel
198 |     scan_type: Progressive
199 |     aspect_ratio: 2.412
200 |     pixel_aspect_ratio: 1.0
201 |   - width: 1920 pixel
202 |     height: 796 pixel
203 |     scan_type: Progressive
204 |   - width: 1920 pixel
205 |     height: 540 pixel
206 |     scan_type: Progressive
207 |     aspect_ratio: 3.556
208 |     pixel_aspect_ratio: 1.0
209 |   - width: 1920 pixel
210 |     height: 540 pixel
211 |     scan_type: Progressive
212 | 


--------------------------------------------------------------------------------
/tests/data/ffmpeg/issue-39-example-02.mkv.json:
--------------------------------------------------------------------------------
  1 | {
  2 |     "streams": [
  3 |         {
  4 |             "index": 0,
  5 |             "codec_name": "hevc",
  6 |             "codec_long_name": "H.265 / HEVC (High Efficiency Video Coding)",
  7 |             "profile": "Main",
  8 |             "codec_type": "video",
  9 |             "codec_time_base": "1001/24000",
 10 |             "codec_tag_string": "[0][0][0][0]",
 11 |             "codec_tag": "0x0000",
 12 |             "width": 1920,
 13 |             "height": 1080,
 14 |             "coded_width": 1920,
 15 |             "coded_height": 1080,
 16 |             "closed_captions": 0,
 17 |             "has_b_frames": 2,
 18 |             "sample_aspect_ratio": "1:1",
 19 |             "display_aspect_ratio": "16:9",
 20 |             "pix_fmt": "yuv420p",
 21 |             "level": 120,
 22 |             "color_range": "tv",
 23 |             "color_space": "bt709",
 24 |             "color_transfer": "bt709",
 25 |             "color_primaries": "bt709",
 26 |             "refs": 1,
 27 |             "r_frame_rate": "24000/1001",
 28 |             "avg_frame_rate": "24000/1001",
 29 |             "time_base": "1/1000",
 30 |             "start_pts": 105,
 31 |             "start_time": "0:00:00.105000",
 32 |             "disposition": {
 33 |                 "default": 1,
 34 |                 "dub": 0,
 35 |                 "original": 0,
 36 |                 "comment": 0,
 37 |                 "lyrics": 0,
 38 |                 "karaoke": 0,
 39 |                 "forced": 0,
 40 |                 "hearing_impaired": 0,
 41 |                 "visual_impaired": 0,
 42 |                 "clean_effects": 0,
 43 |                 "attached_pic": 0,
 44 |                 "timed_thumbnails": 0
 45 |             },
 46 |             "tags": {
 47 |                 "BPS": "1500697",
 48 |                 "DURATION": "01:13:14.056708333",
 49 |                 "NUMBER_OF_FRAMES": "105352",
 50 |                 "NUMBER_OF_BYTES": "824268753",
 51 |                 "_STATISTICS_WRITING_APP": "mkvpropedit v56.1.0 ('My Friend') 64-bit",
 52 |                 "_STATISTICS_WRITING_DATE_UTC": "2021-05-27 15:27:05",
 53 |                 "_STATISTICS_TAGS": "BPS DURATION NUMBER_OF_FRAMES NUMBER_OF_BYTES"
 54 |             }
 55 |         },
 56 |         {
 57 |             "index": 1,
 58 |             "codec_name": "aac",
 59 |             "codec_long_name": "AAC (Advanced Audio Coding)",
 60 |             "profile": "HE-AAC",
 61 |             "codec_type": "audio",
 62 |             "codec_time_base": "1/48000",
 63 |             "codec_tag_string": "[0][0][0][0]",
 64 |             "codec_tag": "0x0000",
 65 |             "sample_fmt": "fltp",
 66 |             "sample_rate": "48000",
 67 |             "channels": 6,
 68 |             "channel_layout": "5.1",
 69 |             "bits_per_sample": 0,
 70 |             "r_frame_rate": "0/0",
 71 |             "avg_frame_rate": "0/0",
 72 |             "time_base": "1/1000",
 73 |             "start_pts": 0,
 74 |             "start_time": "0:00:00.000000",
 75 |             "disposition": {
 76 |                 "default": 1,
 77 |                 "dub": 0,
 78 |                 "original": 0,
 79 |                 "comment": 0,
 80 |                 "lyrics": 0,
 81 |                 "karaoke": 0,
 82 |                 "forced": 0,
 83 |                 "hearing_impaired": 0,
 84 |                 "visual_impaired": 0,
 85 |                 "clean_effects": 0,
 86 |                 "attached_pic": 0,
 87 |                 "timed_thumbnails": 0
 88 |             },
 89 |             "tags": {
 90 |                 "title": "Surround",
 91 |                 "LANGUAGE": "eng",
 92 |                 "BPS": "256002",
 93 |                 "DURATION": "01:13:12.106000000",
 94 |                 "NUMBER_OF_FRAMES": "102941",
 95 |                 "NUMBER_OF_BYTES": "140548779",
 96 |                 "_STATISTICS_WRITING_APP": "mkvpropedit v56.1.0 ('My Friend') 64-bit",
 97 |                 "_STATISTICS_WRITING_DATE_UTC": "2021-05-27 15:27:05",
 98 |                 "_STATISTICS_TAGS": "BPS DURATION NUMBER_OF_FRAMES NUMBER_OF_BYTES"
 99 |             }
100 |         },
101 |         {
102 |             "index": 2,
103 |             "codec_name": "hdmv_pgs_subtitle",
104 |             "codec_long_name": "HDMV Presentation Graphic Stream subtitles",
105 |             "codec_type": "subtitle",
106 |             "codec_time_base": "0/1",
107 |             "codec_tag_string": "[0][0][0][0]",
108 |             "codec_tag": "0x0000",
109 |             "r_frame_rate": "0/0",
110 |             "avg_frame_rate": "0/0",
111 |             "time_base": "1/1000",
112 |             "start_pts": 0,
113 |             "start_time": "0:00:00.000000",
114 |             "duration_ts": 4394162,
115 |             "duration": "1:13:14.162000",
116 |             "disposition": {
117 |                 "default": 0,
118 |                 "dub": 0,
119 |                 "original": 0,
120 |                 "comment": 0,
121 |                 "lyrics": 0,
122 |                 "karaoke": 0,
123 |                 "forced": 0,
124 |                 "hearing_impaired": 0,
125 |                 "visual_impaired": 0,
126 |                 "clean_effects": 0,
127 |                 "attached_pic": 0,
128 |                 "timed_thumbnails": 0
129 |             },
130 |             "tags": {
131 |                 "LANGUAGE": "eng",
132 |                 "BPS": "30412",
133 |                 "DURATION": "01:13:02.712000000",
134 |                 "NUMBER_OF_FRAMES": "1518",
135 |                 "NUMBER_OF_BYTES": "16660969",
136 |                 "_STATISTICS_WRITING_APP": "mkvpropedit v56.1.0 ('My Friend') 64-bit",
137 |                 "_STATISTICS_WRITING_DATE_UTC": "2021-05-27 15:27:05",
138 |                 "_STATISTICS_TAGS": "BPS DURATION NUMBER_OF_FRAMES NUMBER_OF_BYTES"
139 |             }
140 |         }
141 |     ],
142 |     "format": {
143 |         "filename": "Z:\\Videos\\Shows (Prospective)\\Band of Brothers (2001)\\01x01 - Currahee.mkv",
144 |         "nb_streams": 3,
145 |         "nb_programs": 0,
146 |         "format_name": "matroska,webm",
147 |         "format_long_name": "Matroska / WebM",
148 |         "start_time": "0:00:00.000000",
149 |         "duration": "1:13:14.162000",
150 |         "size": "983004345",
151 |         "bit_rate": "1789655",
152 |         "probe_score": 100,
153 |         "tags": {
154 |             "ENCODER": "Lavf55.12.0"
155 |         }
156 |     }
157 | }


--------------------------------------------------------------------------------
/knowit/provider.py:
--------------------------------------------------------------------------------
  1 | 
  2 | import os
  3 | import typing
  4 | from logging import NullHandler, getLogger
  5 | 
  6 | import knowit.config
  7 | from knowit.core import Property, Rule
  8 | from knowit.properties import Quantity
  9 | from knowit.units import units
 10 | 
# Module-level logger; a NullHandler is attached so this module never adds
# output handlers of its own.
logger = getLogger(__name__)
logger.addHandler(NullHandler())


# Property used by Provider._describe_tracks to express the file size
# (as returned by os.path.getsize) in bytes.
size_property = Quantity('size', unit=units.byte, description='media size')

# PropertyMap: property name -> Property.
# PropertyConfig: track type -> PropertyMap.
PropertyMap = typing.Mapping[str, Property]
PropertyConfig = typing.Mapping[str, PropertyMap]

# RuleMap: property name -> Rule.
# RuleConfig: track type -> RuleMap.
RuleMap = typing.Mapping[str, Rule]
RuleConfig = typing.Mapping[str, RuleMap]
 22 | 
 23 | 
 24 | class Provider:
 25 |     """Base class for all providers."""
 26 | 
 27 |     executor: typing.Union["Executor", None]
 28 |     min_fps = 10
 29 |     max_fps = 200
 30 | 
 31 |     def __init__(
 32 |             self,
 33 |             config: knowit.config.Config,
 34 |             mapping: PropertyConfig,
 35 |             rules: typing.Optional[RuleConfig] = None,
 36 |     ):
 37 |         """Init method."""
 38 |         self.config = config
 39 |         self.mapping = mapping
 40 |         self.rules = rules or {}
 41 |         self.executor = None
 42 | 
 43 |     def loaded(self) -> bool:
 44 |         """Whether or not this provider was loaded."""
 45 |         raise NotImplementedError
 46 | 
 47 |     def accepts(self, target):
 48 |         """Whether or not the video is supported by this provider."""
 49 |         raise NotImplementedError
 50 | 
 51 |     def describe(self, target, context):
 52 |         """Read video metadata information."""
 53 |         raise NotImplementedError
 54 | 
 55 |     def _describe_tracks(self, video_path, general_track, video_tracks, audio_tracks, subtitle_tracks, context):
 56 |         logger.debug('Handling general track')
 57 |         props = self._describe_track(general_track, 'general', context)
 58 | 
 59 |         if 'path' not in props:
 60 |             props['path'] = video_path
 61 |         if 'container' not in props:
 62 |             props['container'] = os.path.splitext(video_path)[1][1:]
 63 |         if 'size' not in props and os.path.isfile(video_path):
 64 |             props['size'] = size_property.handle(os.path.getsize(video_path), context)
 65 | 
 66 |         for track_type, tracks, in (('video', video_tracks),
 67 |                                     ('audio', audio_tracks),
 68 |                                     ('subtitle', subtitle_tracks)):
 69 |             results = []
 70 |             for track in tracks or []:
 71 |                 logger.debug('Handling %s track', track_type)
 72 |                 t = self._validate_track(track_type, self._describe_track(track, track_type, context))
 73 |                 if t:
 74 |                     results.append(t)
 75 | 
 76 |             if results:
 77 |                 props[track_type] = results
 78 | 
 79 |         return props
 80 | 
 81 |     @classmethod
 82 |     def _validate_track(cls, track_type, track):
 83 |         if track_type != 'video' or 'frame_rate' not in track:
 84 |             return track
 85 | 
 86 |         frame_rate = track['frame_rate']
 87 |         try:
 88 |             frame_rate = frame_rate.magnitude
 89 |         except AttributeError:
 90 |             pass
 91 | 
 92 |         if cls.min_fps < frame_rate < cls.max_fps:
 93 |             return track
 94 | 
 95 |     def _describe_track(self, track, track_type, context):
 96 |         """Describe track to a dict.
 97 | 
 98 |         :param track:
 99 |         :param track_type:
100 |         :rtype: dict
101 |         """
102 |         props = {}
103 |         pv_props = {}
104 |         for name, prop in self.mapping[track_type].items():
105 |             if not prop:
106 |                 # placeholder to be populated by rules. It keeps the order
107 |                 props[name] = None
108 |                 continue
109 | 
110 |             value = prop.extract_value(track, context)
111 |             if value is not None:
112 |                 which = props if not prop.private else pv_props
113 |                 which[name] = value
114 | 
115 |         for name, rule in self.rules.get(track_type, {}).items():
116 |             if props.get(name) is not None and not rule.override:
117 |                 logger.debug('Skipping rule %s since property is already present: %r', name, props[name])
118 |                 continue
119 | 
120 |             value = rule.execute(props, pv_props, context)
121 |             if value is not None:
122 |                 which = props if not rule.private else pv_props
123 |                 which[name] = value
124 |             elif name in props and (not rule.override or props[name] is None):
125 |                 del props[name]
126 | 
127 |         return props
128 | 
129 |     def match_executor_location(self, suggested_path: typing.Union[str, None]) -> bool:
130 |         """Compare the suggested path to the path that was suggested when creating the provider."""
131 |         if self.executor is None:
132 |             return True
133 |         if self.executor.location == suggested_path:
134 |             return True
135 |         return False
136 | 
137 |     @property
138 |     def version(self):
139 |         """Return provider version information."""
140 |         raise NotImplementedError
141 | 
142 | 
class Executor:
    """Wrapper around the library or executable a provider relies on."""

    def __init__(self, location, version):
        """Remember the location and the version of the library or executable."""
        self.location, self.version = location, version

    def extract_info(self, filename):
        """Extract the media info of *filename* (to be overridden)."""
        raise NotImplementedError

    @classmethod
    def create(cls, os_family=None, suggested_path=None):
        """Build the executor instance (to be overridden)."""
        raise NotImplementedError

    @classmethod
    def get_executor_instance(cls, suggested_path=None) -> "Executor":
        """Give back an executor instance (to be overridden)."""
        raise NotImplementedError
164 | 
165 | 
class NotFoundExecutor(Executor):
    """Executor with a library or executable that was not found.

    Instances are falsy and extract no information.
    """

    def __init__(self, location, version=None) -> None:
        """Initialize the object.

        :param location: location that was searched for the executor.
        :param version: accepted for signature compatibility with
            ``Executor``; defaults to ``None``.
        """
        # Go through the base initializer so that ``version`` is always set;
        # previously the argument was dropped and reading ``.version`` raised
        # AttributeError.
        super().__init__(location, version)
        self.warned = False

    def __bool__(self) -> bool:
        """Executor not found is always False."""
        return False

    def extract_info(self, filename):
        """Extract media info: always an empty dict."""
        return {}
181 | 
182 | 
class ProviderError(Exception):
    """Root of the provider exception hierarchy."""
187 | 
188 | 
class MalformedFileError(ProviderError):
    """Provider error for a malformed file."""
193 | 
194 | 
class UnsupportedFileFormatError(ProviderError):
    """Provider error for an unsupported file format."""
199 | 


--------------------------------------------------------------------------------
/knowit/__main__.py:
--------------------------------------------------------------------------------
  1 | import argparse
  2 | import json
  3 | import logging
  4 | import os
  5 | import sys
  6 | import typing
  7 | from argparse import ArgumentParser
  8 | 
  9 | import yaml
 10 | 
 11 | from knowit import (
 12 |     __url__,
 13 |     __version__,
 14 |     api,
 15 | )
 16 | from knowit.provider import ProviderError
 17 | from knowit.serializer import (
 18 |     get_json_encoder,
 19 |     get_yaml_dumper,
 20 | )
 21 | from knowit.utils import recurse_paths
 22 | 
 23 | logging.basicConfig(stream=sys.stdout, format='%(message)s')
 24 | logging.getLogger('CONSOLE').setLevel(logging.INFO)
 25 | logging.getLogger('knowit').setLevel(logging.ERROR)
 26 | 
 27 | console = logging.getLogger('CONSOLE')
 28 | logger = logging.getLogger('knowit')
 29 | 
 30 | 
 31 | def build_argument_parser() -> ArgumentParser:
 32 |     """Build the argument parser."""
 33 |     opts = ArgumentParser()
 34 |     opts.add_argument(
 35 |         dest='videopath',
 36 |         help='Path to the video to introspect',
 37 |         nargs='*',
 38 |         type=str,
 39 |     )
 40 | 
 41 |     provider_opts = opts.add_argument_group('Providers')
 42 |     provider_opts.add_argument(
 43 |         '-p',
 44 |         '--provider',
 45 |         dest='provider',
 46 |         help='The provider to be used: mediainfo, ffmpeg, mkvmerge or enzyme.',
 47 |         type=str,
 48 |     )
 49 | 
 50 |     output_opts = opts.add_argument_group('Output')
 51 |     output_opts.add_argument(
 52 |         '--debug',
 53 |         action='store_true',
 54 |         dest='debug',
 55 |         help='Print information for debugging knowit and for reporting bugs.'
 56 |     )
 57 |     output_opts.add_argument(
 58 |         '--report',
 59 |         action='store_true',
 60 |         dest='report',
 61 |         help='Parse media and report all non-detected values'
 62 |     )
 63 |     output_opts.add_argument(
 64 |         '-y',
 65 |         '--yaml',
 66 |         action='store_true',
 67 |         dest='yaml',
 68 |         help='Display output in yaml format'
 69 |     )
 70 |     output_opts.add_argument(
 71 |         '-N',
 72 |         '--no-units',
 73 |         action='store_true',
 74 |         dest='no_units',
 75 |         help='Display output without units'
 76 |     )
 77 |     output_opts.add_argument(
 78 |         '-P',
 79 |         '--profile',
 80 |         dest='profile',
 81 |         help='Display values according to specified profile: code, default, human, technical',
 82 |         type=str,
 83 |     )
 84 | 
 85 |     conf_opts = opts.add_argument_group('Configuration')
 86 |     conf_opts.add_argument(
 87 |         '--mediainfo',
 88 |         dest='mediainfo',
 89 |         help='The location to search for MediaInfo binaries',
 90 |         type=str,
 91 |     )
 92 |     conf_opts.add_argument(
 93 |         '--ffmpeg',
 94 |         dest='ffmpeg',
 95 |         help='The location to search for ffprobe (FFmpeg) binaries',
 96 |         type=str,
 97 |     )
 98 |     conf_opts.add_argument(
 99 |         '--mkvmerge',
100 |         dest='mkvmerge',
101 |         help='The location to search for mkvmerge (MKVToolNix) binaries',
102 |         type=str,
103 |     )
104 | 
105 |     information_opts = opts.add_argument_group('Information')
106 |     information_opts.add_argument(
107 |         '--version',
108 |         dest='version',
109 |         action='store_true',
110 |         help='Display knowit version.'
111 |     )
112 | 
113 |     return opts
114 | 
115 | 
def knowit(
        video_path: typing.Union[str, os.PathLike],
        options: argparse.Namespace,
        context: typing.MutableMapping,
) -> typing.Mapping:
    """Describe one video and, outside report mode, print the result.

    Stores ``video_path`` under ``context['path']``, announces the file on
    the console logger, delegates to ``api.know`` and returns its result.
    """
    context['path'] = video_path
    announcement = 'Parsing: %s' if options.report else 'For: %s'
    console.info(announcement, video_path)

    info = api.know(video_path, context)
    if options.report:
        # Report mode prints only the aggregated report, not per-file output.
        return info

    console.info('Knowit %s found: ', __version__)
    console.info(dumps(info, options, context))
    return info
132 | 
133 | 
def _as_yaml(
        info: typing.Mapping[str, typing.Any],
        context: typing.Mapping,
) -> str:
    """Serialize ``info`` as a YAML document.

    When ``info`` has a ``'path'`` key, the whole mapping is nested under
    that path so the output is keyed by file.
    """
    if 'path' in info:
        document = {info['path']: info}
    else:
        document = info

    dump_options = {
        'Dumper': get_yaml_dumper(context),
        'default_flow_style': False,
        'allow_unicode': True,
        'sort_keys': False,
    }
    return yaml.dump(document, **dump_options)
147 | 
148 | 
def _as_json(
        info: typing.Mapping[str, typing.Any],
        context: typing.Mapping,
) -> str:
    """Serialize ``info`` as indented, non-ASCII-escaped JSON text."""
    encoder_class = get_json_encoder(context)
    return json.dumps(info, cls=encoder_class, indent=4, ensure_ascii=False)
160 | 
161 | 
def dumps(
        info: typing.Mapping[str, typing.Any],
        options: argparse.Namespace,
        context: typing.Mapping,
) -> str:
    """Render ``info`` as YAML when ``options.yaml`` is set, else as JSON."""
    if options.yaml:
        return _as_yaml(info, context)
    return _as_json(info, context)
170 | 
171 | 
def main(args: typing.Optional[typing.List[str]] = None) -> None:
    """Execute main function for entry point.

    :param args: command-line arguments without the program name. ``None``
        lets argparse read ``sys.argv[1:]``; an explicit list (including an
        empty one) is parsed as given.
    """
    argument_parser = build_argument_parser()
    # parse_args() substitutes sys.argv[1:] only for None, so an explicit
    # empty list is honoured instead of silently falling back to the
    # process arguments.
    options = argument_parser.parse_args(args)

    if options.debug:
        logger.setLevel(logging.DEBUG)
        logging.getLogger('enzyme').setLevel(logging.INFO)
    else:
        logger.setLevel(logging.WARNING)

    paths = recurse_paths(options.videopath)

    if not paths:
        # Nothing to analyse: either show version details or the usage text.
        if options.version:
            console.info(api.debug_info())
        else:
            argument_parser.print_help()
        return

    # Shared across all files so unknown values accumulate in report mode.
    report: typing.MutableMapping[str, str] = {}
    for i, video_path in enumerate(paths):
        try:
            # Fresh context per file, holding only the options actually set.
            context = {k: v for k, v in vars(options).items() if v is not None}
            if options.report:
                context['report'] = report
            else:
                # Outside report mode the key must be absent, not False.
                context.pop('report', None)
            knowit(video_path, options, context)
        except ProviderError:
            logger.exception('Error when processing video')
        except OSError:
            logger.exception('OS error when processing video')
        except UnicodeError:
            logger.exception('Character encoding error when processing video')
        except api.KnowitException as e:
            logger.error(e)

        # Print an intermediate summary after every 20th file.
        if options.report and i % 20 == 19 and report:
            console.info('Unknown values so far:')
            console.info(dumps(report, options, vars(options)))

    if options.report:
        if report:
            console.info('Knowit %s found unknown values:', __version__)
            console.info(dumps(report, options, vars(options)))
            console.info('Please report them at %s', __url__)
        else:
            console.info('Knowit %s knows everything. :-)', __version__)
223 | 
# Script entry point: forward the command-line arguments (minus the
# program name) to main().
if __name__ == '__main__':
    main(sys.argv[1:])
226 | 


--------------------------------------------------------------------------------
/knowit/serializer.py:
--------------------------------------------------------------------------------
  1 | import datetime
  2 | import json
  3 | import re
  4 | import typing
  5 | from datetime import timedelta
  6 | from decimal import Decimal
  7 | 
  8 | import babelfish
  9 | import yaml
 10 | from yaml.composer import Composer
 11 | from yaml.constructor import SafeConstructor
 12 | from yaml.parser import Parser
 13 | from yaml.reader import Reader
 14 | from yaml.resolver import Resolver as DefaultResolver
 15 | from yaml.scanner import Scanner
 16 | 
 17 | from knowit.units import units
 18 | from knowit.utils import round_decimal
 19 | 
 20 | 
 21 | def format_property(profile: str, o):
 22 |     """Convert properties to string."""
 23 |     if isinstance(o, timedelta):
 24 |         return format_duration(o, profile)
 25 | 
 26 |     if isinstance(o, babelfish.language.Language):
 27 |         return format_language(o, profile)
 28 | 
 29 |     if hasattr(o, 'units'):
 30 |         return format_quantity(o, profile)
 31 | 
 32 |     return str(o)
 33 | 
 34 | 
 35 | def get_json_encoder(context):
 36 |     """Return json encoder that handles all needed object types."""
 37 |     class StringEncoder(json.JSONEncoder):
 38 |         """String json encoder."""
 39 | 
 40 |         def default(self, o):
 41 |             return format_property(context['profile'], o)
 42 | 
 43 |     return StringEncoder
 44 | 
 45 | 
 46 | def get_yaml_dumper(context):
 47 |     """Return yaml dumper that handles all needed object types."""
 48 |     class CustomDumper(yaml.SafeDumper):
 49 |         """Custom YAML Dumper."""
 50 | 
 51 |         def default_representer(self, data):
 52 |             """Convert data to string."""
 53 |             if isinstance(data, int):
 54 |                 return self.represent_int(data)
 55 |             return self.represent_str(str(data))
 56 | 
 57 |         def default_language_representer(self, data):
 58 |             """Convert language to string."""
 59 |             return self.represent_str(format_language(data, context['profile']))
 60 | 
 61 |         def default_quantity_representer(self, data):
 62 |             """Convert quantity to string."""
 63 |             return self.default_representer(format_quantity(data, context['profile']))
 64 | 
 65 |         def default_duration_representer(self, data):
 66 |             """Convert quantity to string."""
 67 |             return self.default_representer(format_duration(data, context['profile']))
 68 | 
 69 |     CustomDumper.add_representer(babelfish.Language, CustomDumper.default_language_representer)
 70 |     CustomDumper.add_representer(timedelta, CustomDumper.default_duration_representer)
 71 |     CustomDumper.add_representer(units.Quantity, CustomDumper.default_quantity_representer)
 72 |     CustomDumper.add_representer(Decimal, CustomDumper.default_representer)
 73 | 
 74 |     return CustomDumper
 75 | 
 76 | 
def get_yaml_loader(constructors=None):
    """Return a custom yaml loader class.

    Compared with the stock safe loader pieces it is assembled from:

    * YAML sequences are constructed with
      ``yaml.Loader.construct_python_tuple`` (i.e. as tuples);
    * the implicit ``float`` resolver is dropped and values matching the
      float pattern are tagged ``!decimal`` and built as ``Decimal``.

    :param constructors: optional mapping of tag -> constructor registered
        on the loader in addition to the ones above.
    :return: the loader class (not an instance).
    """
    constructors = constructors or {}
    # Copy of the default implicit resolvers without any '...float' tag.
    custom_yaml_implicit_resolvers = {
        ch: [(tag, regexp) for tag, regexp in vs if not tag.endswith('float')]
        for ch, vs in DefaultResolver.yaml_implicit_resolvers.items()
    }

    class Resolver(DefaultResolver):
        """Custom YAML Resolver."""

        yaml_implicit_resolvers = custom_yaml_implicit_resolvers

    # Scalars that look like floats are resolved to the '!decimal' tag.
    Resolver.add_implicit_resolver(  # regex copied from yaml source
        '!decimal',
        re.compile(r'''^(?:
            [-+]?(?:[0-9][0-9_]*)\.[0-9_]*(?:[eE][-+][0-9]+)?
            |\.[0-9_]+(?:[eE][-+][0-9]+)?
            |[-+]?[0-9][0-9_]*(?::[0-9]?[0-9])+\.[0-9_]*
            |[-+]?\.(?:inf|Inf|INF)
            |\.(?:nan|NaN|NAN)
        )$''', re.VERBOSE),
        list('-+0123456789.')
    )

    class CustomLoader(Reader, Scanner, Parser, Composer, SafeConstructor, Resolver):
        """Custom YAML Loader."""

        def __init__(self, stream):
            # Each base is initialised explicitly, one after another.
            Reader.__init__(self, stream)
            Scanner.__init__(self)
            Parser.__init__(self)
            Composer.__init__(self)
            SafeConstructor.__init__(self)
            Resolver.__init__(self)

    # Sequences become tuples; caller-supplied constructors are added next.
    CustomLoader.add_constructor('tag:yaml.org,2002:seq', yaml.Loader.construct_python_tuple)
    for tag, constructor in constructors.items():
        CustomLoader.add_constructor(tag, constructor)

    def decimal_constructor(loader, node):
        # Build a Decimal from the scalar text of a '!decimal' node.
        value = loader.construct_scalar(node)
        return Decimal(value)

    CustomLoader.add_constructor('!decimal', decimal_constructor)

    return CustomLoader
124 | 
125 | 
def format_duration(
        duration: datetime.timedelta,
        profile='default',
) -> typing.Union[str, Decimal]:
    """Format a duration according to ``profile``.

    * ``'technical'``: ``str(duration)``.
    * ``'code'``: the duration in seconds as a rounded ``Decimal``.
    * ``'human'``: ``'H hours MM minutes SS seconds'``, leaving out leading
      units that are zero.
    * anything else: ``'H:MM:SS'``.
    """
    if profile == 'technical':
        return str(duration)

    if profile == 'code':
        # Integer microseconds keep the value exact before dividing.
        total_microseconds = (duration.days * 86400 + duration.seconds) * 10 ** 6 + duration.microseconds
        return round_decimal(Decimal(total_microseconds) / 10**6, min_digits=1)

    whole_hours, remainder = divmod(duration.total_seconds(), 3600)
    whole_minutes, remainder = divmod(remainder, 60)
    hours, minutes, seconds = int(whole_hours), int(whole_minutes), int(remainder)

    if profile != 'human':
        return f'{hours}:{minutes:02d}:{seconds:02d}'

    if hours > 0:
        return f'{hours} hours {minutes:02d} minutes {seconds:02d} seconds'
    if minutes > 0:
        return f'{minutes} minutes {seconds:02d} seconds'
    return f'{seconds} seconds'
151 | 
152 | 
def format_language(
        language: babelfish.language.Language,
        profile: str = 'default',
) -> str:
    """Format a language according to ``profile``.

    The ``'default'`` and ``'human'`` profiles use ``language.name``; every
    other profile uses ``str(language)``.
    """
    use_name = profile in ('default', 'human')
    return str(language.name) if use_name else str(language)
161 | 
162 | 
def format_quantity(
        quantity,
        profile='default',
) -> str:
    """Format a quantity according to ``profile``.

    With the ``'code'`` profile the bare ``quantity.magnitude`` is returned
    as-is. Hertz, bit-based and bit-per-second quantities are scaled with
    ``_format_quantity`` (binary multiples and three decimals for the
    ``'technical'`` profile). Everything else is ``str(quantity)``.
    """
    if profile == 'code':
        return quantity.magnitude

    unit = quantity.units
    if unit != 'bit':
        technical = profile == 'technical'

        # (extra keyword arguments, precision outside the technical profile)
        scaling = None
        if unit == 'hertz':
            scaling = ({'unit': 'Hz'}, 1)
        else:
            root_unit = quantity.to_root_units().units
            if root_unit == 'bit':
                scaling = ({}, 2)
            elif root_unit == 'bit / second':
                scaling = ({'unit': 'bps'}, 1)

        if scaling is not None:
            extra, plain_precision = scaling
            return _format_quantity(
                quantity.magnitude,
                binary=technical,
                precision=3 if technical else plain_precision,
                **extra,
            )

    return str(quantity)
184 | 
185 | 
186 | def _format_quantity(
187 |         num,
188 |         unit: str = 'B',
189 |         binary: bool = False,
190 |         precision: int = 2,
191 | ) -> str:
192 |     if binary:
193 |         factor = 1024
194 |         affix = 'i'
195 |     else:
196 |         factor = 1000
197 |         affix = ''
198 |     for prefix in ('', 'K', 'M', 'G', 'T', 'P', 'E', 'Z'):
199 |         if abs(num) < factor:
200 |             break
201 |         num /= factor
202 |     else:
203 |         prefix = 'Y'
204 | 
205 |     return f'{num:3.{precision}f} {prefix}{affix}{unit}'
206 | 
207 | 
# Loader class built once at import time with no extra constructors.
YAMLLoader = get_yaml_loader()
209 | 


--------------------------------------------------------------------------------
/knowit/providers/enzyme.py:
--------------------------------------------------------------------------------
  1 | 
  2 | import json
  3 | import logging
  4 | from collections import defaultdict
  5 | from logging import NullHandler, getLogger
  6 | import enzyme
  7 | 
  8 | from knowit.core import Property
  9 | from knowit.properties import (
 10 |     AudioCodec,
 11 |     Basic,
 12 |     Duration,
 13 |     Language,
 14 |     Quantity,
 15 |     VideoCodec,
 16 |     YesNo,
 17 | )
 18 | from knowit.provider import (
 19 |     MalformedFileError,
 20 |     Provider,
 21 | )
 22 | from knowit.rules import (
 23 |     AudioChannelsRule,
 24 |     ClosedCaptionRule,
 25 |     HearingImpairedRule,
 26 |     LanguageRule,
 27 |     ResolutionRule,
 28 | )
 29 | from knowit.rules.general import GuessTitleRule
 30 | from knowit.serializer import get_json_encoder
 31 | from knowit.units import units
 32 | from knowit.utils import to_dict
 33 | 
# Module logger; the NullHandler gives it a handler of its own even when the
# application has not configured logging.
logger = getLogger(__name__)
logger.addHandler(NullHandler())
 36 | 
 37 | 
 38 | class EnzymeProvider(Provider):
 39 |     """Enzyme Provider."""
 40 | 
 41 |     def __init__(self, config, *args, **kwargs):
 42 |         """Init method."""
 43 |         super().__init__(config, {
 44 |             'general': {
 45 |                 'title': Property('title', description='media title'),
 46 |                 'duration': Duration('duration', description='media duration'),
 47 |             },
 48 |             'video': {
 49 |                 'id': Basic('number', data_type=int, description='video track number'),
 50 |                 'name': Property('name', description='video track name'),
 51 |                 'language': Language('language', description='video language'),
 52 |                 'width': Quantity('width', unit=units.pixel),
 53 |                 'height': Quantity('height', unit=units.pixel),
 54 |                 'scan_type': YesNo('interlaced', yes='Interlaced', no='Progressive', default='Progressive',
 55 |                                    config=config, config_key='ScanType',
 56 |                                    description='video scan type'),
 57 |                 'resolution': None,  # populated with ResolutionRule
 58 |                 # 'bit_depth', Property('bit_depth', Integer('video bit depth')),
 59 |                 'codec': VideoCodec(config, 'codec_id', description='video codec'),
 60 |                 'forced': YesNo('forced', hide_value=False, description='video track forced'),
 61 |                 'default': YesNo('default', hide_value=False, description='video track default'),
 62 |                 'enabled': YesNo('enabled', hide_value=True, description='video track enabled'),
 63 |             },
 64 |             'audio': {
 65 |                 'id': Basic('number', data_type=int, description='audio track number'),
 66 |                 'name': Property('name', description='audio track name'),
 67 |                 'language': Language('language', description='audio language'),
 68 |                 'codec': AudioCodec(config, 'codec_id', description='audio codec'),
 69 |                 'channels_count': Basic('channels', data_type=int, description='audio channels count'),
 70 |                 'channels': None,  # populated with AudioChannelsRule
 71 |                 'forced': YesNo('forced', hide_value=False, description='audio track forced'),
 72 |                 'default': YesNo('default', hide_value=False, description='audio track default'),
 73 |                 'enabled': YesNo('enabled', hide_value=True, description='audio track enabled'),
 74 |             },
 75 |             'subtitle': {
 76 |                 'id': Basic('number', data_type=int, description='subtitle track number'),
 77 |                 'name': Property('name', description='subtitle track name'),
 78 |                 'language': Language('language', description='subtitle language'),
 79 |                 'hearing_impaired': None,  # populated with HearingImpairedRule
 80 |                 'closed_caption': None,  # populated with ClosedCaptionRule
 81 |                 'forced': YesNo('forced', hide_value=False, description='subtitle track forced'),
 82 |                 'default': YesNo('default', hide_value=False, description='subtitle track default'),
 83 |                 'enabled': YesNo('enabled', hide_value=True, description='subtitle track enabled'),
 84 |             },
 85 |         }, {
 86 |             'video': {
 87 |                 'guessed': GuessTitleRule('guessed properties', private=True),
 88 |                 'language': LanguageRule('video language', override=True),
 89 |                 'resolution': ResolutionRule('video resolution'),
 90 |             },
 91 |             'audio': {
 92 |                 'guessed': GuessTitleRule('guessed properties', private=True),
 93 |                 'language': LanguageRule('audio language', override=True),
 94 |                 'channels': AudioChannelsRule('audio channels'),
 95 |             },
 96 |             'subtitle': {
 97 |                 'guessed': GuessTitleRule('guessed properties', private=True),
 98 |                 'language': LanguageRule('subtitle language', override=True),
 99 |                 'hearing_impaired': HearingImpairedRule('subtitle hearing impaired', override=True),
100 |                 'closed_caption': ClosedCaptionRule('closed caption', override=True),
101 |             }
102 |         })
103 | 
104 |     def loaded(self) -> bool:
105 |         """Return always True as enzyme is always imported."""
106 |         return True
107 | 
108 |     def accepts(self, video_path):
109 |         """Accept only MKV files."""
110 |         return video_path.lower().endswith('.mkv')
111 | 
112 |     @classmethod
113 |     def extract_info(cls, video_path):
114 |         """Extract info from the video."""
115 |         with open(video_path, 'rb') as f:
116 |             return to_dict(enzyme.MKV(f))
117 | 
118 |     def describe(self, video_path, context):
119 |         """Return video metadata."""
120 |         try:
121 |             data = defaultdict(dict)
122 |             ff = self.extract_info(video_path)
123 | 
124 |             def debug_data():
125 |                 """Debug data."""
126 |                 return json.dumps(ff, cls=get_json_encoder(context), indent=4, ensure_ascii=False)
127 |             context['debug_data'] = debug_data
128 | 
129 |             if logger.isEnabledFor(logging.DEBUG):
130 |                 logger.debug('Video %r scanned using enzyme %r has raw data:\n%s',
131 |                              video_path, enzyme.__version__, debug_data)
132 | 
133 |             data.update(ff)
134 |             if 'info' in data and data['info'] is None:
135 |                 return {}
136 |         except enzyme.MalformedMKVError:  # pragma: no cover
137 |             raise MalformedFileError
138 | 
139 |         if logger.level == logging.DEBUG:
140 |             logger.debug('Video {video_path} scanned using Enzyme {version} has raw data:\n{data}',
141 |                          video_path=video_path, version=enzyme.__version__,
142 |                          data=json.dumps(data, cls=get_json_encoder(context), indent=4, ensure_ascii=False))
143 | 
144 |         result = self._describe_tracks(video_path, data.get('info', {}), data.get('video_tracks'),
145 |                                        data.get('audio_tracks'), data.get('subtitle_tracks'), context)
146 | 
147 |         if not result:
148 |             raise MalformedFileError
149 | 
150 |         result['provider'] = {
151 |             'name': 'enzyme',
152 |             'version': self.version
153 |         }
154 | 
155 |         return result
156 | 
157 |     @property
158 |     def version(self):
159 |         """Return enzyme version information."""
160 |         return {'enzyme': enzyme.__version__}
161 | 


--------------------------------------------------------------------------------
/tests/data/mediainfo/7.1-dts-hd-ma-speaker-mapping-test-file.mkv.json:
--------------------------------------------------------------------------------
  1 | {
  2 |   "media":{
  3 |     "@ref":"tests/data/7.1-dts-hd-ma-speaker-mapping-test-file.mkv",
  4 |     "track":[
  5 |       {
  6 |         "@type":"General",
  7 |         "Count":"331",
  8 |         "StreamCount":"1",
  9 |         "StreamKind":"General",
 10 |         "StreamKind_String":"General",
 11 |         "StreamKindID":"0",
 12 |         "UniqueID":"233259075675158394332089080443022607862",
 13 |         "UniqueID_String":"233259075675158394332089080443022607862 (0xAF7C105968F28EDE95280D4670BC05F6)",
 14 |         "VideoCount":"1",
 15 |         "AudioCount":"1",
 16 |         "Video_Format_List":"AVC",
 17 |         "Video_Format_WithHint_List":"AVC",
 18 |         "Video_Codec_List":"AVC",
 19 |         "Audio_Format_List":"DTS XLL",
 20 |         "Audio_Format_WithHint_List":"DTS XLL",
 21 |         "Audio_Codec_List":"DTS XLL",
 22 |         "Audio_Language_List":"English",
 23 |         "CompleteName":"tests/data/7.1-dts-hd-ma-speaker-mapping-test-file.mkv",
 24 |         "FolderName":"tests/data",
 25 |         "FileNameExtension":"7.1-dts-hd-ma-speaker-mapping-test-file.mkv",
 26 |         "FileName":"7.1-dts-hd-ma-speaker-mapping-test-file",
 27 |         "FileExtension":"mkv",
 28 |         "Format":"Matroska",
 29 |         "Format_String":"Matroska",
 30 |         "Format_Url":"https://matroska.org/downloads/windows.html",
 31 |         "Format_Extensions":"mkv mk3d mka mks",
 32 |         "Format_Commercial":"Matroska",
 33 |         "Format_Version":"4",
 34 |         "FileSize":"40772443",
 35 |         "FileSize_String":"38.9 MiB",
 36 |         "FileSize_String1":"39 MiB",
 37 |         "FileSize_String2":"39 MiB",
 38 |         "FileSize_String3":"38.9 MiB",
 39 |         "FileSize_String4":"38.88 MiB",
 40 |         "Duration":"97.931",
 41 |         "Duration_String":"1 min 37 s",
 42 |         "Duration_String1":"1 min 37 s 931 ms",
 43 |         "Duration_String2":"1 min 37 s",
 44 |         "Duration_String3":"00:01:37.931",
 45 |         "Duration_String4":"00:01:37;22",
 46 |         "Duration_String5":"00:01:37.931 (00:01:37;22)",
 47 |         "OverallBitRate_Mode":"VBR",
 48 |         "OverallBitRate_Mode_String":"Variable",
 49 |         "OverallBitRate":"3330708",
 50 |         "OverallBitRate_String":"3 331 kb/s",
 51 |         "FrameRate":"23.976",
 52 |         "FrameRate_String":"23.976 FPS",
 53 |         "FrameCount":"2348",
 54 |         "IsStreamable":"Yes",
 55 |         "Title":"7.1Ch DTS-HD MA - Speaker Mapping Test File",
 56 |         "Movie":"7.1Ch DTS-HD MA - Speaker Mapping Test File",
 57 |         "Encoded_Date":"UTC 2013-12-13 17:49:28",
 58 |         "File_Modified_Date":"UTC 2016-04-10 07:47:08",
 59 |         "File_Modified_Date_Local":"2016-04-10 09:47:08",
 60 |         "Encoded_Application":"mkvmerge v6.6.0 ('The Edge Of The In Between') built on Dec  1 2013 17:55:00",
 61 |         "Encoded_Application_String":"mkvmerge v6.6.0 ('The Edge Of The In Between') built on Dec  1 2013 17:55:00",
 62 |         "Encoded_Library":"libebml v1.3.0 + libmatroska v1.4.1",
 63 |         "Encoded_Library_String":"libebml v1.3.0 + libmatroska v1.4.1"
 64 |       },
 65 |       {
 66 |         "@type":"Video",
 67 |         "Count":"379",
 68 |         "StreamCount":"1",
 69 |         "StreamKind":"Video",
 70 |         "StreamKind_String":"Video",
 71 |         "StreamKindID":"0",
 72 |         "StreamOrder":"0",
 73 |         "ID":"1",
 74 |         "ID_String":"1",
 75 |         "UniqueID":"11835337130358454411",
 76 |         "Format":"AVC",
 77 |         "Format_String":"AVC",
 78 |         "Format_Info":"Advanced Video Codec",
 79 |         "Format_Url":"http://developers.videolan.org/x264.html",
 80 |         "Format_Commercial":"AVC",
 81 |         "Format_Profile":"Main",
 82 |         "Format_Level":"4",
 83 |         "Format_Settings":"CABAC / 4 Ref Frames",
 84 |         "Format_Settings_CABAC":"Yes",
 85 |         "Format_Settings_CABAC_String":"Yes",
 86 |         "Format_Settings_RefFrames":"4",
 87 |         "Format_Settings_RefFrames_String":"4 frames",
 88 |         "InternetMediaType":"video/H264",
 89 |         "CodecID":"V_MPEG4/ISO/AVC",
 90 |         "CodecID_Url":"http://ffdshow-tryout.sourceforge.net/",
 91 |         "Duration":"97.931",
 92 |         "Duration_String":"1 min 37 s",
 93 |         "Duration_String1":"1 min 37 s 931 ms",
 94 |         "Duration_String2":"1 min 37 s",
 95 |         "Duration_String3":"00:01:37.931",
 96 |         "Duration_String4":"00:01:37;22",
 97 |         "Duration_String5":"00:01:37.931 (00:01:37;22)",
 98 |         "BitRate_Mode":"VBR",
 99 |         "BitRate_Mode_String":"Variable",
100 |         "BitRate_Maximum":"40000000",
101 |         "BitRate_Maximum_String":"40.0 Mb/s",
102 |         "Width":"1920",
103 |         "Width_String":"1 920 pixels",
104 |         "Height":"1080",
105 |         "Height_String":"1 080 pixels",
106 |         "Stored_Height":"1088",
107 |         "Sampled_Width":"1920",
108 |         "Sampled_Height":"1080",
109 |         "PixelAspectRatio":"1.000",
110 |         "DisplayAspectRatio":"1.778",
111 |         "DisplayAspectRatio_String":"16:9",
112 |         "FrameRate_Mode":"CFR",
113 |         "FrameRate_Mode_String":"Constant",
114 |         "FrameRate":"23.976",
115 |         "FrameRate_String":"23.976 FPS",
116 |         "FrameCount":"2348",
117 |         "ColorSpace":"YUV",
118 |         "ChromaSubsampling":"4:2:0",
119 |         "ChromaSubsampling_String":"4:2:0",
120 |         "BitDepth":"8",
121 |         "BitDepth_String":"8 bits",
122 |         "ScanType":"Progressive",
123 |         "ScanType_String":"Progressive",
124 |         "Delay":"0.000",
125 |         "Delay_String3":"00:00:00.000",
126 |         "Delay_Source":"Container",
127 |         "Delay_Source_String":"Container",
128 |         "Default":"Yes",
129 |         "Default_String":"Yes",
130 |         "Forced":"No",
131 |         "Forced_String":"No",
132 |         "BufferSize":"30000000",
133 |         "extra":{
134 |           "FrameCount_Source":"General_Duration",
135 |           "Duration_Source":"General_Duration"
136 |         }
137 |       },
138 |       {
139 |         "@type":"Audio",
140 |         "Count":"282",
141 |         "StreamCount":"1",
142 |         "StreamKind":"Audio",
143 |         "StreamKind_String":"Audio",
144 |         "StreamKindID":"0",
145 |         "StreamOrder":"1",
146 |         "ID":"2",
147 |         "ID_String":"2",
148 |         "UniqueID":"11679026580382524291",
149 |         "Format":"DTS",
150 |         "Format_String":"DTS XLL",
151 |         "Format_Info":"Digital Theater Systems",
152 |         "Format_Url":"https://en.wikipedia.org/wiki/DTS_(sound_system)",
153 |         "Format_Commercial":"DTS-HD Master Audio",
154 |         "Format_Commercial_IfAny":"DTS-HD Master Audio",
155 |         "Format_Settings_Mode":"16",
156 |         "Format_Settings_Endianness":"Big",
157 |         "Format_AdditionalFeatures":"XLL",
158 |         "CodecID":"A_DTS",
159 |         "Duration":"97.931",
160 |         "Duration_String":"1 min 37 s",
161 |         "Duration_String1":"1 min 37 s 931 ms",
162 |         "Duration_String2":"1 min 37 s",
163 |         "Duration_String3":"00:01:37.931",
164 |         "Duration_String5":"00:01:37.931",
165 |         "BitRate_Mode":"VBR",
166 |         "BitRate_Mode_String":"Variable",
167 |         "Channels":"6",
168 |         "Channels_String":"6 channels",
169 |         "Channels_Original":"8",
170 |         "Channels_Original_String":"8 channels",
171 |         "ChannelPositions_Original":"Front: L C R, Side: L R, Back: L R, LFE",
172 |         "ChannelLayout_Original":"C L R LFE Lsr Rsr Lss Rss",
173 |         "SamplesPerFrame":"512",
174 |         "SamplingRate":"48000",
175 |         "SamplingRate_String":"48.0 kHz",
176 |         "SamplingCount":"4700688",
177 |         "FrameRate":"93.750",
178 |         "FrameRate_String":"93.750 FPS (512 SPF)",
179 |         "BitDepth":"24",
180 |         "BitDepth_String":"24 bits",
181 |         "Compression_Mode":"Lossless",
182 |         "Compression_Mode_String":"Lossless",
183 |         "Delay":"0.000",
184 |         "Delay_String3":"00:00:00.000",
185 |         "Delay_Source":"Container",
186 |         "Delay_Source_String":"Container",
187 |         "Video_Delay":"0.000",
188 |         "Video_Delay_String3":"00:00:00.000",
189 |         "Title":"7.1Ch DTS-HD MA",
190 |         "Language":"en",
191 |         "Language_String":"English",
192 |         "Language_String1":"English",
193 |         "Language_String2":"en",
194 |         "Language_String3":"eng",
195 |         "Language_String4":"en",
196 |         "Default":"Yes",
197 |         "Default_String":"Yes",
198 |         "Forced":"No",
199 |         "Forced_String":"No",
200 |         "extra":{
201 |           "SamplingCount_Source":"General_Duration",
202 |           "Duration_Source":"General_Duration"
203 |         }
204 |       }
205 |     ]
206 |   }
207 | }


--------------------------------------------------------------------------------
/knowit/core.py:
--------------------------------------------------------------------------------
  1 | import typing
  2 | from logging import NullHandler, getLogger
  3 | 
# Module-level logger. Only a NullHandler is attached here, so this module
# does not configure any log output by itself.
logger = getLogger(__name__)
logger.addHandler(NullHandler())

# Generic type variable used to parametrize the value type of the classes below.
T = typing.TypeVar('T')

# Translation table for ``str.translate``: maps code points 0-31 to None, which
# makes ``translate`` drop those characters. Despite its name, the table lists
# the characters that are removed, not the ones that are kept.
_visible_chars_table = dict.fromkeys(range(32))
 10 | 
 11 | 
 12 | def _is_unknown(value: typing.Any) -> bool:
 13 |     return isinstance(value, str) and (not value or value.lower() == 'unknown')
 14 | 
 15 | 
 16 | class Reportable(typing.Generic[T]):
 17 |     """Reportable abstract class."""
 18 | 
 19 |     def __init__(
 20 |             self,
 21 |             *args: str,
 22 |             description: typing.Optional[str] = None,
 23 |             reportable: bool = True,
 24 |     ):
 25 |         """Initialize the object."""
 26 |         self.names = args
 27 |         self._description = description
 28 |         self.reportable = reportable
 29 | 
 30 |     @property
 31 |     def description(self) -> str:
 32 |         """Rule description."""
 33 |         return self._description or '|'.join(self.names)
 34 | 
 35 |     def report(self, value: typing.Union[str, T], context: typing.MutableMapping) -> None:
 36 |         """Report unknown value."""
 37 |         if not value or not self.reportable:
 38 |             return
 39 | 
 40 |         if 'report' in context:
 41 |             report_map = context['report'].setdefault(self.description, {})
 42 |             if value not in report_map:
 43 |                 report_map[value] = context['path']
 44 |         logger.info('Invalid %s: %r', self.description, value)
 45 | 
 46 | 
 47 | class Property(Reportable[T]):
 48 |     """Property class."""
 49 | 
 50 |     def __init__(
 51 |             self,
 52 |             *args: str,
 53 |             default: typing.Optional[T] = None,
 54 |             private: bool = False,
 55 |             description: typing.Optional[str] = None,
 56 |             delimiter: str = ' / ',
 57 |             **kwargs,
 58 |     ):
 59 |         """Init method."""
 60 |         super().__init__(*args, description=description, **kwargs)
 61 |         self.default = default
 62 |         self.private = private
 63 |         # Used to detect duplicated values. e.g.: en / en or High@L4.0 / High@L4.0 or Progressive / Progressive
 64 |         self.delimiter = delimiter
 65 | 
 66 |     @classmethod
 67 |     def _extract_value(cls,
 68 |                        track: typing.Mapping,
 69 |                        name: str,
 70 |                        names: typing.List[str]):
 71 |         if len(names) == 2:
 72 |             parent_value = track.get(names[0], track.get(names[0].upper(), {}))
 73 |             return parent_value.get(names[1], parent_value.get(names[1].upper()))
 74 | 
 75 |         return track.get(name, track.get(name.upper()))
 76 | 
 77 |     def extract_value(
 78 |             self,
 79 |             track: typing.Mapping,
 80 |             context: typing.MutableMapping,
 81 |     ) -> typing.Optional[T]:
 82 |         """Extract the property value from a given track."""
 83 |         for name in self.names:
 84 |             names = name.split('.')
 85 |             value = self._extract_value(track, name, names)
 86 |             if value is None:
 87 |                 if self.default is None:
 88 |                     continue
 89 | 
 90 |                 value = self.default
 91 | 
 92 |             if isinstance(value, bytes):
 93 |                 value = value.decode()
 94 | 
 95 |             if isinstance(value, str):
 96 |                 value = value.translate(_visible_chars_table).strip()
 97 |                 if _is_unknown(value):
 98 |                     continue
 99 |                 value = self._deduplicate(value)
100 | 
101 |             result = self.handle(value, context)
102 |             if result is not None and not _is_unknown(result):
103 |                 return result
104 | 
105 |         return None
106 | 
107 |     @classmethod
108 |     def _deduplicate(cls, value: str) -> str:
109 |         values = value.split(' / ')
110 |         if len(values) == 2 and values[0] == values[1]:
111 |             return values[0]
112 |         return value
113 | 
114 |     def handle(self, value: T, context: typing.MutableMapping) -> typing.Optional[T]:
115 |         """Return the value without any modification."""
116 |         return value
117 | 
118 | 
class Configurable(Property[T]):
    """Property whose raw values are translated through a configuration section."""

    def __init__(self, config: typing.Mapping[str, typing.Mapping], *args: str,
                 config_key: typing.Optional[str] = None, **kwargs):
        """Init method.

        The mapping is read from the ``config`` attribute named ``config_key``,
        or named after the concrete class when no key is given. A falsy
        ``config`` results in an empty mapping.
        """
        super().__init__(*args, **kwargs)
        if config:
            section = config_key or self.__class__.__name__
            self.mapping = getattr(config, section)
        else:
            self.mapping = {}

    @classmethod
    def _extract_key(cls, value: str) -> typing.Union[str, bool]:
        """Return the mapping key for ``value``: its upper-cased form."""
        return value.upper()

    @classmethod
    def _extract_fallback_key(cls, value: str, key: str) -> typing.Optional[T]:
        """Return the next key to try after ``key`` failed; None by default."""
        return None

    def _lookup(
            self,
            key: str,
            context: typing.MutableMapping,
    ) -> typing.Union[T, None, bool]:
        """Resolve ``key`` for the profile in ``context``.

        :return: None when the key is not in the mapping, False when the entry
            is marked ``__ignored__``, otherwise the attribute of the entry
            named after the profile (``default`` when no profile is set).
        """
        entry = self.mapping.get(key)
        if entry is None:
            return None

        profile = context.get('profile') or 'default'
        resolved = getattr(entry, profile)
        if resolved != '__ignored__':
            return resolved
        return False

    def handle(self, value, context):
        """Translate ``value`` through the mapping.

        The key derived from ``value`` is looked up first, then fallback keys
        for as long as nothing is found and a key is available. An ignored
        entry ends the search with None. A value that could not be translated
        is reported before the falsy lookup result is returned.
        """
        key = self._extract_key(value)
        if key is False:
            return None

        result = self._lookup(key, context)
        # An ignored entry (False) stops the search at once; otherwise keep
        # trying fallback keys until one resolves or no key is left.
        while result is not False and not result and key:
            key = self._extract_fallback_key(value, key)
            result = self._lookup(key, context)

        if result is False:
            return None

        if not result:
            self.report(value, context)

        return result
167 | 
168 | 
class MultiValue(Property):
    """Property whose raw value may hold several delimited entries."""

    def __init__(self, prop: typing.Optional[Property] = None, delimiter='/', single=False,
                 handler: typing.Optional[
                     typing.Callable[[typing.Optional[str], typing.MutableMapping], typing.Optional[str]]] = None,
                 name=None, **kwargs):
        """Init method.

        :param prop: property whose names are reused and whose ``handle`` is
            applied to each entry when no ``handler`` is given.
        :param delimiter: separator used to split a raw value into entries.
        :param single: when True, only the first entry is handled.
        :param handler: callable taking precedence over ``prop.handle``.
        :param name: property name used when ``prop`` is not given.
        :param kwargs: forwarded to :class:`Property`.
        """
        super().__init__(*(prop.names if prop else (name,)), **kwargs)
        self.prop = prop
        self.delimiter = delimiter
        self.single = single
        self.handler = handler

    def handle(
            self,
            value: str,
            context: typing.MutableMapping,
    ) -> typing.Optional[typing.Union[str, typing.List[str]]]:
        """Handle properties with multiple values.

        The whole value is offered to the handler first; a non-None answer is
        returned as-is. Otherwise the value is turned into entries: a list with
        several items is used directly, anything else is split on the delimiter.
        With more than one entry (and ``single`` unset) each entry that is not
        unknown is handled and the non-None results are returned as a list. In
        every other case the result of handling the first entry is returned.

        :return: a single result, a list of results, or None.
        :raises NotImplementedError: when neither ``handler`` nor ``prop`` is set.
        """
        if self.handler:
            call = self.handler
        elif self.prop:
            call = self.prop.handle
        else:
            raise NotImplementedError('No handler available')

        result = call(value, context)
        if result is not None:
            return result

        if isinstance(value, list):
            if len(value) == 1:
                values = self._split(value[0], self.delimiter)
            else:
                values = value
        else:
            values = self._split(value, self.delimiter)

        if values is None:
            return call(values, context)
        if not values:
            # An empty list has no first entry to fall back on, and the handler
            # already answered None for the whole value above.
            return None
        if len(values) > 1 and not self.single:
            part_results = [call(item, context) if not _is_unknown(item) else None for item in values]
            results = [r for r in part_results if r is not None]
            if results:
                return results
        return call(values[0], context)

    @classmethod
    def _split(
            cls,
            value: typing.Optional[T],
            delimiter: str = '/',
    ) -> typing.Optional[typing.List[str]]:
        """Split ``str(value)`` on ``delimiter`` and strip each part; None stays None."""
        if value is None:
            return None

        return [x.strip() for x in str(value).split(delimiter)]
230 | 
231 | 
class Rule(Reportable[T]):
    """Base class for rules; concrete rules implement :meth:`execute`."""

    def __init__(self, name: str, private=False, override=False, **kwargs):
        """Register the rule under ``name`` and keep its two flags.

        Remaining keyword arguments are forwarded to :class:`Reportable`.
        """
        super().__init__(name, **kwargs)
        self.private, self.override = private, override

    def execute(self, props, pv_props, context: typing.Mapping):
        """Run the rule; the base class has no implementation and always raises."""
        raise NotImplementedError
244 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # KnowIt
  2 | 
Get to know your media files better.
  4 | 
  5 | [![Latest
  6 | Version](https://img.shields.io/pypi/v/knowit.svg)](https://pypi.python.org/pypi/knowit)
  7 | 
  8 | [![tests](https://github.com/ratoaq2/knowit/actions/workflows/test.yml/badge.svg)](https://github.com/ratoaq2/knowit/actions/workflows/test.yml)
  9 | 
 10 | [![License](https://img.shields.io/github/license/ratoaq2/knowit.svg)](https://github.com/ratoaq2/knowit/blob/master/LICENSE)
 11 | 
 12 | ![PyPI - Python Version](https://img.shields.io/pypi/pyversions/knowit)
 13 | 
 14 |   - Project page  
 15 |     <https://github.com/ratoaq2/knowit>
 16 | 
 17 | ## Usage
 18 | 
 19 | ### CLI
 20 | 
 21 | Extract information from a video file:
 22 | 
    $ knowit "/folder/Audio Samples/hd_dtsma_7.1.mkv"
 24 |     For: /folder/Audio Samples/hd_dtsma_7.1.mkv
 25 |     Knowit 0.4.0 found:
 26 |     {
 27 |         "title": "7.1Ch DTS-HD MA - Speaker Mapping Test File",
 28 |         "path": "/folder/Audio Samples/hd_dtsma_7.1.mkv",
 29 |         "duration": "0:01:37",
 30 |         "size": "40.77 MB",
 31 |         "bit_rate": "3.3 Mbps",
 32 |         "container": "mkv",
 33 |         "video": [
 34 |             {
 35 |                 "id": 1,
 36 |                 "duration": "0:01:37",
 37 |                 "width": "1920 pixel",
 38 |                 "height": "1080 pixel",
 39 |                 "scan_type": "Progressive",
 40 |                 "aspect_ratio": "1.778",
 41 |                 "pixel_aspect_ratio": "1.0",
 42 |                 "resolution": "1080p",
 43 |                 "frame_rate": "23.976 FPS",
 44 |                 "bit_depth": "8 bit",
 45 |                 "codec": "H.264",
 46 |                 "profile": "Main",
 47 |                 "profile_level": "4",
 48 |                 "media_type": "video/H264",
 49 |                 "default": true
 50 |             }
 51 |         ],
 52 |         "audio": [
 53 |             {
 54 |                 "id": 2,
 55 |                 "name": "7.1Ch DTS-HD MA",
 56 |                 "language": "English",
 57 |                 "duration": "0:01:37",
 58 |                 "codec": "DTS-HD",
 59 |                 "profile": "Master Audio",
 60 |                 "channels_count": 8,
 61 |                 "channels": "7.1",
 62 |                 "bit_depth": "24 bit",
 63 |                 "bit_rate_mode": "Variable",
 64 |                 "sampling_rate": "48.0 KHz",
 65 |                 "compression": "Lossless",
 66 |                 "default": true
 67 |             }
 68 |         ],
 69 |         "provider": {
 70 |             "name": "mediainfo",
 71 |             "version": {
 72 |                 "pymediainfo": "5.0.3",
 73 |                 "libmediainfo.so.0": "v20.9"
 74 |             }
 75 |         }
 76 |     }
 77 | 
 78 | Extract information from a video file using ffmpeg:
 79 | 
    $ knowit --provider ffmpeg "/folder/Audio Samples/hd_dtsma_7.1.mkv"
 81 |     For: /folder/Audio Samples/hd_dtsma_7.1.mkv
 82 |     Knowit 0.4.0 found:
 83 |     {
 84 |         "title": "7.1Ch DTS-HD MA - Speaker Mapping Test File",
 85 |         "path": "/folder/Audio Samples/hd_dtsma_7.1.mkv",
 86 |         "duration": "0:01:37",
 87 |         "size": "40.77 MB",
 88 |         "bit_rate": "3.3 Mbps",
 89 |         "container": "mkv",
 90 |         "video": [
 91 |             {
 92 |                 "id": 0,
 93 |                 "width": "1920 pixel",
 94 |                 "height": "1080 pixel",
 95 |                 "scan_type": "Progressive",
 96 |                 "aspect_ratio": "1.778",
 97 |                 "pixel_aspect_ratio": "1.0",
 98 |                 "resolution": "1080p",
 99 |                 "frame_rate": "23.976 FPS",
100 |                 "bit_depth": "8 bit",
101 |                 "codec": "H.264",
102 |                 "profile": "Main",
103 |                 "default": true
104 |             }
105 |         ],
106 |         "audio": [
107 |             {
108 |                 "id": 1,
109 |                 "name": "7.1Ch DTS-HD MA",
110 |                 "language": "English",
111 |                 "codec": "DTS-HD",
112 |                 "profile": "Master Audio",
113 |                 "channels_count": 8,
114 |                 "channels": "7.1",
115 |                 "bit_depth": "24 bit",
116 |                 "sampling_rate": "48.0 KHz",
117 |                 "default": true
118 |             }
119 |         ],
120 |         "provider": {
121 |             "name": "ffmpeg",
122 |             "version": {
123 |                 "ffprobe": "v4.2.4-1ubuntu0.1"
124 |             }
125 |         }
126 |     }
127 | 
128 | Using docker:
129 | 
    $ docker run -it --rm -v /folder:/folder knowit "/folder/Audio Samples/hd_dtsma_7.1.mkv"
131 |     For: /folder/Audio Samples/hd_dtsma_7.1.mkv
132 |     Knowit 0.4.0 found:
133 |     {
134 |         "title": "7.1Ch DTS-HD MA - Speaker Mapping Test File",
135 |         "path": "/folder/Audio Samples/hd_dtsma_7.1.mkv",
136 |         "duration": "0:01:37",
137 |         "size": "40.77 MB",
138 |         "bit_rate": "3.3 Mbps",
139 |         "container": "mkv",
140 |         "video": [
141 |             {
142 |                 "id": 1,
143 |                 "duration": "0:01:37",
144 |                 "width": "1920 pixel",
145 |                 "height": "1080 pixel",
146 |                 "scan_type": "Progressive",
147 |                 "aspect_ratio": "1.778",
148 |                 "pixel_aspect_ratio": "1.0",
149 |                 "resolution": "1080p",
150 |                 "frame_rate": "23.976 FPS",
151 |                 "bit_depth": "8 bit",
152 |                 "codec": "H.264",
153 |                 "profile": "Main",
154 |                 "profile_level": "4",
155 |                 "media_type": "video/H264",
156 |                 "default": true
157 |             }
158 |         ],
159 |         "audio": [
160 |             {
161 |                 "id": 2,
162 |                 "name": "7.1Ch DTS-HD MA",
163 |                 "language": "English",
164 |                 "duration": "0:01:37",
165 |                 "codec": "DTS-HD",
166 |                 "profile": "Master Audio",
167 |                 "channels_count": 8,
168 |                 "channels": "7.1",
169 |                 "bit_depth": "24 bit",
170 |                 "bit_rate_mode": "Variable",
171 |                 "sampling_rate": "48.0 KHz",
172 |                 "compression": "Lossless",
173 |                 "default": true
174 |             }
175 |         ],
176 |         "provider": {
177 |             "name": "mediainfo",
178 |             "version": {
179 |                 "pymediainfo": "5.0.3",
180 |                 "libmediainfo.so.0": "v20.9"
181 |             }
182 |         }
183 |     }
184 | 
185 | All available CLI options:
186 | 
187 |     $ knowit --help
188 |     usage: knowit [-h] [-p PROVIDER] [--debug] [--report] [-y] [-N] [-P PROFILE] [--mediainfo MEDIAINFO] [--ffmpeg FFMPEG] [--mkvmerge MKVMERGE] [--version] [videopath [videopath ...]]
189 |     
190 |     positional arguments:
191 |       videopath             Path to the video to introspect
192 |     
193 |     optional arguments:
194 |       -h, --help            show this help message and exit
195 |     
196 |     Providers:
197 |       -p PROVIDER, --provider PROVIDER
198 |                             The provider to be used: mediainfo, ffmpeg, mkvmerge or enzyme.
199 |     
200 |     Output:
201 |       --debug               Print information for debugging knowit and for reporting bugs.
202 |       --report              Parse media and report all non-detected values
203 |       -y, --yaml            Display output in yaml format
204 |       -N, --no-units        Display output without units
205 |       -P PROFILE, --profile PROFILE
206 |                             Display values according to specified profile: code, default, human, technical
207 |     
208 |     Configuration:
209 |       --mediainfo MEDIAINFO
210 |                             The location to search for MediaInfo binaries
211 |       --ffmpeg FFMPEG       The location to search for ffprobe (FFmpeg) binaries
212 |       --mkvmerge MKVMERGE   The location to search for mkvmerge (MKVToolNix) binaries
213 |     
214 |     Information:
215 |       --version             Display knowit version.
216 | 
217 | ## Installation
218 | 
219 | KnowIt can be installed as a regular python module by running:
220 | 
221 |     $ [sudo] pip install knowit
222 | 
For better isolation from your system, use a dedicated virtualenv or
install for your user only with the `--user` flag.
225 | 
226 | ## External dependencies
227 | 
KnowIt can use MediaInfo, ffprobe (FFmpeg) or mkvmerge (MKVToolNix).
229 | 
KnowIt supports MKV regardless of whether MediaInfo, FFmpeg or
MKVToolNix is installed.
232 | 
Installing MediaInfo, FFmpeg or MKVToolNix increases the number of
supported formats and the amount of information extracted.
235 | 
236 | MediaInfo is the default provider. Visit their
237 | [website](http://mediaarea.net/MediaInfo) and install the proper package
238 | for your system.
239 | 
240 | ffprobe (FFmpeg) can be downloaded
241 | [here](https://ffmpeg.org/download.html)
242 | 
243 | mkvmerge (MKVToolNix) can be downloaded
244 | [here](https://mkvtoolnix.download/downloads.html)
245 | 


--------------------------------------------------------------------------------
/tests/data/enzyme/issue-24-example-01.mkv.json:
--------------------------------------------------------------------------------
  1 | {
  2 |     "info": {
  3 |         "title": "The 100 06x09 (What You Take With You) - release by messafan for CasStudio",
  4 |         "duration": "0:42:05",
  5 |         "date_utc": "2019-07-26 11:08:51",
  6 |         "muxing_app": "libebml v1.3.7 + libmatroska v1.5.0",
  7 |         "writing_app": "mkvmerge v34.0.0 ('Sight and Seen') 64-bit"
  8 |     },
  9 |     "video_tracks": [
 10 |         {
 11 |             "type": 1,
 12 |             "number": 1,
 13 |             "language": "und",
 14 |             "enabled": true,
 15 |             "default": true,
 16 |             "forced": false,
 17 |             "lacing": false,
 18 |             "codec_id": "V_MPEG4/ISO/AVC",
 19 |             "width": 1280,
 20 |             "height": 720,
 21 |             "interlaced": false,
 22 |             "stereo_mode": 0,
 23 |             "crop": {},
 24 |             "display_width": 1280,
 25 |             "display_height": 720,
 26 |             "aspect_ratio_type": 0
 27 |         }
 28 |     ],
 29 |     "audio_tracks": [
 30 |         {
 31 |             "type": 2,
 32 |             "number": 2,
 33 |             "language": "por",
 34 |             "enabled": true,
 35 |             "default": true,
 36 |             "forced": true,
 37 |             "lacing": true,
 38 |             "codec_id": "A_AC3",
 39 |             "sampling_frequency": 48000.0,
 40 |             "channels": 2,
 41 |             "output_sampling_frequency": 48000.0
 42 |         },
 43 |         {
 44 |             "type": 2,
 45 |             "number": 3,
 46 |             "name": "Stereo",
 47 |             "language": "eng",
 48 |             "enabled": true,
 49 |             "default": false,
 50 |             "forced": false,
 51 |             "lacing": true,
 52 |             "codec_id": "A_AC3",
 53 |             "sampling_frequency": 48000.0,
 54 |             "channels": 2,
 55 |             "output_sampling_frequency": 48000.0
 56 |         }
 57 |     ],
 58 |     "subtitle_tracks": [],
 59 |     "chapters": [],
 60 |     "tags": [
 61 |         {
 62 |             "targets": [
 63 |                 {
 64 |                     "id": 26826,
 65 |                     "type": 1,
 66 |                     "name": "TargetTypeValue",
 67 |                     "level": 4,
 68 |                     "position": 652854402,
 69 |                     "size": 1,
 70 |                     "data": 50
 71 |                 }
 72 |             ],
 73 |             "simpletags": [
 74 |                 {
 75 |                     "name": "ENCODER",
 76 |                     "language": "und",
 77 |                     "default": true,
 78 |                     "string": "Lavf58.20.100"
 79 |                 }
 80 |             ]
 81 |         },
 82 |         {
 83 |             "targets": [
 84 |                 {
 85 |                     "id": 26826,
 86 |                     "type": 1,
 87 |                     "name": "TargetTypeValue",
 88 |                     "level": 4,
 89 |                     "position": 652854449,
 90 |                     "size": 1,
 91 |                     "data": 50
 92 |                 },
 93 |                 {
 94 |                     "id": 25541,
 95 |                     "type": 1,
 96 |                     "name": "TagTrackUID",
 97 |                     "level": 4,
 98 |                     "position": 652854453,
 99 |                     "size": 8,
100 |                     "data": 12656891187800456645
101 |                 },
102 |                 {
103 |                     "id": 25546,
104 |                     "type": 3,
105 |                     "name": "TargetType",
106 |                     "level": 4,
107 |                     "position": 652854464,
108 |                     "size": 5,
109 |                     "data": "MOVIE"
110 |                 }
111 |             ],
112 |             "simpletags": [
113 |                 {
114 |                     "name": "BPS",
115 |                     "language": "eng",
116 |                     "default": true,
117 |                     "string": "192000"
118 |                 },
119 |                 {
120 |                     "name": "DURATION",
121 |                     "language": "eng",
122 |                     "default": true,
123 |                     "string": "00:42:05.056000000"
124 |                 },
125 |                 {
126 |                     "name": "NUMBER_OF_FRAMES",
127 |                     "language": "eng",
128 |                     "default": true,
129 |                     "string": "78908"
130 |                 },
131 |                 {
132 |                     "name": "NUMBER_OF_BYTES",
133 |                     "language": "eng",
134 |                     "default": true,
135 |                     "string": "60601344"
136 |                 },
137 |                 {
138 |                     "name": "_STATISTICS_WRITING_APP",
139 |                     "language": "eng",
140 |                     "default": true,
141 |                     "string": "mkvmerge v34.0.0 ('Sight and Seen') 64-bit"
142 |                 },
143 |                 {
144 |                     "name": "_STATISTICS_WRITING_DATE_UTC",
145 |                     "language": "eng",
146 |                     "default": true,
147 |                     "string": "2019-07-26 11:08:51"
148 |                 },
149 |                 {
150 |                     "name": "_STATISTICS_TAGS",
151 |                     "language": "eng",
152 |                     "default": true,
153 |                     "string": "BPS DURATION NUMBER_OF_FRAMES NUMBER_OF_BYTES"
154 |                 }
155 |             ]
156 |         },
157 |         {
158 |             "targets": [
159 |                 {
160 |                     "id": 26826,
161 |                     "type": 1,
162 |                     "name": "TargetTypeValue",
163 |                     "level": 4,
164 |                     "position": 652854836,
165 |                     "size": 1,
166 |                     "data": 50
167 |                 },
168 |                 {
169 |                     "id": 25541,
170 |                     "type": 1,
171 |                     "name": "TagTrackUID",
172 |                     "level": 4,
173 |                     "position": 652854840,
174 |                     "size": 1,
175 |                     "data": 1
176 |                 },
177 |                 {
178 |                     "id": 25546,
179 |                     "type": 3,
180 |                     "name": "TargetType",
181 |                     "level": 4,
182 |                     "position": 652854844,
183 |                     "size": 5,
184 |                     "data": "MOVIE"
185 |                 }
186 |             ],
187 |             "simpletags": [
188 |                 {
189 |                     "name": "BPS",
190 |                     "language": "eng",
191 |                     "default": true,
192 |                     "string": "1749265"
193 |                 },
194 |                 {
195 |                     "name": "DURATION",
196 |                     "language": "eng",
197 |                     "default": true,
198 |                     "string": "00:42:01.186000000"
199 |                 },
200 |                 {
201 |                     "name": "NUMBER_OF_FRAMES",
202 |                     "language": "eng",
203 |                     "default": true,
204 |                     "string": "60448"
205 |                 },
206 |                 {
207 |                     "name": "NUMBER_OF_BYTES",
208 |                     "language": "eng",
209 |                     "default": true,
210 |                     "string": "551278026"
211 |                 },
212 |                 {
213 |                     "name": "_STATISTICS_WRITING_APP",
214 |                     "language": "eng",
215 |                     "default": true,
216 |                     "string": "mkvmerge v34.0.0 ('Sight and Seen') 64-bit"
217 |                 },
218 |                 {
219 |                     "name": "_STATISTICS_WRITING_DATE_UTC",
220 |                     "language": "eng",
221 |                     "default": true,
222 |                     "string": "2019-07-26 11:08:51"
223 |                 },
224 |                 {
225 |                     "name": "_STATISTICS_TAGS",
226 |                     "language": "eng",
227 |                     "default": true,
228 |                     "string": "BPS DURATION NUMBER_OF_FRAMES NUMBER_OF_BYTES"
229 |                 }
230 |             ]
231 |         },
232 |         {
233 |             "targets": [
234 |                 {
235 |                     "id": 26826,
236 |                     "type": 1,
237 |                     "name": "TargetTypeValue",
238 |                     "level": 4,
239 |                     "position": 652855218,
240 |                     "size": 1,
241 |                     "data": 50
242 |                 },
243 |                 {
244 |                     "id": 25541,
245 |                     "type": 1,
246 |                     "name": "TagTrackUID",
247 |                     "level": 4,
248 |                     "position": 652855222,
249 |                     "size": 1,
250 |                     "data": 2
251 |                 },
252 |                 {
253 |                     "id": 25546,
254 |                     "type": 3,
255 |                     "name": "TargetType",
256 |                     "level": 4,
257 |                     "position": 652855226,
258 |                     "size": 5,
259 |                     "data": "MOVIE"
260 |                 }
261 |             ],
262 |             "simpletags": [
263 |                 {
264 |                     "name": "BPS",
265 |                     "language": "eng",
266 |                     "default": true,
267 |                     "string": "128000"
268 |                 },
269 |                 {
270 |                     "name": "DURATION",
271 |                     "language": "eng",
272 |                     "default": true,
273 |                     "string": "00:42:01.216000000"
274 |                 },
275 |                 {
276 |                     "name": "NUMBER_OF_FRAMES",
277 |                     "language": "eng",
278 |                     "default": true,
279 |                     "string": "78788"
280 |                 },
281 |                 {
282 |                     "name": "NUMBER_OF_BYTES",
283 |                     "language": "eng",
284 |                     "default": true,
285 |                     "string": "40339456"
286 |                 },
287 |                 {
288 |                     "name": "_STATISTICS_WRITING_APP",
289 |                     "language": "eng",
290 |                     "default": true,
291 |                     "string": "mkvmerge v34.0.0 ('Sight and Seen') 64-bit"
292 |                 },
293 |                 {
294 |                     "name": "_STATISTICS_WRITING_DATE_UTC",
295 |                     "language": "eng",
296 |                     "default": true,
297 |                     "string": "2019-07-26 11:08:51"
298 |                 },
299 |                 {
300 |                     "name": "_STATISTICS_TAGS",
301 |                     "language": "eng",
302 |                     "default": true,
303 |                     "string": "BPS DURATION NUMBER_OF_FRAMES NUMBER_OF_BYTES"
304 |                 }
305 |             ]
306 |         }
307 |     ],
308 |     "recurse_seek_head": false
309 | }


--------------------------------------------------------------------------------
/knowit/providers/mkvmerge.py:
--------------------------------------------------------------------------------
  1 | 
  2 | import json
  3 | import logging
  4 | import re
  5 | from decimal import Decimal
  6 | from logging import NullHandler, getLogger
  7 | from subprocess import check_output
  8 | from typing import Union
  9 | 
 10 | from knowit.core import Property
 11 | from knowit.properties import (
 12 |     AudioCodec,
 13 |     Basic,
 14 |     Duration,
 15 |     Language,
 16 |     Quantity,
 17 |     VideoCodec,
 18 |     VideoDimensions,
 19 |     YesNo,
 20 | )
 21 | from knowit.provider import (
 22 |     Executor,
 23 |     MalformedFileError,
 24 |     NotFoundExecutor,
 25 |     Provider,
 26 | )
 27 | from knowit.rules import (
 28 |     AudioChannelsRule,
 29 |     ClosedCaptionRule,
 30 |     HearingImpairedRule,
 31 |     LanguageRule,
 32 |     ResolutionRule,
 33 | )
 34 | from knowit.rules.general import GuessTitleRule
 35 | from knowit.serializer import get_json_encoder
 36 | from knowit.units import units
 37 | from knowit.utils import define_candidate, detect_os
 38 | 
# Module-level logger. Only a NullHandler is attached here, so this module
# does not configure any log output by itself.
logger = getLogger(__name__)
logger.addHandler(NullHandler())
 41 | 
 42 | WARN_MSG = r'''
 43 | =========================================================================================
 44 | mkvmerge not found on your system or could not be loaded.
 45 | Visit https://mkvtoolnix.download to download it.
 46 | If you still have problems, please check if the downloaded version matches your system.
 47 | To load mkvmerge from a specific location, please define the location as follow:
 48 |   knowit --mkvmerge /usr/local/mkvmerge/bin <video_path>
 49 |   knowit --mkvmerge /usr/local/mkvmerge/bin/ffprobe <video_path>
 50 |   knowit --mkvmerge "C:\Program Files\mkvmerge" <video_path>
 51 |   knowit --mkvmerge C:\Software\mkvmerge.exe <video_path>
 52 | =========================================================================================
 53 | '''
 54 | 
 55 | 
 56 | class MkvMergeExecutor(Executor):
 57 |     """Executor that knows how to execute mkvmerge."""
 58 | 
 59 |     version_re = re.compile(r'\bv(?P<version>[^\b\s]+)')
 60 |     locations = {
 61 |         'unix': ('/usr/local/mkvmerge/lib', '/usr/local/mkvmerge/bin', '__PATH__'),
 62 |         'windows': ('__PATH__', ),
 63 |         'macos': ('__PATH__', ),
 64 |     }
 65 | 
 66 |     def extract_info(self, filename):
 67 |         """Extract media info."""
 68 |         json_dump = self._execute(filename)
 69 |         return json.loads(json_dump) if json_dump else {}
 70 | 
 71 |     def _execute(self, filename):
 72 |         raise NotImplementedError
 73 | 
 74 |     @classmethod
 75 |     def _get_version(cls, output):
 76 |         match = cls.version_re.search(output)
 77 |         if match:
 78 |             version = match.groupdict()['version']
 79 |             return version
 80 | 
 81 |     @classmethod
 82 |     def get_executor_instance(cls, suggested_path=None) -> Union["MkvMergeExecutor", NotFoundExecutor]:
 83 |         """Return executor instance."""
 84 |         os_family = detect_os()
 85 |         logger.debug('Detected os: %s', os_family)
 86 |         for exec_cls in (MkvMergeCliExecutor, ):
 87 |             executor = exec_cls.create(os_family, suggested_path)
 88 |             if executor:
 89 |                 return executor
 90 |         return NotFoundExecutor(suggested_path)
 91 | 
 92 | 
 93 | class MkvMergeCliExecutor(MkvMergeExecutor):
 94 |     """Executor that uses mkvmerge cli."""
 95 | 
 96 |     names = {
 97 |         'unix': ('mkvmerge', ),
 98 |         'windows': ('mkvmerge.exe', ),
 99 |         'macos': ('mkvmerge', ),
100 |     }
101 | 
102 |     def _execute(self, filename):
103 |         return check_output([self.location, '-i', '-F', 'json', filename]).decode()
104 | 
105 |     @classmethod
106 |     def create(cls, os_family=None, suggested_path=None):
107 |         """Create the executor instance."""
108 |         for candidate in define_candidate(cls.locations, cls.names, os_family, suggested_path):
109 |             try:
110 |                 output = check_output([candidate, '--version']).decode()
111 |                 version = cls._get_version(output)
112 |                 if version:
113 |                     logger.debug('MkvMerge cli detected: %s v%s', candidate, version)
114 |                     return MkvMergeCliExecutor(candidate, version.split('.'))
115 |             except OSError:
116 |                 pass
117 | 
118 | 
class MkvMergeProvider(Provider):
    """MkvMerge Provider."""

    def __init__(self, config, suggested_path=None, *args, **kwargs):
        """Declare the property mapping and rules, then look up the mkvmerge executor.

        :param config: configuration handed to the base provider and to the
            codec / scan type properties.
        :param suggested_path: optional location forwarded to the executor lookup.
        """
        super().__init__(config, {
            'general': {
                'title': Property('title', description='media title'),
                'duration': Duration('duration', resolution=Decimal('0.000001'), description='media duration'),
            },
            'video': {
                'id': Basic('number', data_type=int, description='video track number'),
                'name': Property('name', description='video track name'),
                'language': Language('language_ietf', 'language', description='video language'),
                'width': VideoDimensions('display_dimensions', dimension='width'),
                'height': VideoDimensions('display_dimensions', dimension='height'),
                'scan_type': YesNo('interlaced', yes='Interlaced', no='Progressive', default='Progressive',
                                   config=config, config_key='ScanType',
                                   description='video scan type'),
                'resolution': None,  # populated with ResolutionRule
                # 'bit_depth', Property('bit_depth', Integer('video bit depth')),
                'codec': VideoCodec(config, 'codec_id', description='video codec'),
                'forced': YesNo('forced_track', hide_value=False, description='video track forced'),
                'default': YesNo('default_track', hide_value=False, description='video track default'),
                'enabled': YesNo('enabled_track', hide_value=True, description='video track enabled'),
            },
            'audio': {
                'id': Basic('number', data_type=int, description='audio track number'),
                'name': Property('name', description='audio track name'),
                'language': Language('language_ietf', 'language', description='audio language'),
                'codec': AudioCodec(config, 'codec_id', description='audio codec'),
                'channels_count': Basic('audio_channels', data_type=int, description='audio channels count'),
                'channels': None,  # populated with AudioChannelsRule
                'sampling_rate': Quantity('audio_sampling_frequency', unit=units.Hz, description='audio sampling rate'),
                'forced': YesNo('forced_track', hide_value=False, description='audio track forced'),
                'default': YesNo('default_track', hide_value=False, description='audio track default'),
                'enabled': YesNo('enabled_track', hide_value=True, description='audio track enabled'),
            },
            'subtitle': {
                'id': Basic('number', data_type=int, description='subtitle track number'),
                'name': Property('name', description='subtitle track name'),
                'language': Language('language_ietf', 'language', description='subtitle language'),
                'hearing_impaired': None,  # populated with HearingImpairedRule
                'closed_caption': None,  # populated with ClosedCaptionRule
                'forced': YesNo('forced_track', hide_value=False, description='subtitle track forced'),
                'default': YesNo('default_track', hide_value=False, description='subtitle track default'),
                'enabled': YesNo('enabled_track', hide_value=True, description='subtitle track enabled'),
            },
        }, {
            'video': {
                'guessed': GuessTitleRule('guessed properties', private=True),
                'language': LanguageRule('video language', override=True),
                'resolution': ResolutionRule('video resolution'),
            },
            'audio': {
                'guessed': GuessTitleRule('guessed properties', private=True),
                'language': LanguageRule('audio language', override=True),
                'channels': AudioChannelsRule('audio channels'),
            },
            'subtitle': {
                'guessed': GuessTitleRule('guessed properties', private=True),
                'language': LanguageRule('subtitle language', override=True),
                'hearing_impaired': HearingImpairedRule('subtitle hearing impaired', override=True),
                'closed_caption': ClosedCaptionRule('closed caption', override=True),
            }
        })
        self.executor = MkvMergeExecutor.get_executor_instance(suggested_path)

    def loaded(self) -> bool:
        """If library or executable was found."""
        # when no executable was found, log the help message once and remember it was shown
        if isinstance(self.executor, NotFoundExecutor):
            if not self.executor.warned:
                logger.warning(WARN_MSG)
                self.executor.warned = True
        # check if loaded
        return bool(self.executor)

    def accepts(self, video_path):
        """Accept Matroska videos when mkvmerge is available."""
        return self.loaded() and video_path.lower().endswith(('.mkv', '.mka', '.mks'))

    @classmethod
    def extract_info(cls, video_path):
        """Extract info from the video using the ``mkvmerge`` found on the PATH.

        This does not go through the configured executor. ``-F`` expects the
        identification format as its value, so ``json`` must precede the path.
        """
        return json.loads(check_output(['mkvmerge', '-i', '-F', 'json', video_path]).decode())

    def describe(self, video_path, context):
        """Return video metadata.

        :param video_path: path of the file handed to the executor.
        :param context: mapping that receives a ``debug_data`` callable and is
            forwarded to the track description.
        :raises MalformedFileError: when no description could be produced.
        """
        data = self.executor.extract_info(video_path)

        def debug_data():
            """Debug data."""
            return json.dumps(data, cls=get_json_encoder(context), indent=4, ensure_ascii=False)

        context['debug_data'] = debug_data

        if logger.isEnabledFor(logging.DEBUG):
            logger.debug('Video %r scanned using mkvmerge %r has raw data:\n%s',
                         video_path, self.executor.location, debug_data())

        def merge_properties(target: dict):
            """Merge properties sub properties into the target container."""
            return {**{k: v for k, v in target.items() if k != 'properties'}, **target.get('properties', {})}

        general_track = merge_properties(data.get('container', {}))
        # group the tracks by type; other track types are ignored
        tracks_by_type = {'video': [], 'audio': [], 'subtitles': []}
        # the executor yields {} when mkvmerge printed nothing: treat missing tracks as no tracks
        for track in data.get('tracks') or []:
            bucket = tracks_by_type.get(track.get('type'))
            if bucket is not None:
                bucket.append(merge_properties(track))

        result = self._describe_tracks(video_path, general_track, tracks_by_type['video'],
                                       tracks_by_type['audio'], tracks_by_type['subtitles'], context)

        if not result:
            raise MalformedFileError

        result['provider'] = {
            'name': 'mkvmerge',
            'version': self.version
        }

        return result

    @property
    def version(self):
        """Return mkvmerge version information."""
        if not self.executor:
            return {}
        version = '.'.join(map(str, self.executor.version))

        return {self.executor.location: f'v{version}'}
258 | 


--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | import os
  3 | import pathlib
  4 | import re
  5 | import sys
  6 | from collections.abc import Mapping
  7 | from datetime import timedelta
  8 | from io import BytesIO
  9 | from zipfile import ZipFile
 10 | 
 11 | import requests
 12 | import yaml
 13 | from yaml.constructor import Constructor
 14 | 
 15 | from knowit import serializer
 16 | from knowit.api import provider_names
 17 | from knowit.serializer import format_property
 18 | from knowit.units import units
 19 | 
 20 | 
# File extensions treated as YAML files by the helpers in this module.
YAML_EXTENSIONS = ('.yml', '.yaml')


# Matches "H:M:S" with an optional fractional part. The first three fraction digits are
# captured as ``millis``, the next three (optional) as ``micro``; any further digits are
# matched but not captured.
duration_re = re.compile(r'(?P<hours>\d{1,2}):'
                         r'(?P<minutes>\d{1,2}):'
                         r'(?P<seconds>\d{1,2})(?:\.'
                         r'(?P<millis>\d{3})'
                         r'(?P<micro>\d{3})?\d*)?')

# Replace the serializer's YAML loader for the tests: string scalars go through _parse_value
# and sequences use the stock constructor.
# NOTE(review): presumably the mapping registers one constructor per YAML tag - confirm
# against serializer.get_yaml_loader.
serializer.YAMLLoader = serializer.get_yaml_loader({
    'tag:yaml.org,2002:str': lambda constructor, value: _parse_value(value),
    'tag:yaml.org,2002:seq': Constructor.construct_sequence,
})


# Tolerance applied by check_timedelta_equals when comparing durations.
one_ms = timedelta(milliseconds=1)
 37 | 
 38 | 
 39 | def normalize_path(path: str):
 40 |     return os.fspath(pathlib.Path(path))
 41 | 
 42 | 
 43 | def parameters_from_yaml(name, input_key=None, expected_key=None):
 44 |     package_name, resource_name = name.split('.', 1)
 45 | 
 46 |     files = []
 47 |     for yaml_ext in YAML_EXTENSIONS:
 48 |         yaml_file = os.path.join(package_name, resource_name + yaml_ext)
 49 |         if os.path.isfile(yaml_file):
 50 |             files.append(yaml_file)
 51 |             break
 52 | 
 53 |     parameters = []
 54 |     for file_path in files:
 55 |         data = read_yaml(file_path)
 56 | 
 57 |         if input_key and expected_key:
 58 |             parameters.append((data[expected_key], data[input_key]))
 59 |             continue
 60 | 
 61 |         for root_key, root_value in data.items():
 62 |             if isinstance(root_value, Mapping):
 63 |                 for expected, data_input in root_value.items():
 64 |                     for properties in data_input if isinstance(data_input, (tuple, list)) else [data_input]:
 65 |                         parameters.append((root_key, expected, properties))
 66 |             else:
 67 |                 for properties in root_value if isinstance(root_value, (tuple, list)) else [root_value]:
 68 |                     parameters.append((root_key, properties))
 69 | 
 70 |     return parameters
 71 | 
 72 | 
 73 | def read_file(file_path):
 74 |     with open(file_path, 'r') as f:
 75 |         return f.read()
 76 | 
 77 | 
 78 | def read_yaml(file_path):
 79 |     with open(file_path, 'r', encoding='utf-8') as f:
 80 |         return yaml.load(f, Loader=serializer.YAMLLoader)
 81 | 
 82 | 
 83 | def read_json(file_path):
 84 |     with open(file_path, 'r') as f:
 85 |         return json.loads(f.read())
 86 | 
 87 | 
 88 | def id_func(param):
 89 |     return repr(param)
 90 | 
 91 | 
 92 | class MediaFiles(object):
 93 |     """Represent media files in test/data folder."""
 94 | 
 95 |     def __init__(self):
 96 |         """Initialize the object."""
 97 |         self.videos = MediaFiles._videos()
 98 |         self.datafiles = MediaFiles._provider_datafiles()
 99 | 
100 |     @staticmethod
101 |     def _videos():
102 |         data_path = os.path.join('tests', 'data', 'videos')
103 | 
104 |         # download matroska test suite
105 |         if not os.path.exists(data_path) or len(os.listdir(data_path)) != 8:
106 |             r = requests.get('http://downloads.sourceforge.net/project/matroska/test_files/matroska_test_w1_1.zip')
107 |             with ZipFile(BytesIO(r.content), 'r') as f:
108 |                 f.extractall(data_path, [m for m in f.namelist() if os.path.splitext(m)[1] == '.mkv'])
109 | 
110 |         # populate a dict with mkv files
111 |         files = []
112 |         for path in os.listdir(data_path):
113 |             name, _ = os.path.splitext(path)
114 |             files.append(os.path.join(data_path, path))
115 | 
116 |         return files
117 | 
118 |     @staticmethod
119 |     def _provider_datafiles():
120 |         datafiles = {}
121 |         for provider in provider_names:
122 |             files = []
123 |             data_path = os.path.join('tests', 'data', provider)
124 |             if not os.path.isdir(data_path):
125 |                 continue
126 |             for path in os.listdir(data_path):
127 |                 if not path.lower().endswith(YAML_EXTENSIONS):
128 |                     files.append(os.path.join(data_path, path))
129 | 
130 |             datafiles[provider] = files
131 | 
132 |         return datafiles
133 | 
134 |     def get_real_media(self, provider_name):
135 |         """Return only real video files."""
136 |         return [Media(f, provider_name) for f in self.videos]
137 | 
138 |     def get_xml_media(self, provider_name):
139 |         """Return all videos metadata as xml."""
140 |         return [XmlMedia(f, provider_name) for f in self.datafiles[provider_name]]
141 | 
142 |     def get_yaml_media(self, provider_name):
143 |         """Return all videos metadata as yaml."""
144 |         return [YamlMedia(f, provider_name) for f in self.datafiles[provider_name]]
145 | 
146 |     def get_json_media(self, provider_name):
147 |         """Return all videos metadata as json."""
148 |         return [JsonMedia(f, provider_name) for f in self.datafiles[provider_name]]
149 | 
150 | 
# Module-level instance shared by the tests; MediaFiles.__init__ runs when this module is imported.
mediafiles = MediaFiles()
152 | 
153 | 
class Media(object):
    """Represent a media."""

    def __init__(self, file_path, provider_name):
        """Initialize the object."""
        self.file_path = file_path
        self.provider_name = provider_name

    @property
    def video_path(self):
        """Return the video path."""
        return self.file_path

    @property
    def expected_data(self):
        """Return the expected video metadata.

        The YAML file is looked up as ``<video name><ext>`` inside the
        ``provider_name`` folder located next to the video's own folder.

        :raises IOError: when no expected YAML file exists for the video.
        """
        yaml_folder = os.path.normpath(os.path.join(os.path.split(self.video_path)[0], os.pardir))
        for yaml_ext in YAML_EXTENSIONS:
            yaml_file = os.path.join(yaml_folder, self.provider_name, os.path.basename(self.video_path) + yaml_ext)
            if os.path.isfile(yaml_file):
                return read_yaml(yaml_file)

        # format the message: passing the path as a second argument left '{!r}' unformatted
        raise IOError('Unable to find expected file for {!r}'.format(self.video_path))

    def __repr__(self):
        """Return the media representation."""
        return '<{} [{}]>'.format(self.__class__.__name__, self.video_path)

    def __str__(self):
        """Return the media path."""
        return self.video_path
189 | 
190 | 
class DataMedia(Media):
    """Represent a video without the real file, only the video metadata."""

    @property
    def video_path(self):
        """Return the video path, i.e. the data file path without its last extension."""
        return os.path.splitext(self.file_path)[0]

    @property
    def expected_data(self):
        """Return the expected video metadata.

        The YAML file is looked up as ``<video_path><ext>``.

        :raises IOError: when no expected YAML file exists for the video.
        """
        for yaml_ext in YAML_EXTENSIONS:
            yaml_file = self.video_path + yaml_ext
            if os.path.isfile(yaml_file):
                return read_yaml(yaml_file)

        # format the message: passing the path as a second argument left '{!r}' unformatted
        raise IOError('Unable to find expected file for {!r}'.format(self.video_path))
212 | 
213 | 
class XmlMedia(DataMedia):
    """Metadata-only media whose input data is the raw text of its data file."""

    @property
    def input_data(self):
        """Return the content of the data file as text."""
        xml_text = read_file(self.file_path)
        return xml_text
221 | 
222 | 
class YamlMedia(DataMedia):
    """Metadata-only media whose input data is loaded from a yaml data file."""

    @property
    def input_data(self):
        """Return the data file content parsed as yaml."""
        parsed = read_yaml(self.file_path)
        return parsed
230 | 
231 | 
class JsonMedia(DataMedia):
    """Metadata-only media whose input data is loaded from a json data file."""

    @property
    def input_data(self):
        """Return the data file content parsed as json."""
        parsed = read_json(self.file_path)
        return parsed
239 | 
240 | 
def _parse_value(node):
    """Convert the string value of a YAML node into a quantity or a timedelta when it looks like one.

    Values that are empty, not strings, or match neither form are returned unchanged.
    """
    def parse_duration(value):
        """Return a timedelta when ``value`` starts with a duration, else ``value``."""
        found = duration_re.match(value)
        if not found:
            return value
        # groups that did not participate in the match default to '0'
        hours, minutes, seconds, millis, micros = (int(part) for part in found.groups('0'))
        return timedelta(hours=hours, minutes=minutes, seconds=seconds, milliseconds=millis, microseconds=micros)

    def parse_quantity(value):
        """Return a quantity when ``value`` ends with a known unit, else ``value``."""
        if not isinstance(value, str):
            return value
        for unit in ('pixel', 'bit', 'byte', 'FPS', 'bps', 'Hz'):
            if value.endswith(' ' + unit):
                return units(value[:-len(unit)]) * units(unit)
        return value

    parsed = node.value
    for converter in (parse_quantity, parse_duration):
        # only keep converting while the value is still a non-empty string
        if not parsed or not isinstance(parsed, str):
            continue
        parsed = converter(node.value)
    return parsed
262 | 
263 | 
def is_iterable(obj):
    """Return True when ``obj`` is a tuple or a list (other iterables do not count)."""
    return isinstance(obj, tuple) or isinstance(obj, list)
266 | 
267 | 
def to_string(profile: str, value):
    """Return ``value`` formatted for ``profile`` as a string, or None when formatting gives None."""
    formatted_value = format_property(profile, value)
    if formatted_value is None:
        return None
    return str(formatted_value)
271 | 
272 | 
def check_equals(expected, actual, different, options, prefix=''):
    """Compare ``expected`` with ``actual`` and append every mismatch to ``different``.

    Mappings, lists/tuples and timedeltas are delegated to their dedicated
    checkers; any other value is compared through its ``to_string`` form for
    ``options['profile']``.
    """
    if isinstance(expected, Mapping):
        check_mapping_equals(expected, actual, different=different, options=options, prefix=prefix)
        return

    if is_iterable(expected):
        check_sequence_equals(expected, actual, different=different, options=options, prefix=prefix)
        return

    if isinstance(expected, timedelta):
        check_timedelta_equals(expected, actual, different=different, prefix=prefix)
        return

    if to_string(options['profile'], expected) != to_string(options['profile'], actual):
        different.append((prefix, expected, actual))
282 | 
283 | 
def check_timedelta_equals(expected, actual, different, prefix=''):
    """Record a mismatch unless ``actual`` is a timedelta within ``one_ms`` of ``expected``."""
    if isinstance(actual, timedelta) and (expected - one_ms) <= actual <= (expected + one_ms):
        return
    different.append((prefix, expected, actual))
287 | 
288 | 
def check_sequence_equals(expected, actual, different, options, prefix=''):
    """Compare two sequences item by item, appending mismatches to ``different``.

    A single mismatch is recorded for the whole sequence when ``actual`` is
    not a list/tuple or when the lengths differ.
    """
    same_shape = is_iterable(actual) and len(expected) == len(actual)
    if not same_shape:
        different.append((prefix, expected, actual))
        return

    for position, (expected_value, actual_value) in enumerate(zip(expected, actual)):
        item_prefix = '{0}[{1}].'.format(prefix, position)
        check_equals(expected_value, actual_value, different=different, options=options, prefix=item_prefix)
298 | 
299 | 
def check_mapping_equals(expected, actual, different, options, prefix=''):
    """Compare two mappings key by key, appending mismatches to ``different``.

    Each mismatch is a ``(key, expected, actual)`` tuple whose key starts with
    ``prefix``. The expected ``media_type`` key is never compared, ``path``
    values are normalized before comparison, and keys present on only one side
    are reported with ``None`` for the missing side.
    """
    if not isinstance(actual, Mapping):
        # keep the prefix so that the report tells which nested entry is not a mapping
        different.append((prefix, expected, actual))
        return

    for expected_key, expected_value in expected.items():
        if expected_key == 'media_type':
            continue

        if expected_key not in actual:
            different.append((prefix + expected_key, expected_value, None))
            continue

        actual_value = actual[expected_key]
        key = prefix + expected_key

        if expected_key == 'path':
            expected_value = normalize_path(expected_value)
            actual_value = normalize_path(actual_value)

        check_equals(expected_value, actual_value, different=different, options=options, prefix=key)

    # report the keys that were not expected at all
    for actual_key, actual_value in actual.items():
        if actual_key not in expected:
            different.append((prefix + actual_key, None, actual_value))
326 | 
327 | 
def assert_expected(expected, actual, options=None):
    """Assert that ``actual`` matches ``expected``, printing every difference to stderr.

    The provider version is removed from ``actual`` before the comparison.
    When differences exist and ``options`` carries a ``debug_data`` callable,
    the removed version and the debug data are printed as well.
    """
    version = None
    if 'provider' in actual:
        version = actual['provider']['version']
        del actual['provider']['version']

    effective_options = options or {'profile': 'default'}
    different = []
    check_equals(expected, actual, different=different, options=effective_options)
    for key, expected_value, actual_value in different:
        print('{0}: Expected {1} got {2}'.format(key, expected_value, actual_value), file=sys.stderr)

    show_debug = different and options and options.get('debug_data')
    if show_debug:
        print(f'Version: {version}')
        print(options['debug_data']())

    assert not different
344 | 


--------------------------------------------------------------------------------